Project

General

Profile

Download (52.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/ 
9

    
10
package eu.etaxonomy.cdm.strategy.parser;
11

    
12
import java.util.regex.Matcher;
13
import java.util.regex.Pattern;
14

    
15
import org.apache.commons.lang.StringUtils;
16
import org.apache.log4j.Logger;
17
import org.joda.time.DateTimeFieldType;
18
import org.joda.time.Partial;
19

    
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.model.agent.Person;
22
import eu.etaxonomy.cdm.model.agent.Team;
23
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
24
import eu.etaxonomy.cdm.model.common.CdmBase;
25
import eu.etaxonomy.cdm.model.common.IParsable;
26
import eu.etaxonomy.cdm.model.common.TimePeriod;
27
import eu.etaxonomy.cdm.model.name.BacterialName;
28
import eu.etaxonomy.cdm.model.name.BotanicalName;
29
import eu.etaxonomy.cdm.model.name.CultivarPlantName;
30
import eu.etaxonomy.cdm.model.name.HybridRelationshipType;
31
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
32
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
33
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
34
import eu.etaxonomy.cdm.model.name.NonViralName;
35
import eu.etaxonomy.cdm.model.name.Rank;
36
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
37
import eu.etaxonomy.cdm.model.name.ZoologicalName;
38
import eu.etaxonomy.cdm.model.reference.IBook;
39
import eu.etaxonomy.cdm.model.reference.IBookSection;
40
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
41
import eu.etaxonomy.cdm.model.reference.IVolumeReference;
42
import eu.etaxonomy.cdm.model.reference.Reference;
43
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
44
import eu.etaxonomy.cdm.model.reference.ReferenceType;
45
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
46
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
47

    
48

    
49
/**
50
 * @author a.mueller
51
 *
52
 */
53
public class NonViralNameParserImpl extends NonViralNameParserImplRegExBase implements INonViralNameParser<NonViralName> {
54
	private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
55
	
56
	// good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
57
	
58
	final static boolean MAKE_EMPTY = true;
59
	final static boolean MAKE_NOT_EMPTY = false;
60
	
61
	private boolean authorIsAlwaysTeam = true;
62
	
63
	public static NonViralNameParserImpl NewInstance(){
64
		return new NonViralNameParserImpl();
65
	}
66

    
67
	/* (non-Javadoc)
68
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
69
	 */
70
	public NonViralName parseSimpleName(String simpleName){
71
		return parseSimpleName(simpleName, null, null);
72
	}
73
	
74
	
75
	/* (non-Javadoc)
76
	 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.NomenclaturalCode, eu.etaxonomy.cdm.model.name.Rank)
77
	 */
78
	public NonViralName parseSimpleName(String simpleName, NomenclaturalCode code, Rank rank){
79
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
80
		return parseFullName(simpleName, code, rank);
81
	}
82

    
83
	public void parseSimpleName(NonViralName nameToBeFilled, String simpleNameString, Rank rank, boolean makeEmpty){
84
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
85
		parseFullName(nameToBeFilled, simpleNameString, rank, makeEmpty);
86
	}
87

    
88
	
89
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
90
		return getNonViralNameInstance(fullString, code, null);
91
	}
92
	
93
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
94
		NonViralName result = null;
95
		if(code ==null) {
96
			boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
97
			boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
98
			boolean isBacteriologicalName = false;
99
			boolean isCultivatedPlantName = false;
100
			if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
101
				if (isBotanicalName){
102
					result = BotanicalName.NewInstance(rank);
103
				}else{
104
					result = CultivarPlantName.NewInstance(rank);
105
				}
106
			}else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
107
				result = ZoologicalName.NewInstance(rank);
108
			}else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
109
				result = BacterialName.NewInstance(rank);
110
			}else {
111
				result =  NonViralName.NewInstance(rank);
112
			}
113
		} else {
114
			switch (code) {
115
			case ICBN:
116
				result = BotanicalName.NewInstance(rank);
117
				break;
118
			case ICZN:
119
				result = ZoologicalName.NewInstance(rank);
120
				break;
121
			case ICNCP:
122
				logger.warn("ICNCP parsing not yet implemented");
123
				result = CultivarPlantName.NewInstance(rank);
124
				break;
125
			case ICNB:
126
				logger.warn("ICNB not yet implemented");
127
				result = BacterialName.NewInstance(rank);
128
				break;
129
			case ICVCN:
130
				logger.error("Viral name is not a NonViralName !!");
131
				break;
132
			default:
133
				// FIXME Unreachable code
134
				logger.error("Unknown Nomenclatural Code !!");
135
			}
136
		}
137
		return result;
138
	}
139
	
140

    
141
	/* (non-Javadoc)
142
	 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullReference(java.lang.String)
143
	 */
144
	public NonViralName parseReferencedName(String fullReferenceString) {
145
		return parseReferencedName(fullReferenceString, null, null);
146
	}
147
	
148
	/* (non-Javadoc)
149
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
150
	 */
151
	public NonViralName parseReferencedName(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
152
		if (fullReferenceString == null){
153
			return null;
154
		}else{
155
			NonViralName result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
156
			parseReferencedName(result, fullReferenceString, rank, MAKE_EMPTY);
157
			return result;
158
		}
159
	}
160
	
161
	private String standardize(NonViralName nameToBeFilled, String fullReferenceString, boolean makeEmpty){
162
		//Check null and standardize
163
		if (fullReferenceString == null){
164
			//return null;
165
			return null;
166
		}
167
		if (makeEmpty){
168
			makeEmpty(nameToBeFilled);
169
		}
170
		fullReferenceString = fullReferenceString.replaceAll(oWs , " ");
171
		fullReferenceString = fullReferenceString.trim();
172
		if ("".equals(fullReferenceString)){
173
			fullReferenceString = null;
174
		}
175
		return fullReferenceString;
176
	}
177

    
178
	/**
179
	 * Returns the regEx to be used for the full-name depending on the code
180
	 * @param nameToBeFilled
181
	 * @return
182
	 */
183
	private String getLocalFullName(NonViralName nameToBeFilled){
184
		if (nameToBeFilled instanceof ZoologicalName){
185
			return anyZooFullName;
186
		}else if (nameToBeFilled instanceof BotanicalName) {
187
			return anyBotanicFullName;
188
		}else if (nameToBeFilled instanceof NonViralName) {
189
			return anyBotanicFullName;  //TODO ?
190
		}else{
191
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
192
			return null;
193
		}
194
	}
195
	
196
	/**
197
	 * Returns the regEx to be used for the fsimple-name depending on the code
198
	 * @param nameToBeFilled
199
	 * @return
200
	 */
201
	private String getLocalSimpleName(NonViralName nameToBeFilled){
202
		if (nameToBeFilled instanceof ZoologicalName){
203
			return anyZooName;
204
		}else if (nameToBeFilled instanceof NonViralName){
205
			return anyZooName;  //TODO ?
206
		}else if (nameToBeFilled instanceof BotanicalName) {
207
			return anyBotanicName;
208
		}else{
209
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
210
			return null;
211
		}
212
	}
213
	
214
	private Matcher getMatcher(String regEx, String matchString){
215
		Pattern pattern = Pattern.compile(regEx);
216
		Matcher matcher = pattern.matcher(matchString);
217
		return matcher;
218
	}
219
	
220
	/* (non-Javadoc)
221
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
222
	 */
223
	public void parseReferencedName(NonViralName nameToBeFilled, String fullReferenceString, Rank rank, boolean makeEmpty) {
224
		//standardize
225
		fullReferenceString = standardize(nameToBeFilled, fullReferenceString, makeEmpty);
226
		if (fullReferenceString == null){
227
			return;
228
		}
229
		// happens already in standardize(...)
230
//		makeProblemEmpty(nameToBeFilled);
231
		
232
		//make nomenclatural status and replace it by empty string 
233
	    fullReferenceString = parseNomStatus(fullReferenceString, nameToBeFilled);
234
	    nameToBeFilled.setProblemEnds(fullReferenceString.length());
235
		
236
	    //get full name reg
237
		String localFullName = getLocalFullName(nameToBeFilled);
238
		//get full name reg
239
		String localSimpleName = getLocalSimpleName(nameToBeFilled);
240
		
241
		//separate name and reference part
242
		String nameAndRefSeparator = "(^" + localFullName + ")("+ referenceSeperator + ")";
243
		Matcher nameAndRefSeparatorMatcher = getMatcher (nameAndRefSeparator, fullReferenceString);
244
		
245
		Matcher onlyNameMatcher = getMatcher (localFullName, fullReferenceString);
246
		Matcher onlySimpleNameMatcher = getMatcher (localSimpleName, fullReferenceString);
247
		
248
		if (nameAndRefSeparatorMatcher.find()){
249
			makeNameWithReference(nameToBeFilled, fullReferenceString, nameAndRefSeparatorMatcher, rank, makeEmpty);
250
		}else if (onlyNameMatcher.matches()){
251
			makeEmpty = false;
252
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
253
		}else if (onlySimpleNameMatcher.matches()){
254
			makeEmpty = false;
255
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);	//simpleName not yet implemented
256
		}else{
257
			makeNoFullRefMatch(nameToBeFilled, fullReferenceString, rank);
258
		}
259
		//problem handling. Start and end solved in subroutines
260
		if (! nameToBeFilled.hasProblem()){
261
			makeProblemEmpty(nameToBeFilled);
262
		}
263
	}
264
	
265
	private void makeProblemEmpty(IParsable parsable){
266
		boolean hasCheckRank = parsable.hasProblem(ParserProblem.CheckRank);
267
		parsable.setParsingProblem(0);
268
		if (hasCheckRank){
269
			parsable.addParsingProblem(ParserProblem.CheckRank);
270
		}
271
		parsable.setProblemStarts(-1);
272
		parsable.setProblemEnds(-1);
273
	}
274
	
275
	private void makeNoFullRefMatch(NonViralName nameToBeFilled, String fullReferenceString, Rank rank){
276
	    //try to parse first part as name, but keep in mind full string is not parsable
277
		int start = 0;
278
		
279
		String localFullName = getLocalFullName(nameToBeFilled);
280
		Matcher fullNameMatcher = getMatcher (pStart + localFullName, fullReferenceString);
281
		if (fullNameMatcher.find()){
282
			String fullNameString = fullNameMatcher.group(0);
283
			nameToBeFilled.setProtectedNameCache(false);
284
			parseFullName(nameToBeFilled, fullNameString, rank, false);
285
			String sure = nameToBeFilled.getNameCache();
286
			start = sure.length();
287
		}
288
		
289
//		String localSimpleName = getLocalSimpleName(nameToBeFilled);
290
//		Matcher simpleNameMatcher = getMatcher (start + localSimpleName, fullReferenceString);
291
//		if (simpleNameMatcher.find()){
292
//			String simpleNameString = simpleNameMatcher.group(0);
293
//			parseFullName(nameToBeFilled, simpleNameString, rank, false);
294
//			start = simpleNameString.length();
295
//		}
296
		
297
		//don't parse if name can't be separated
298
		nameToBeFilled.addParsingProblem(ParserProblem.NameReferenceSeparation);
299
		nameToBeFilled.setTitleCache(fullReferenceString,true);
300
		nameToBeFilled.setFullTitleCache(fullReferenceString,true);
301
		// FIXME Quick fix, otherwise search would not deliver results for unparsable names
302
		nameToBeFilled.setNameCache(fullReferenceString,true);
303
		// END
304
		nameToBeFilled.setProblemStarts(start);
305
		nameToBeFilled.setProblemEnds(fullReferenceString.length());
306
		logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");    
307
	}
308
	
309
	private void makeNameWithReference(NonViralName nameToBeFilled, 
310
			String fullReferenceString, 
311
			Matcher nameAndRefSeparatorMatcher,
312
			Rank rank,
313
			boolean makeEmpty){
314
		
315
		String nameAndSeparator = nameAndRefSeparatorMatcher.group(0); 
316
	    String name = nameAndRefSeparatorMatcher.group(1); 
317
	    String referenceString = fullReferenceString.substring(nameAndRefSeparatorMatcher.end());
318
	    
319
	    // is reference an in ref?
320
	    String separator = nameAndSeparator.substring(name.length());
321
		boolean isInReference = separator.matches(inReferenceSeparator);
322
	    
323
	    //parse subparts
324
	    
325
		int oldProblemEnds = nameToBeFilled.getProblemEnds();
326
		parseFullName(nameToBeFilled, name, rank, makeEmpty);
327
	    nameToBeFilled.setProblemEnds(oldProblemEnds);
328
		
329
		//zoological new combinations should not have a nom. reference to be parsed
330
	    if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
331
			ZoologicalName zooName = CdmBase.deproxy(nameToBeFilled, ZoologicalName.class);
332
			//is name new combination?
333
			if (zooName.getBasionymAuthorTeam() != null || zooName.getOriginalPublicationYear() != null){
334
				ParserProblem parserProblem = ParserProblem.NewCombinationHasPublication;
335
				zooName.addParsingProblem(parserProblem);
336
				nameToBeFilled.setProblemStarts((nameToBeFilled.getProblemStarts()> -1) ? nameToBeFilled.getProblemStarts(): name.length());
337
				nameToBeFilled.setProblemEnds(Math.max(fullReferenceString.length(), nameToBeFilled.getProblemEnds()));
338
			}
339
		}
340
		
341
	    parseReference(nameToBeFilled, referenceString, isInReference); 
342
	    INomenclaturalReference ref = (INomenclaturalReference)nameToBeFilled.getNomenclaturalReference();
343

    
344
	    //problem start
345
	    int start = nameToBeFilled.getProblemStarts();
346
	    int nameLength = name.length();
347
	    int nameAndSeparatorLength = nameAndSeparator.length();
348
	    int fullRefLength = nameToBeFilled.getFullTitleCache().length();
349
	    
350
	    if (nameToBeFilled.isProtectedTitleCache() || nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)){
351
	    	start = Math.max(0, start);
352
		}else{
353
			if (ref != null && ref.getParsingProblem()!=0){
354
				start = Math.max(nameAndSeparatorLength, start);
355
		    	//TODO search within ref
356
			}	
357
		}
358
	    
359
	    //end
360
	    int end = nameToBeFilled.getProblemEnds();
361
	    
362
	    if (ref != null && ref.getParsingProblem()!=0){
363
	    	end = Math.min(nameAndSeparatorLength + ref.getProblemEnds(), end);
364
	    }else{
365
	    	if (nameToBeFilled.isProtectedTitleCache() ){
366
	    		end = Math.min(end, nameAndSeparatorLength);
367
	    		//TODO search within name
368
			}
369
	    }
370
	    nameToBeFilled.setProblemStarts(start);
371
	    nameToBeFilled.setProblemEnds(end);
372

    
373
	    //delegate has problem to name
374
	    if (ref != null && ref.getParsingProblem()!=0){
375
	    	nameToBeFilled.addParsingProblems(ref.getParsingProblem());
376
	    }
377
	    
378
	    Reference nomRef;
379
		if ( (nomRef = (Reference)nameToBeFilled.getNomenclaturalReference()) != null ){
380
			nomRef.setAuthorTeam((TeamOrPersonBase)nameToBeFilled.getCombinationAuthorTeam());
381
		}
382
	}
383
	
384
	//TODO make it an Array of status
385
	/**
386
	 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
387
	 * The nomenclatural status part ist deleted from the reference String.
388
	 * @return  String the new (shortend) reference String 
389
	 */ 
390
	public String parseNomStatus(String fullString, NonViralName nameToBeFilled) {
391
		String statusString;
392
		Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")"); 
393
		Matcher hasStatusMatcher = hasStatusPattern.matcher(fullString);
394
		
395
		if (hasStatusMatcher.find()) {
396
			String statusPhrase = hasStatusMatcher.group(0);
397
			
398
			Pattern statusPattern = Pattern.compile(pNomStatus);
399
			Matcher statusMatcher = statusPattern.matcher(statusPhrase);
400
			statusMatcher.find();
401
			statusString = statusMatcher.group(0);
402
			try {
403
				NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString);
404
				NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
405
				nameToBeFilled.addStatus(nomStatus);
406
			    
407
				fullString = fullString.replace(statusPhrase, "");
408
			} catch (UnknownCdmTypeException e) {
409
				//Do nothing
410
			}
411
		}
412
		return fullString;
413
	}
414
	
415
	
416
	private void parseReference(NonViralName nameToBeFilled, String strReference, boolean isInReference){
417
		
418
		INomenclaturalReference ref;
419
		String originalStrReference = strReference;
420
		
421
		//End (just delete end (e.g. '.', may be ambigous for yearPhrase, but no real information gets lost
422
		Matcher endMatcher = getMatcher(referenceEnd + end, strReference);
423
		if (endMatcher.find()){
424
			String endPart = endMatcher.group(0);
425
			strReference = strReference.substring(0, strReference.length() - endPart.length());
426
		}
427
		
428
//		String pDetailYear = ".*" + detailSeparator + detail + fWs + yearSeperator + fWs + yearPhrase + fWs + end;
429
//		Matcher detailYearMatcher = getMatcher(pDetailYear, strReference);
430
		
431
		String strReferenceWithYear = strReference;
432
		//year
433
		String yearPart = null;
434
		String pYearPhrase = yearSeperator + fWs + yearPhrase + fWs + end;
435
		Matcher yearPhraseMatcher = getMatcher(pYearPhrase, strReference);
436
		if (yearPhraseMatcher.find()){
437
			yearPart = yearPhraseMatcher.group(0);
438
			strReference = strReference.substring(0, strReference.length() - yearPart.length());
439
			yearPart = yearPart.replaceFirst(pStart + yearSeperator, "").trim();
440
		}else{
441
			if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
442
				ZoologicalName zooName = CdmBase.deproxy(nameToBeFilled, ZoologicalName.class);
443
				yearPart = String.valueOf(zooName.getPublicationYear());
444
				//continue
445
			}else{
446
				ref = makeDetailYearUnparsable(nameToBeFilled,strReference);
447
				ref.setDatePublished(TimePeriod.parseString(yearPart));
448
				return;
449
			}
450
		}
451
		
452
			
453
		//detail
454
		String pDetailPhrase = detailSeparator + fWs + detail + fWs + end;
455
		Matcher detailPhraseMatcher = getMatcher(pDetailPhrase, strReference);
456
		if (detailPhraseMatcher.find()){
457
			String detailPart = detailPhraseMatcher.group(0);
458
			strReference = strReference.substring(0, strReference.length() - detailPart.length());
459
			detailPart = detailPart.replaceFirst(pStart + detailSeparator, "").trim();
460
			nameToBeFilled.setNomenclaturalMicroReference(detailPart);
461
		}else{
462
			makeDetailYearUnparsable(nameToBeFilled, strReferenceWithYear);
463
			return;
464
		}
465
		//parse title and author
466
		ref = parseReferenceTitle(strReference, yearPart, isInReference);
467
		if (ref.hasProblem()){
468
			ref.setTitleCache( (isInReference?"in ":"") +  originalStrReference,true);
469
		}
470
		nameToBeFilled.setNomenclaturalReference((Reference)ref);
471
		int end = Math.min(strReference.length(), ref.getProblemEnds());
472
		ref.setProblemEnds(end);
473
	}
474

    
475
	/**
476
	 * @param nameToBeFilled
477
	 * @param strReference
478
	 * @return 
479
	 */
480
	private INomenclaturalReference makeDetailYearUnparsable(NonViralName nameToBeFilled, String strReference) {
481
		INomenclaturalReference ref;
482
		//ref = Generic.NewInstance();
483
		
484
		ref = ReferenceFactory.newGeneric();
485
		ref.setTitleCache(strReference,true);
486
		ref.setProblemEnds(strReference.length());
487
		ref.addParsingProblem(ParserProblem.CheckDetailOrYear);
488
		nameToBeFilled.addParsingProblem(ParserProblem.CheckDetailOrYear);
489
		nameToBeFilled.setNomenclaturalReference((Reference)ref);
490
		return ref;
491
	}
492
		
493
	/**
494
	 * Parses the referenceTitlePart, including the author volume and edition.
495
	 * @param reference
496
	 * @param year
497
	 * @return
498
	 */
499
	private INomenclaturalReference parseReferenceTitle(String strReference, String year, boolean isInReference){
500
		IBook result = null;
501

    
502
		Matcher refSineDetailMatcher = referenceSineDetailPattern.matcher(strReference);
503
		if (! refSineDetailMatcher.matches()){
504
			//TODO ?
505
		}
506
		
507
		Matcher articleMatcher = getMatcher(pArticleReference, strReference);
508
		Matcher bookMatcher = getMatcher(pBookReference, strReference);
509
		
510
		Matcher softArticleMatcher = getMatcher(pSoftArticleReference, strReference);
511
		Matcher bookSectionMatcher = getMatcher(pBookSectionReference, strReference);
512
		
513
		
514
		if(isInReference == false){
515
			if (bookMatcher.matches() ){
516
				result = parseBook(strReference);
517
			}else{
518
				logger.warn("Non-InRef must be book but does not match book");
519
				result = ReferenceFactory.newBook();
520
				makeUnparsableRefTitle(result, strReference);
521
			}
522
		}else{  //inRef
523
			if (articleMatcher.matches()){
524
				//article without separators like ","
525
				result = parseArticle(strReference);
526
			}else if (softArticleMatcher.matches()){
527
				result = parseArticle(strReference);
528
			}else if (bookSectionMatcher.matches()){
529
				result = parseBookSection(strReference);
530
			}else{
531
				result =  ReferenceFactory.newGeneric();
532
				makeUnparsableRefTitle(result, "in " + strReference);
533
			}
534
		}
535
		//make year
536
		if (makeYear(result, year) == false){
537
			//TODO
538
			logger.warn("Year could not be parsed");
539
		}
540
		result.setProblemStarts(0);
541
		result.setProblemEnds(strReference.length());
542
		return result;
543
	}
544
	
545
	private void makeUnparsableRefTitle(INomenclaturalReference result, String reference){
546
		result.setTitleCache(reference,true);
547
		result.addParsingProblem(ParserProblem.UnparsableReferenceTitle);
548
	}
549
	
550
	/**
551
	 * Parses a single date string. If the string is not parsable a StringNotParsableException is thrown
552
	 * @param singleDateString
553
	 * @return
554
	 * @throws StringNotParsableException
555
	 */
556
	private static Partial parseSingleDate(String singleDateString) 
557
			throws StringNotParsableException{
558
		Partial dt = new Partial();
559
		if (CdmUtils.isNumeric(singleDateString)){
560
			try {
561
				Integer year = Integer.valueOf(singleDateString.trim());
562
				if (year > 1750 && year < 2050){
563
					dt = dt.with(DateTimeFieldType.year(), year);
564
				}else{
565
					dt = null;
566
				}
567
			} catch (NumberFormatException e) {
568
				logger.debug("Not a Integer format in getCalendar()");
569
				throw new StringNotParsableException(singleDateString + "is not parsable as a single Date");
570
			}
571
		}
572
		return dt;
573
	}
574

    
575
	
576
	/**
577
	 * Parses the publication date part. 
578
	 * @param nomRef
579
	 * @param year
580
	 * @return If the string is not parsable <code>false</code>
581
	 * is returned. <code>True</code> otherwise
582
	 */
583
	private boolean makeYear(INomenclaturalReference nomRef, String year){
584
		boolean result = true;
585
		if (year == null){
586
			return false;
587
		}
588
		if ("".equals(year.trim())){
589
			return true;
590
		}
591
		TimePeriod datePublished = TimePeriod.parseString(year);
592
		
593
		if (nomRef.getType().equals(ReferenceType.BookSection)){
594
			handleBookSectionYear((IBookSection)nomRef, datePublished);
595
		}else if (nomRef instanceof Reference){
596
			((Reference)nomRef).setDatePublished(datePublished);	
597
		}else{
598
			throw new ClassCastException("nom Ref is not of type Reference but " + (nomRef == null? "(null)" : nomRef.getClass()));
599
		}
600
		return result;	
601
	}
602
	
603
	private String makeVolume(IVolumeReference nomRef, String strReference){
604
		//volume
605
		String volPart = null;
606
		String pVolPhrase = volumeSeparator +  volume + end;
607
		Matcher volPhraseMatcher = getMatcher(pVolPhrase, strReference);
608
		if (volPhraseMatcher.find()){
609
			volPart = volPhraseMatcher.group(0);
610
			strReference = strReference.substring(0, strReference.length() - volPart.length());
611
			volPart = volPart.replaceFirst(pStart + volumeSeparator, "").trim();
612
			nomRef.setVolume(volPart);
613
		}
614
		return strReference;
615
	}
616
	
617
	private String makeEdition(IBook book, String strReference){
618
		//volume
619
		String editionPart = null;
620
		Matcher editionPhraseMatcher = getMatcher(pEditionPart, strReference);
621
		
622
		Matcher editionVolumeMatcher = getMatcher(pEditionVolPart, strReference);
623
		boolean isEditionAndVol = editionVolumeMatcher.find();
624
		
625
		if (editionPhraseMatcher.find()){
626
			editionPart = editionPhraseMatcher.group(0);
627
			int pos = strReference.indexOf(editionPart);
628
			int posEnd = pos + editionPart.length();
629
			if (isEditionAndVol){
630
				posEnd++;  //delete also comma
631
			}
632
			strReference = strReference.substring(0, pos) + strReference.substring(posEnd);
633
			editionPart = editionPart.replaceFirst(pStart + editionSeparator, "").trim();
634
			book.setEdition(editionPart);
635
		}
636
		return strReference;
637
	}
638
	
639
	private IBook parseBook(String reference){
640
		IBook result = ReferenceFactory.newBook();
641
		reference = makeEdition(result, reference);
642
		reference = makeVolume(result, reference);
643
		result.setTitle(reference);
644
		return result;
645
	}
646
	
647
	
648
	private Reference parseArticle(String reference){
649
		//if (articlePatter)
650
		//(type, author, title, volume, editor, series;
651
		Reference result = ReferenceFactory.newArticle();
652
		reference = makeVolume(result, reference);
653
		Reference inJournal = ReferenceFactory.newJournal();
654
		inJournal.setTitle(reference);
655
		result.setInReference(inJournal);
656
		return result;
657
	}
658
	
659
	private Reference parseBookSection(String reference){
660
		Reference result = ReferenceFactory.newBookSection();
661
		String[] parts = reference.split(referenceAuthorSeparator, 2);
662
		if (parts.length != 2){
663
			logger.warn("Unexpected number of parts");
664
			result.setTitleCache(reference,true);
665
		}else{
666
			String authorString = parts[0];
667
			String bookString = parts[1];
668
			
669
			TeamOrPersonBase<?> authorTeam = author(authorString);
670
			IBook inBook = parseBook(bookString);
671
			inBook.setAuthorTeam(authorTeam);
672
			result.setInBook(inBook);
673
		}
674
		return result;
675
	}
676
	
677
	/**
678
	 * If the publication date of a book section and it's inBook do differ this is usually 
679
	 * caused by the fact that a book has been published during a period, because originally 
680
	 * it consisted of several parts that only later where put together to one book.
681
	 * If so, the book section's publication date may be a point in time (year or month of year)
682
	 * whereas the books publication date may be a period of several years.
683
	 * Therefore a valid nomenclatural reference string should use the book sections 
684
	 * publication date rather then the book's publication date.<BR>
685
	 * This method in general adds the publication date to the book section.
686
	 * An exception exists if the publication date is a period. Then the parser
687
	 * assumes that the nomenclatural reference string does not follow the above rule but
688
	 * the books publication date is set.
689
	 * @param bookSection
690
	 * @param datePublished
691
	 */
692
	private void handleBookSectionYear(IBookSection bookSection, TimePeriod datePublished){
693
		if (datePublished == null || datePublished.getStart() == null || bookSection == null){
694
			return;
695
		}
696
		if (datePublished.isPeriod() && bookSection.getInBook() != null){
697
			bookSection.getInBook().setDatePublished(datePublished);
698
		}else{
699
			bookSection.setDatePublished(datePublished);	
700
		}
701
	}
702
	
703
	
704
	/* (non-Javadoc)
705
	 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullName(java.lang.String)
706
	 */
707
	public NonViralName parseFullName(String fullNameString){
708
		return parseFullName(fullNameString, null, null);
709
	}
710
	
711
	
712
	/* (non-Javadoc)
713
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
714
	 */
715
	public NonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
716
		
717
		if (fullNameString == null){
718
			return null;
719
		}else{
720
			NonViralName result = getNonViralNameInstance(fullNameString, nomCode, rank);
721
			parseFullName(result, fullNameString, rank, false);
722
			return result;
723
		}
724
	}
725
		
726
	
727
	public void parseFullName(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty) {
728
		//TODO prol. etc.
729
		boolean hasCheckRankProblem = false; //was rank guessed in a previous parsing process?
730
		if (nameToBeFilled == null){
731
			logger.warn("name is null!");
732
		}else{
733
			hasCheckRankProblem = nameToBeFilled.hasProblem(ParserProblem.CheckRank);
734
			nameToBeFilled.removeParsingProblem(ParserProblem.CheckRank);
735
		}
736
		String authorString = null;
737
		
738
		if (fullNameString == null){
739
			return;
740
		}
741
		
742
		if (makeEmpty){
743
			makeEmpty(nameToBeFilled);
744
		}
745
		fullNameString.replaceAll(oWs , " ");
746
		//TODO 
747
		// OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces		   
748
		fullNameString = fullNameString.trim();
749
		
750
		fullNameString = removeHybridBlanks(fullNameString);
751
		String[] epi = pattern.split(fullNameString);
752
		try {
753
	    	//cultivars //TODO 2 implement cultivars
754
//		    if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
755
//		    	result = parseCultivar(fullName);
756
//		    }
757
		    //hybrids //TODO 2 implement hybrids
758
		    //else 
759
//		    if (hybridPattern.matcher(fullNameString).find() ){
760
//		    	parseHybrid(nameToBeFilled, fullNameString, rank, makeEmpty);
761
//		    } else 
762
		      if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
763
		    	//supraGeneric
764
				if (rank != null && ! hasCheckRankProblem  && (rank.isSupraGeneric()|| rank.isGenus())){
765
					nameToBeFilled.setRank(rank);
766
					nameToBeFilled.setGenusOrUninomial(epi[0]);
767
				} 
768
				 //genus or guess rank
769
				 else {
770
					rank = guessUninomialRank(nameToBeFilled, epi[0]); 
771
					nameToBeFilled.setRank(rank);
772
					nameToBeFilled.setGenusOrUninomial(epi[0]);
773
					nameToBeFilled.addParsingProblem(ParserProblem.CheckRank);
774
					nameToBeFilled.setProblemStarts(0);
775
					nameToBeFilled.setProblemEnds(epi[0].length());
776
				}
777
				authorString = fullNameString.substring(epi[0].length());
778
			}
779
			 //infra genus
780
			 else if (infraGenusPattern.matcher(fullNameString).matches()){
781
				Rank infraGenericRank;
782
				if ("[unranked]".equals(epi[1])){
783
					infraGenericRank = Rank.INFRAGENERICTAXON();
784
				}else{
785
					infraGenericRank = Rank.getRankByAbbreviation(epi[1]);
786
				}
787
				nameToBeFilled.setRank(infraGenericRank);
788
				nameToBeFilled.setGenusOrUninomial(epi[0]);
789
				nameToBeFilled.setInfraGenericEpithet(epi[2]);
790
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
791
			}
792
			 //aggr. or group
793
			 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
794
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[2]));
795
				nameToBeFilled.setGenusOrUninomial(epi[0]);
796
				nameToBeFilled.setSpecificEpithet(epi[1]);
797
			}
798
			 //species
799
			 else if (speciesPattern.matcher(fullNameString).matches()){
800
				nameToBeFilled.setRank(Rank.SPECIES());
801
				nameToBeFilled.setGenusOrUninomial(epi[0]);
802
				nameToBeFilled.setSpecificEpithet(epi[1]);
803
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
804
			}
805
			 //autonym
806
			 else if (autonymPattern.matcher(fullNameString).matches()){
807
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[epi.length - 2]));
808
				nameToBeFilled.setGenusOrUninomial(epi[0]);
809
				nameToBeFilled.setSpecificEpithet(epi[1]);
810
				nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
811
				int lenSpecies = 2 + epi[0].length()+epi[1].length();
812
				int lenInfraSpecies =  2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
813
				authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
814
			}
815
			 //infraSpecies
816
			 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
817
				String infraSpecRankEpi = epi[2];
818
				String infraSpecEpi = epi[3];
819
				if ("tax.".equals(infraSpecRankEpi)){
820
					infraSpecRankEpi += " " +  epi[3];
821
					infraSpecEpi = epi[4];
822
				}
823
				Rank infraSpecificRank;
824
				if ("[unranked]".equals(infraSpecRankEpi)){
825
					infraSpecificRank = Rank.INFRASPECIFICTAXON();
826
				}else{
827
					infraSpecificRank = Rank.getRankByAbbreviation(infraSpecRankEpi);
828
				}
829
				nameToBeFilled.setRank(infraSpecificRank);
830
				nameToBeFilled.setGenusOrUninomial(epi[0]);
831
				nameToBeFilled.setSpecificEpithet(epi[1]);
832
				nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
833
				authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankEpi.length() + 1 + infraSpecEpi.length());
834
			}//old infraSpecies
835
			 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
836
				boolean implemented = false;
837
				if (implemented){
838
					nameToBeFilled.setRank(Rank.getRankByNameOrAbbreviation(epi[2]));
839
					nameToBeFilled.setGenusOrUninomial(epi[0]);
840
					nameToBeFilled.setSpecificEpithet(epi[1]);
841
					//TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
842
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
843
				}else{
844
					nameToBeFilled.addParsingProblem(ParserProblem.OldInfraSpeciesNotSupported);
845
					nameToBeFilled.setTitleCache(fullNameString,true);
846
					// FIXME Quick fix, otherwise search would not deilver results for unparsable names
847
					nameToBeFilled.setNameCache(fullNameString,true);
848
					// END
849
					logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
850
				}
851
			}
852
		     //hybrid formula
853
			 else if (hybridFormulaPattern.matcher(fullNameString).matches()){
854
				 String firstNameString = "";
855
				 String secondNameString = "";
856
				 boolean isFirstName = true;
857
				 for (String str : epi){
858
					 if (str.matches(hybridSign)){
859
						 isFirstName = false;
860
					 }else if(isFirstName){
861
						 firstNameString += " " + str;
862
					 }else {
863
						 secondNameString += " " + str;
864
					 }
865
				 }
866
				 nameToBeFilled.setHybridFormula(true);
867
				 NomenclaturalCode code = nameToBeFilled.getNomenclaturalCode();
868
				 NonViralName firstName = this.parseFullName(firstNameString.trim(), code, rank);
869
				 NonViralName secondName = this.parseFullName(secondNameString.trim(), code, rank);
870
				 nameToBeFilled.addHybridParent(firstName, HybridRelationshipType.FIRST_PARENT(), null);
871
				 nameToBeFilled.addHybridParent(secondName, HybridRelationshipType.SECOND_PARENT(), null);
872
				 Rank newRank;
873
				 Rank firstRank = firstName.getRank();
874
				 Rank secondRank = secondName.getRank();
875
				 
876
				 if (firstRank == null || firstRank.isHigher(secondRank)){
877
					 newRank = secondRank;
878
				 }else{
879
					 newRank = firstRank;
880
				 }
881
				 nameToBeFilled.setRank(newRank);
882
			 }
883
		    //none
884
			else{ 
885
				nameToBeFilled.addParsingProblem(ParserProblem.UnparsableNamePart);
886
				nameToBeFilled.setTitleCache(fullNameString,true);
887
				// FIXME Quick fix, otherwise search would not deilver results for unparsable names
888
				nameToBeFilled.setNameCache(fullNameString,true);
889
				// END
890
				logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
891
		    }
892
		    //hybrid bits	
893
		    handleHybridBits(nameToBeFilled);
894
			//authors
895
		    if (nameToBeFilled != null && StringUtils.isNotBlank(authorString) ){ 
896
				handleAuthors(nameToBeFilled, fullNameString, authorString);
897
			}	
898
			//return
899
			if (nameToBeFilled != null){
900
		    	//return(BotanicalName)result;
901
				return;
902
			}
903
		} catch (UnknownCdmTypeException e) {
904
			nameToBeFilled.addParsingProblem(ParserProblem.RankNotSupported);
905
			nameToBeFilled.setTitleCache(fullNameString,true);
906
			// FIXME Quick fix, otherwise search would not deilver results for unparsable names
907
			nameToBeFilled.setNameCache(fullNameString,true);
908
			// END
909
			logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " +  fullNameString);
910
			//return result;
911
			return;
912
		}
913
	}
914

    
915
	private void handleHybridBits(NonViralName nameToBeFilled) {
916
		//uninomial
917
		String uninomial = CdmUtils.Nz(nameToBeFilled.getGenusOrUninomial());
918
		boolean isUninomialHybrid = uninomial.startsWith(hybridSign);
919
		if (isUninomialHybrid){
920
			nameToBeFilled.setMonomHybrid(true);
921
			nameToBeFilled.setGenusOrUninomial(uninomial.replace(hybridSign, ""));
922
		}
923
		//infrageneric
924
		String infrageneric = CdmUtils.Nz(nameToBeFilled.getInfraGenericEpithet());
925
		boolean isInfraGenericHybrid = infrageneric.startsWith(hybridSign);
926
		if (isInfraGenericHybrid){
927
			nameToBeFilled.setBinomHybrid(true);
928
			nameToBeFilled.setInfraGenericEpithet(infrageneric.replace(hybridSign, ""));
929
		}
930
		//species Epi
931
		String speciesEpi = CdmUtils.Nz(nameToBeFilled.getSpecificEpithet());
932
		boolean isSpeciesHybrid = speciesEpi.startsWith(hybridSign);
933
		if (isSpeciesHybrid){
934
			if (StringUtils.isBlank(infrageneric)){
935
				nameToBeFilled.setBinomHybrid(true);
936
			}else{
937
				nameToBeFilled.setTrinomHybrid(true);
938
			}
939
			nameToBeFilled.setSpecificEpithet(speciesEpi.replace(hybridSign, ""));
940
		}
941
		//infra species
942
		String infraSpeciesEpi = CdmUtils.Nz(nameToBeFilled.getInfraSpecificEpithet());
943
		boolean isInfraSpeciesHybrid = infraSpeciesEpi.startsWith(hybridSign);
944
		if (isInfraSpeciesHybrid){
945
			nameToBeFilled.setTrinomHybrid(true);
946
			nameToBeFilled.setInfraSpecificEpithet(infraSpeciesEpi.replace(hybridSign, ""));
947
		}
948
		
949
	}
950

    
951
	private String removeHybridBlanks(String fullNameString) {
952
		fullNameString = fullNameString.replaceAll(hybridFull, " "+hybridSign).trim();
953
		return fullNameString;
954
	}
955

    
956
	/**
957
	 * Author parser for external use
958
	 * @param nonViralName
959
	 * @param authorString
960
	 * @throws StringNotParsableException
961
	 */
962
	public void parseAuthors(NonViralName nonViralName, String authorString) throws StringNotParsableException{
963
		TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
964
		Integer[] years = new Integer[4];
965
		Class<? extends NonViralName> clazz = nonViralName.getClass();
966
		fullAuthors(authorString, authors, years, clazz);
967
		nonViralName.setCombinationAuthorTeam(authors[0]);
968
		nonViralName.setExCombinationAuthorTeam(authors[1]);
969
		nonViralName.setBasionymAuthorTeam(authors[2]);
970
		nonViralName.setExBasionymAuthorTeam(authors[3]);
971
		if (nonViralName instanceof ZoologicalName){
972
			ZoologicalName zooName = CdmBase.deproxy(nonViralName, ZoologicalName.class);
973
			zooName.setPublicationYear(years[0]);
974
			zooName.setOriginalPublicationYear(years[2]);
975
		}
976
	}
977
	
978
	/**
979
	 * @param nameToBeFilled
980
	 * @param fullNameString
981
	 * @param authorString
982
	 */
983
	public void handleAuthors(NonViralName nameToBeFilled, String fullNameString, String authorString) {
984
		TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
985
		Integer[] years = new Integer[4];
986
		try {
987
			Class<? extends NonViralName> clazz = nameToBeFilled.getClass();
988
			fullAuthors(authorString, authors, years, clazz);
989
		} catch (StringNotParsableException e) {
990
			nameToBeFilled.addParsingProblem(ParserProblem.UnparsableAuthorPart);
991
			nameToBeFilled.setTitleCache(fullNameString,true);
992
			// FIXME Quick fix, otherwise search would not deliver results for unparsable names
993
			nameToBeFilled.setNameCache(fullNameString,true);
994
			// END
995
			logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");;
996
		}
997
		nameToBeFilled.setCombinationAuthorTeam(authors[0]);
998
		nameToBeFilled.setExCombinationAuthorTeam(authors[1]);
999
		nameToBeFilled.setBasionymAuthorTeam(authors[2]);
1000
		nameToBeFilled.setExBasionymAuthorTeam(authors[3]);
1001
		if (nameToBeFilled instanceof ZoologicalName){
1002
			ZoologicalName zooName = (ZoologicalName)nameToBeFilled;
1003
			zooName.setPublicationYear(years[0]);
1004
			zooName.setOriginalPublicationYear(years[2]);
1005
		}
1006
	}
1007

    
1008
	
1009
	
1010
	/**
1011
	 * Guesses the rank of uninomial depending on the typical endings for ranks
1012
	 * @param nameToBeFilled
1013
	 * @param string
1014
	 */
1015
	private Rank guessUninomialRank(NonViralName nameToBeFilled, String uninomial) {
1016
		Rank result = Rank.GENUS();
1017
		if (nameToBeFilled.isInstanceOf(BotanicalName.class)){
1018
			if (false){
1019
				//
1020
			}else if (uninomial.endsWith("phyta") || uninomial.endsWith("mycota") ){  //plants, fungi
1021
				result = Rank.SECTION_BOTANY();
1022
			}else if (uninomial.endsWith("bionta")){
1023
				result = Rank.SUBKINGDOM();  //TODO
1024
			}else if (uninomial.endsWith("phytina")|| uninomial.endsWith("mycotina")  ){  //plants, fungi
1025
				result = Rank.SUBSECTION_BOTANY();
1026
			}else if (uninomial.endsWith("opsida") || uninomial.endsWith("phyceae") || uninomial.endsWith("mycetes")){  //plants, algae, fungi
1027
				result = Rank.CLASS();
1028
			}else if (uninomial.endsWith("idae") || uninomial.endsWith("phycidae") || uninomial.endsWith("mycetidae")){ //plants, algae, fungi
1029
				result = Rank.SUBCLASS();
1030
			}else if (uninomial.endsWith("ales")){
1031
				result = Rank.ORDER();
1032
			}else if (uninomial.endsWith("ineae")){
1033
				result = Rank.SUBORDER();
1034
			}else if (uninomial.endsWith("aceae")){
1035
					result = Rank.FAMILY();
1036
			}else if (uninomial.endsWith("oideae")){
1037
				result = Rank.SUBFAMILY();
1038
			}else if (uninomial.endsWith("eae")){
1039
				result = Rank.TRIBE();
1040
			}else if (uninomial.endsWith("inae")){
1041
				result = Rank.SUBTRIBE();
1042
			}else if (uninomial.endsWith("ota")){
1043
				result = Rank.KINGDOM();  //TODO
1044
			}
1045
		}else if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
1046
			if (false){
1047
				//
1048
			}else if (uninomial.endsWith("oideae")){
1049
				result = Rank.SUPERFAMILY();
1050
			}else if (uninomial.endsWith("idae")){
1051
					result = Rank.FAMILY();
1052
			}else if (uninomial.endsWith("inae")){
1053
				result = Rank.SUBFAMILY();
1054
			}else if (uninomial.endsWith("inae")){
1055
				result = Rank.SUBFAMILY();
1056
			}else if (uninomial.endsWith("ini")){
1057
				result = Rank.TRIBE();
1058
			}else if (uninomial.endsWith("ina")){
1059
				result = Rank.SUBTRIBE();
1060
			}
1061
		}else{
1062
			//
1063
		}
1064
		return result;
1065
	}
1066

    
1067
	/**
1068
	 * Parses the fullAuthorString
1069
	 * @param fullAuthorString
1070
	 * @return array of Teams containing the Team[0], 
1071
	 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
1072
	 */
1073
	protected void fullAuthors (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years, Class<? extends NonViralName> clazz)
1074
			throws StringNotParsableException{
1075
		if (fullAuthorString == null || clazz == null){
1076
			return;
1077
		}
1078
		fullAuthorString = fullAuthorString.trim();
1079
		
1080
		//Botanic
1081
		if ( BotanicalName.class.isAssignableFrom(clazz) ){
1082
			if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
1083
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1084
			}
1085
		}
1086
		//Zoo
1087
		else if ( ZoologicalName.class.isAssignableFrom(clazz) ){
1088
			if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
1089
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1090
			}
1091
		}else {
1092
			//TODO
1093
			logger.warn ("Full author String parsable only for defined BotanicalNames or ZoologicalNames but this is " + clazz.getSimpleName());
1094
			throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1095
		}
1096
		fullAuthorsChecked(fullAuthorString, authors, years);
1097
	}
1098
	
1099
	/*
1100
	 * like fullTeams but without trim and match check
1101
	 */
1102
	protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years){
1103
		int authorTeamStart = 0;
1104
		Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
1105
		
1106
		if (basionymMatcher.find(0)){
1107
			
1108
			String basString = basionymMatcher.group();
1109
			basString = basString.replaceFirst(basStart, "");
1110
			basString = basString.replaceAll(basEnd, "").trim();
1111
			authorTeamStart = basionymMatcher.end(1) + 1;
1112
			
1113
			TeamOrPersonBase<?>[] basAuthors = new TeamOrPersonBase[2];
1114
			Integer[] basYears = new Integer[2];
1115
			authorsAndEx(basString, basAuthors, basYears);
1116
			authors[2]= basAuthors[0];
1117
			years[2] = basYears[0];
1118
			authors[3]= basAuthors[1];
1119
			years[3] = basYears[1];
1120
		}
1121
		if (fullAuthorString.length() >= authorTeamStart){
1122
			TeamOrPersonBase<?>[] combinationAuthors = new TeamOrPersonBase[2];;
1123
			Integer[] combinationYears = new Integer[2];
1124
			authorsAndEx(fullAuthorString.substring(authorTeamStart), combinationAuthors, combinationYears);
1125
			authors[0]= combinationAuthors[0] ;
1126
			years[0] = combinationYears[0];
1127
			authors[1]= combinationAuthors[1];
1128
			years[1] = combinationYears[1];
1129
		}
1130
	}
1131
	
1132
	
1133
	/**
1134
	 * Parses the author and ex-author String
1135
	 * @param authorTeamString String representing the author and the ex-author team
1136
	 * @return array of Teams containing the Team[0] and the ExTeam[1]
1137
	 */
1138
	protected void authorsAndEx (String authorTeamString, TeamOrPersonBase<?>[] authors, Integer[] years){
1139
		//TODO noch allgemeiner am anfang durch Replace etc. 
1140
		authorTeamString = authorTeamString.trim();
1141
		authorTeamString = authorTeamString.replaceFirst(oWs + "ex" + oWs, " ex. " ); 
1142
		//int authorEnd = authorTeamString.length();
1143
		int authorBegin = 0;
1144
		
1145
		Matcher exAuthorMatcher = exAuthorPattern.matcher(authorTeamString);
1146
		if (exAuthorMatcher.find(0)){
1147
			authorBegin = exAuthorMatcher.end(0);
1148
			int exAuthorEnd = exAuthorMatcher.start(0);
1149
			String exString = authorTeamString.substring(0, exAuthorEnd).trim();
1150
			authors [1] = author(exString);
1151
		}
1152
		zooOrBotanicAuthor(authorTeamString.substring(authorBegin), authors, years );
1153
	}
1154
	
1155
	/**
1156
	 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
1157
	 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies 
1158
	 * to the year in case of an zoological name. 
1159
	 * @param authorString
1160
	 * @param team
1161
	 * @param year
1162
	 */
1163
	protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase<?>[] team, Integer[] year){
1164
		if (authorString == null){ 
1165
			return;
1166
		}else if ((authorString = authorString.trim()).length() == 0){
1167
			return;
1168
		}
1169
		Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
1170
		if (zooAuthorAddidtionMatcher.find()){
1171
			int index = zooAuthorAddidtionMatcher.start(0); 
1172
			String strYear = authorString.substring(index);
1173
			strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
1174
			year[0] = Integer.valueOf(strYear);
1175
			authorString = authorString.substring(0, index).trim();
1176
		}
1177
		team[0] = author(authorString);
1178
	}
1179
	
1180
	
1181
	/**
1182
	 * Parses an authorTeam String and returns the Team 
1183
	 * !!! TODO (atomization not yet implemented)
1184
	 * @param authorTeamString String representing the author team
1185
	 * @return an Team 
1186
	 */
1187
	protected TeamOrPersonBase<?> author (String authorString){
1188
		if (authorString == null){ 
1189
			return null;
1190
		}else if ((authorString = authorString.trim()).length() == 0){
1191
			return null;
1192
		}else if (! teamSplitterPattern.matcher(authorString).find() && ! authorIsAlwaysTeam){
1193
			//1 Person
1194
			Person result = Person.NewInstance();
1195
			result.setNomenclaturalTitle(authorString);
1196
			return result;
1197
		}else{
1198
			return parsedTeam(authorString);
1199
		} 
1200
		
1201
	}
1202
	
1203
	/**
1204
	 * Parses an authorString (reprsenting a team into the single authors and add
1205
	 * them to the return Team.
1206
	 * @param authorString
1207
	 * @return Team
1208
	 */
1209
	protected Team parsedTeam(String authorString){
1210
		Team result = Team.NewInstance();
1211
		String[] authors = authorString.split(teamSplitter);
1212
		for (String author : authors){
1213
			Person person = Person.NewInstance();
1214
			person.setNomenclaturalTitle(author);
1215
			result.addTeamMember(person); 
1216
		}
1217
		return result;
1218
	}
1219
	
1220

    
1221
	//Parsing of the given full name that has been identified as hybrid already somewhere else.
1222
	private void parseHybrid(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty){
1223
	    logger.warn("parseHybrid --> function not yet implemented");
1224
	    
1225
//	    String nonHybridName  = fullNameString;
1226
//	    boolean isMonomHybrid = isMonomHybrid(fullNameString);
1227
//	    if (isMonomHybrid){
1228
//	    	nonHybridName.replaceAll(hybrid, "");
1229
//	    }
1230
//	    
1231
//	    String[] split = nonHybridName.split("\\s");
1232
//	    parseFullName(nameToBeFilled, nonHybridName, rank, makeEmpty);
1233
//	    
1234
//	    nonHybridName = nonHybridName.replaceAll(hybrid, " ");
1235
//
1236
//	    boolean isBinomHybrid = isBinomHybrid(split);
1237
////	    boolean isTrinomHybrid = isTrinomHybrid(split);
1238
//	    
1239
//	    nonHybridName = nonHybridName.replaceAll(hybrid, " ");
1240
//	     
1241
//	    parseFullName(nameToBeFilled, nonHybridName, rank, makeEmpty);
1242
//	    nameToBeFilled.getTitleCache();
1243
//	    nameToBeFilled.setMonomHybrid(isMonomHybrid);
1244
//	    nameToBeFilled.setBinomHybrid(isBinomHybrid);
1245
//	    nameToBeFilled.setBinomHybrid(isTrinomHybrid);
1246
	    
1247
	    nameToBeFilled.setTitleCache(fullNameString,true);
1248
	    return;
1249
    }
1250
	
1251
//	private boolean isBinomHybrid(String[] split) {
1252
//		if (){
1253
//			
1254
//		}
1255
//		return false;
1256
//	}
1257

    
1258
	private boolean isMonomHybrid(String fullNameString) {
1259
		Matcher matcher = hybridPattern.matcher(fullNameString);
1260
		boolean find = matcher.find();
1261
		int start = matcher.start();
1262
		if (find == true && start == 0){
1263
			return true;
1264
		}else{
1265
			return false;
1266
		}
1267
	}
1268

    
1269
//	// Parsing of the given full name that has been identified as a cultivar already somwhere else.
1270
//	// The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
1271
	public BotanicalName parseCultivar(String fullName)	throws StringNotParsableException{
1272
		CultivarPlantName result = null;
1273
		    String[] words = oWsPattern.split(fullName);
1274
			
1275
		    /* ---------------------------------------------------------------------------------
1276
		     * cultivar
1277
		     * ---------------------------------------------------------------------------------*/
1278
			if (fullName.indexOf(" '") != 0){
1279
				//TODO location of 'xx' is probably not arbitrary
1280
				Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
1281
				if (cultivarMatcher.find()){
1282
					String namePart = fullName.replaceFirst(cultivar, "");
1283
					
1284
					String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
1285
					//OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
1286
					
1287
					result = (CultivarPlantName)parseFullName(namePart);
1288
					result.setCultivarName(cultivarPart);
1289
				}	
1290
			}else if (fullName.indexOf(" cv.") != 0){
1291
				// cv. is old form (not official) 
1292
				throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
1293
			}
1294
				
1295
		    /* ---------------------------------------------------------------------------------
1296
		     * cultivar group
1297
		     * ---------------------------------------------------------------------------------
1298
		     */ 
1299
			// TODO in work 
1300
			//Ann. this is not the official way of noting cultivar groups
1301
		    String group = oWs + "Group" + oWs + capitalEpiWord + end;
1302
			Pattern groupRE = Pattern.compile(group);
1303
			Matcher groupMatcher = groupRE.matcher(fullName);
1304
			if (groupMatcher.find()){
1305
		    	if (! words[words.length - 2].equals("group")){
1306
		            throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
1307
		        }else{
1308
		        	
1309
		        	String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
1310
		        	//OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
1311
		        	
1312
		        	String cultivarPart = words[words.length -1];
1313
		        	result = (CultivarPlantName)parseFullName(namePart);
1314
		        	if (result != null){
1315
		        		result.setCultivarName(cultivarPart);
1316
			        	
1317
		        		//OLD: result.setCultivarGroupName(cultivarPart);
1318
		        	}
1319
		        }
1320

    
1321
		    }
1322
//		    // ---------------------------------------------------------------------------------
1323
//		    if ( result = "" ){
1324
//		        return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
1325
//		    }else{
1326
//		        return result;
1327
	//	    }
1328
			return result; //TODO
1329
	}
1330

    
1331
	
1332
	private void makeEmpty(NonViralName nameToBeFilled){
1333
		nameToBeFilled.setRank(null);
1334
		nameToBeFilled.setTitleCache(null, false);
1335
		nameToBeFilled.setFullTitleCache(null, false);
1336
		nameToBeFilled.setNameCache(null, false);
1337
				
1338
		nameToBeFilled.setAppendedPhrase(null);
1339
		nameToBeFilled.setBasionymAuthorTeam(null);
1340
		nameToBeFilled.setCombinationAuthorTeam(null);
1341
		nameToBeFilled.setExBasionymAuthorTeam(null);
1342
		nameToBeFilled.setExCombinationAuthorTeam(null);
1343
		nameToBeFilled.setAuthorshipCache(null, false);
1344
		
1345
		
1346
		//delete problems except check rank
1347
		makeProblemEmpty(nameToBeFilled);
1348
				
1349
		// TODO ?
1350
		//nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
1351

    
1352
		
1353
		nameToBeFilled.setGenusOrUninomial(null);
1354
		nameToBeFilled.setInfraGenericEpithet(null);
1355
		nameToBeFilled.setSpecificEpithet(null);
1356
		nameToBeFilled.setInfraSpecificEpithet(null);
1357
		
1358
		nameToBeFilled.setNomenclaturalMicroReference(null);
1359
		nameToBeFilled.setNomenclaturalReference(null);
1360
		
1361
		nameToBeFilled.setHybridFormula(false);
1362
		nameToBeFilled.setMonomHybrid(false);
1363
		nameToBeFilled.setBinomHybrid(false);
1364
		nameToBeFilled.setTrinomHybrid(false);
1365
		
1366
		if (nameToBeFilled.isInstanceOf(BotanicalName.class)){
1367
			BotanicalName botanicalName = (BotanicalName)nameToBeFilled;
1368
			botanicalName.setAnamorphic(false);
1369
		}
1370
		
1371
		if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
1372
			ZoologicalName zoologicalName = (ZoologicalName)nameToBeFilled;
1373
			zoologicalName.setBreed(null);
1374
			zoologicalName.setOriginalPublicationYear(null);
1375
			
1376
		}
1377
	}
1378
	
1379
	
1380
    
1381
}
(2-2/5)