Project

General

Profile

Download (57.5 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.strategy.parser;
11

    
12
import java.util.HashSet;
13
import java.util.Set;
14
import java.util.regex.Matcher;
15
import java.util.regex.Pattern;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19
import org.joda.time.DateTimeFieldType;
20
import org.joda.time.Partial;
21

    
22
import eu.etaxonomy.cdm.common.CdmUtils;
23
import eu.etaxonomy.cdm.common.UTF8;
24
import eu.etaxonomy.cdm.model.agent.Person;
25
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.IParsable;
29
import eu.etaxonomy.cdm.model.common.TimePeriod;
30
import eu.etaxonomy.cdm.model.name.HybridRelationship;
31
import eu.etaxonomy.cdm.model.name.HybridRelationshipType;
32
import eu.etaxonomy.cdm.model.name.IBotanicalName;
33
import eu.etaxonomy.cdm.model.name.ICultivarPlantName;
34
import eu.etaxonomy.cdm.model.name.INonViralName;
35
import eu.etaxonomy.cdm.model.name.IZoologicalName;
36
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
38
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
39
import eu.etaxonomy.cdm.model.name.Rank;
40
import eu.etaxonomy.cdm.model.name.TaxonName;
41
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
42
import eu.etaxonomy.cdm.model.reference.IBook;
43
import eu.etaxonomy.cdm.model.reference.IBookSection;
44
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
45
import eu.etaxonomy.cdm.model.reference.IVolumeReference;
46
import eu.etaxonomy.cdm.model.reference.Reference;
47
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
48
import eu.etaxonomy.cdm.model.reference.ReferenceType;
49
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
50
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
51

    
52

    
53
/**
54
 * @author a.mueller
55
 *
56
 */
57
public class NonViralNameParserImpl
58
            extends NonViralNameParserImplRegExBase
59
            implements INonViralNameParser<INonViralName> {
60
	private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
61

    
62
	// good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
63

    
64
	// Value for a "makeEmpty" argument: the name is emptied before it is parsed
	// (used by parseReferencedName(String, NomenclaturalCode, Rank)).
	final static boolean MAKE_EMPTY = true;
65
	// Value for a "makeEmpty" argument: the name is NOT emptied before it is parsed.
	final static boolean MAKE_NOT_EMPTY = false;
66

    
67
	// NOTE(review): presumably controls whether a parsed author is always wrapped in a Team,
	// even for a single person - the usage is not visible in this part of the file; confirm.
	private final boolean authorIsAlwaysTeam = false;
68

    
69
	public static NonViralNameParserImpl NewInstance(){
70
		return new NonViralNameParserImpl();
71
	}
72

    
73
	@Override
74
    public INonViralName parseSimpleName(String simpleName){
75
		return parseSimpleName(simpleName, null, null);
76
	}
77

    
78
	@Override
79
    public INonViralName parseSimpleName(String simpleName, NomenclaturalCode code, Rank rank){
80
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
81
		return parseFullName(simpleName, code, rank);
82
	}
83

    
84
	public void parseSimpleName(INonViralName nameToBeFilled, String simpleNameString, Rank rank, boolean makeEmpty){
85
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
86
		parseFullName(nameToBeFilled, simpleNameString, rank, makeEmpty);
87
	}
88

    
89
	public INonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
90
		return getNonViralNameInstance(fullString, code, null);
91
	}
92

    
93
	public INonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
94
		INonViralName result = null;
95
		if(code ==null) {
96
			boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
97
			boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
98
			boolean isBacteriologicalName = false;
99
			boolean isCultivatedPlantName = false;
100
			if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
101
				if (isBotanicalName){
102
					result = TaxonNameFactory.NewBotanicalInstance(rank);
103
				}else{
104
					result = TaxonNameFactory.NewCultivarInstance(rank);
105
				}
106
			}else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
107
				result = TaxonNameFactory.NewZoologicalInstance(rank);
108
			}else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
109
				result = TaxonNameFactory.NewBacterialInstance(rank);
110
			}else {
111
				result =  TaxonNameFactory.NewNonViralInstance(rank);
112
			}
113
		} else {
114
			switch (code) {
115
			case ICNAFP:
116
				result = TaxonNameFactory.NewBotanicalInstance(rank);
117
				break;
118
			case ICZN:
119
				result = TaxonNameFactory.NewZoologicalInstance(rank);
120
				break;
121
			case ICNCP:
122
				logger.warn("ICNCP parsing not yet implemented");
123
				result = TaxonNameFactory.NewCultivarInstance(rank);
124
				break;
125
			case ICNB:
126
				logger.warn("ICNB not yet implemented");
127
				result = TaxonNameFactory.NewBacterialInstance(rank);
128
				break;
129
			case ICVCN:
130
				logger.error("Viral name is not a NonViralName !!");
131
				break;
132
			default:
133
				// FIXME Unreachable code
134
				logger.error("Unknown Nomenclatural Code !!");
135
			}
136
		}
137
		return result;
138
	}
139

    
140
	@Override
141
    public TaxonName parseReferencedName(String fullReferenceString) {
142
		return parseReferencedName(fullReferenceString, null, null);
143
	}
144

    
145
	@Override
146
    public TaxonName parseReferencedName(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
147
		if (fullReferenceString == null){
148
			return null;
149
		}else{
150
		    INonViralName result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
151
			parseReferencedName(result, fullReferenceString, rank, MAKE_EMPTY);
152
			return TaxonName.castAndDeproxy(result);
153
		}
154
	}
155

    
156
	private String standardize(INonViralName nameToBeFilled, String fullReferenceString, boolean makeEmpty){
157
		//Check null and standardize
158
		if (fullReferenceString == null){
159
			//return null;
160
			return null;
161
		}
162
		if (makeEmpty){
163
			makeEmpty(nameToBeFilled);
164
		}
165
		fullReferenceString = fullReferenceString.replaceAll(oWs , " ");
166
		fullReferenceString = fullReferenceString.trim();
167
		if ("".equals(fullReferenceString)){
168
			fullReferenceString = null;
169
		}
170
		return fullReferenceString;
171
	}
172

    
173
	/**
174
	 * Returns the regEx to be used for the full-name depending on the code
175
	 * @param nameToBeFilled
176
	 * @return
177
	 */
178
	private String getCodeSpecificFullNameRegEx(INonViralName nameToBeFilledOrig){
179
	    INonViralName nameToBeFilled = CdmBase.deproxy(nameToBeFilledOrig);
180
		if (nameToBeFilled.isZoological()){
181
			return anyZooFullName;
182
		}else if (nameToBeFilled.isBotanical()) {
183
			return anyBotanicFullName;
184
		}else if (nameToBeFilled.isNonViral()) {
185
			return anyBotanicFullName;  //TODO ?
186
		}else{
187
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
188
			return null;
189
		}
190
	}
191

    
192
	/**
193
	 * Returns the regEx to be used for the fsimple-name depending on the code
194
	 * @param nameToBeFilled
195
	 * @return
196
	 */
197
	private String getCodeSpecificSimpleNameRegEx(INonViralName nameToBeFilled){
198
		nameToBeFilled = CdmBase.deproxy(nameToBeFilled);
199

    
200
		if (nameToBeFilled.isZoological()){
201
			return anyZooName;
202
		}else if (nameToBeFilled.isBotanical()) {
203
		    return anyBotanicName;
204
		}else if (nameToBeFilled.isNonViral()){
205
			return anyZooName;  //TODO ?
206
		}else{
207
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
208
			return null;
209
		}
210
	}
211

    
212
	private Matcher getMatcher(String regEx, String matchString){
213
		Pattern pattern = Pattern.compile(regEx);
214
		Matcher matcher = pattern.matcher(matchString);
215
		return matcher;
216
	}
217

    
218
	@Override
219
    public void parseReferencedName(INonViralName nameToBeFilled, String fullReferenceStringOrig, Rank rank, boolean makeEmpty) {
220
		//standardize
221
		String fullReferenceString = standardize(nameToBeFilled, fullReferenceStringOrig, makeEmpty);
222
		if (fullReferenceString == null){
223
			return;
224
		}
225
		// happens already in standardize(...)
226
//		makeProblemEmpty(nameToBeFilled);
227

    
228
		//make nomenclatural status and replace it by empty string
229
	    fullReferenceString = parseNomStatus(fullReferenceString, nameToBeFilled, makeEmpty);
230
	    nameToBeFilled.setProblemEnds(fullReferenceString.length());
231

    
232
	    //get full name reg
233
		String localFullNameRegEx = getCodeSpecificFullNameRegEx(nameToBeFilled);
234
		//get full name reg
235
		String localSimpleNameRegEx = getCodeSpecificSimpleNameRegEx(nameToBeFilled);
236

    
237
		//separate name and reference part
238
		String nameAndRefSeparatorRegEx = "(^" + localFullNameRegEx + ")("+ referenceSeperator + ")";
239
		Matcher nameAndRefSeparatorMatcher = getMatcher (nameAndRefSeparatorRegEx, fullReferenceString);
240

    
241
		Matcher onlyNameMatcher = getMatcher (localFullNameRegEx, fullReferenceString);
242
		Matcher hybridMatcher = hybridFormulaPattern.matcher(fullReferenceString);
243
		Matcher onlySimpleNameMatcher = getMatcher (localSimpleNameRegEx, fullReferenceString);
244

    
245
		if (onlyNameMatcher.matches()){
246
			makeEmpty = false;
247
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
248
		} else if (nameAndRefSeparatorMatcher.find()){
249
			makeNameWithReference(nameToBeFilled, fullReferenceString, nameAndRefSeparatorMatcher, rank, makeEmpty);
250
		}else if (hybridMatcher.matches() ){
251
		    //I do not remember why we need makeEmpty = false for onlyNameMatcher,
252
		    //but for hybridMatcher we need to remove old Hybrid Relationships if necessary, therefore
253
		    //I removed it from here
254
            parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
255
        }else if (onlySimpleNameMatcher.matches()){
256
			makeEmpty = false;
257
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);	//simpleName not yet implemented
258
		}else{
259
			makeNoFullRefMatch(nameToBeFilled, fullReferenceString, rank);
260
		}
261
		//problem handling. Start and end solved in subroutines
262
		if (! nameToBeFilled.hasProblem()){
263
			makeProblemEmpty(nameToBeFilled);
264
		}
265
	}
266

    
267
	private void makeProblemEmpty(IParsable parsable){
268
		boolean hasCheckRank = parsable.hasProblem(ParserProblem.CheckRank);
269
		parsable.setParsingProblem(0);
270
		if (hasCheckRank){
271
			parsable.addParsingProblem(ParserProblem.CheckRank);
272
		}
273
		parsable.setProblemStarts(-1);
274
		parsable.setProblemEnds(-1);
275
	}
276

    
277
	private void makeNoFullRefMatch(INonViralName nameToBeFilled, String fullReferenceString, Rank rank){
278
	    //try to parse first part as name, but keep in mind full string is not parsable
279
		int start = 0;
280

    
281
		String localFullName = getCodeSpecificFullNameRegEx(nameToBeFilled);
282
		Matcher fullNameMatcher = getMatcher (pStart + localFullName, fullReferenceString);
283
		if (fullNameMatcher.find()){
284
			String fullNameString = fullNameMatcher.group(0);
285
			nameToBeFilled.setProtectedNameCache(false);
286
			parseFullName(nameToBeFilled, fullNameString, rank, false);
287
			String sure = nameToBeFilled.getNameCache();
288
			start = sure.length();
289
		}
290

    
291
//		String localSimpleName = getLocalSimpleName(nameToBeFilled);
292
//		Matcher simpleNameMatcher = getMatcher (start + localSimpleName, fullReferenceString);
293
//		if (simpleNameMatcher.find()){
294
//			String simpleNameString = simpleNameMatcher.group(0);
295
//			parseFullName(nameToBeFilled, simpleNameString, rank, false);
296
//			start = simpleNameString.length();
297
//		}
298

    
299
		//don't parse if name can't be separated
300
		nameToBeFilled.addParsingProblem(ParserProblem.NameReferenceSeparation);
301
		nameToBeFilled.setTitleCache(fullReferenceString, true);
302
		nameToBeFilled.setFullTitleCache(fullReferenceString, true);
303
		// FIXME Quick fix, otherwise search would not deliver results for unparsable names
304
		nameToBeFilled.setNameCache(fullReferenceString, true);
305
		// END
306
		nameToBeFilled.setProblemStarts(start);
307
		nameToBeFilled.setProblemEnds(fullReferenceString.length());
308
		logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");
309
	}
310

    
311
	private void makeNameWithReference(INonViralName nameToBeFilled,
312
			String fullReferenceString,
313
			Matcher nameAndRefSeparatorMatcher,
314
			Rank rank,
315
			boolean makeEmpty){
316

    
317
		String nameAndSeparator = nameAndRefSeparatorMatcher.group(0);
318
	    String name = nameAndRefSeparatorMatcher.group(1);
319
	    String referenceString = fullReferenceString.substring(nameAndRefSeparatorMatcher.end());
320

    
321
	    // is reference an in ref?
322
	    String separator = nameAndSeparator.substring(name.length());
323
		boolean isInReference = separator.matches(inReferenceSeparator);
324

    
325
	    //parse subparts
326

    
327
		int oldProblemEnds = nameToBeFilled.getProblemEnds();
328
		parseFullName(nameToBeFilled, name, rank, makeEmpty);
329
	    nameToBeFilled.setProblemEnds(oldProblemEnds);
330

    
331
		//zoological new combinations should not have a nom. reference to be parsed
332
	    if (nameToBeFilled.isZoological()){
333
			IZoologicalName zooName = (IZoologicalName)CdmBase.deproxy(nameToBeFilled);
334
			//is name new combination?
335
			if (zooName.getBasionymAuthorship() != null || zooName.getOriginalPublicationYear() != null){
336
				ParserProblem parserProblem = ParserProblem.NewCombinationHasPublication;
337
				zooName.addParsingProblem(parserProblem);
338
				nameToBeFilled.setProblemStarts((nameToBeFilled.getProblemStarts()> -1) ? nameToBeFilled.getProblemStarts(): name.length());
339
				nameToBeFilled.setProblemEnds(Math.max(fullReferenceString.length(), nameToBeFilled.getProblemEnds()));
340
			}
341
		}
342

    
343
	    parseReference(nameToBeFilled, referenceString, isInReference);
344
	    INomenclaturalReference ref = nameToBeFilled.getNomenclaturalReference();
345

    
346
	    //problem start
347
	    int start = nameToBeFilled.getProblemStarts();
348
	    int nameLength = name.length();
349
	    int nameAndSeparatorLength = nameAndSeparator.length();
350
	    int fullRefLength = nameToBeFilled.getFullTitleCache().length();
351

    
352
	    if (nameToBeFilled.isProtectedTitleCache() || nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)){
353
	    	start = Math.max(0, start);
354
		}else{
355
			if (ref != null && ref.getParsingProblem()!=0){
356
				start = Math.max(nameAndSeparatorLength, start);
357
		    	//TODO search within ref
358
			}
359
		}
360

    
361
	    //end
362
	    int end = nameToBeFilled.getProblemEnds();
363

    
364
	    if (ref != null && ref.getParsingProblem()!=0){
365
	    	end = Math.min(nameAndSeparatorLength + ref.getProblemEnds(), end);
366
	    }else{
367
	    	if (nameToBeFilled.isProtectedTitleCache() ){
368
	    		end = Math.min(end, nameAndSeparatorLength);
369
	    		//TODO search within name
370
			}
371
	    }
372
	    nameToBeFilled.setProblemStarts(start);
373
	    nameToBeFilled.setProblemEnds(end);
374

    
375
	    //delegate has problem to name
376
	    if (ref != null && ref.getParsingProblem()!=0){
377
	    	nameToBeFilled.addParsingProblems(ref.getParsingProblem());
378
	    }
379

    
380
	    Reference nomRef;
381
		if ( (nomRef = (Reference)nameToBeFilled.getNomenclaturalReference()) != null ){
382
			nomRef.setAuthorship(nameToBeFilled.getCombinationAuthorship());
383
		}
384
	}
385

    
386
	//TODO make it an Array of status
387
	/**
388
	 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonName}.
389
	 * The nomenclatural status part ist deleted from the reference String.
390
	 * @return  String the new (shortend) reference String
391
	 */
392
	public String parseNomStatus(String fullString, INonViralName nameToBeFilled, boolean makeEmpty) {
393
		Set<NomenclaturalStatusType> existingStatusTypeSet = new HashSet<NomenclaturalStatusType>();
394
		Set<NomenclaturalStatusType> newStatusTypeSet = new HashSet<NomenclaturalStatusType>();
395
		for (NomenclaturalStatus existingStatus : nameToBeFilled.getStatus()){
396
			existingStatusTypeSet.add(existingStatus.getType());
397
		}
398

    
399
		String statusString;
400
		Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")");
401
		Matcher hasStatusMatcher = hasStatusPattern.matcher(fullString);
402

    
403
		if (hasStatusMatcher.find()) {
404
			String statusPhrase = hasStatusMatcher.group(0);
405

    
406
			Pattern statusPattern = Pattern.compile(pNomStatus);
407
			Matcher statusMatcher = statusPattern.matcher(statusPhrase);
408
			statusMatcher.find();
409
			statusString = statusMatcher.group(0);
410
			try {
411
			    TaxonName nameToBeFilledCasted =  TaxonName.castAndDeproxy(nameToBeFilled);
412
				NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString, nameToBeFilledCasted);
413
				if (! existingStatusTypeSet.contains(nomStatusType)){
414
					NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
415
					nameToBeFilled.addStatus(nomStatus);
416
				}
417
				newStatusTypeSet.add(nomStatusType);
418
				fullString = fullString.replace(statusPhrase, "");
419
			} catch (UnknownCdmTypeException e) {
420
				//Do nothing
421
			}
422
		}
423
		//remove not existing nom status
424
		if (makeEmpty){
425
			Set<NomenclaturalStatus> tmpStatus = new HashSet<NomenclaturalStatus>();
426
			tmpStatus.addAll(nameToBeFilled.getStatus());
427
			for (NomenclaturalStatus status : tmpStatus){
428
				if (! newStatusTypeSet.contains(status.getType())){
429
					nameToBeFilled.removeStatus(status);
430
				}
431
			}
432
		}
433

    
434
		return fullString;
435
	}
436

    
437

    
438
	private void parseReference(INonViralName nameToBeFilled, String strReference, boolean isInReference){
439

    
440
		INomenclaturalReference ref;
441
		String originalStrReference = strReference;
442

    
443
		//End (just delete end (e.g. '.', may be ambigous for yearPhrase, but no real information gets lost
444
		Matcher endMatcher = getMatcher(referenceEnd + end, strReference);
445
		if (endMatcher.find()){
446
			String endPart = endMatcher.group(0);
447
			strReference = strReference.substring(0, strReference.length() - endPart.length());
448
		}
449

    
450
//		String pDetailYear = ".*" + detailSeparator + detail + fWs + yearSeperator + fWs + yearPhrase + fWs + end;
451
//		Matcher detailYearMatcher = getMatcher(pDetailYear, strReference);
452

    
453
		String strReferenceWithYear = strReference;
454
		//year
455
		String yearPart = null;
456
		String pYearPhrase = yearSeperator + fWs + yearPhrase + fWs + end;
457
		Matcher yearPhraseMatcher = getMatcher(pYearPhrase, strReference);
458
		if (yearPhraseMatcher.find()){
459
			yearPart = yearPhraseMatcher.group(0);
460
			strReference = strReference.substring(0, strReference.length() - yearPart.length());
461
			yearPart = yearPart.replaceFirst(pStart + yearSeperator, "").trim();
462
		}else{
463
			if (nameToBeFilled.isZoological()){
464
				IZoologicalName zooName = (IZoologicalName)CdmBase.deproxy(nameToBeFilled);
465
				yearPart = String.valueOf(zooName.getPublicationYear());
466
				//continue
467
			}else{
468
				ref = makeDetailYearUnparsable(nameToBeFilled,strReference);
469
				ref.setDatePublished(TimePeriodParser.parseString(yearPart));
470
				return;
471
			}
472
		}
473

    
474

    
475
		//detail
476
		String pDetailPhrase = detailSeparator + fWs + detail + fWs + end;
477
		Matcher detailPhraseMatcher = getMatcher(pDetailPhrase, strReference);
478
		if (detailPhraseMatcher.find()){
479
			String detailPart = detailPhraseMatcher.group(0);
480
			strReference = strReference.substring(0, strReference.length() - detailPart.length());
481
			detailPart = detailPart.replaceFirst(pStart + detailSeparator, "").trim();
482
			nameToBeFilled.setNomenclaturalMicroReference(detailPart);
483
		}else{
484
			makeDetailYearUnparsable(nameToBeFilled, strReferenceWithYear);
485
			return;
486
		}
487
		//parse title and author
488
		ref = parseReferenceTitle(strReference, yearPart, isInReference);
489
		if (ref.hasProblem()){
490
		    //we need to protect both caches otherwise the titleCache is incorrectly build from atomized parts
491
			ref.setTitleCache( (isInReference ? "in ":"") +  originalStrReference, true);
492
			ref.setAbbrevTitleCache( (isInReference ? "in ":"") +  originalStrReference, true);
493
		}
494
		nameToBeFilled.setNomenclaturalReference(ref);
495
		int end = Math.min(strReference.length(), ref.getProblemEnds());
496
		ref.setProblemEnds(end);
497
	}
498

    
499
	/**
500
	 * @param nameToBeFilled
501
	 * @param strReference
502
	 * @return
503
	 */
504
	private Reference makeDetailYearUnparsable(INonViralName nameToBeFilled, String strReference) {
505
		Reference ref;
506

    
507
		ref = ReferenceFactory.newGeneric();
508
		ref.setTitleCache(strReference, true);
509
        ref.setAbbrevTitleCache(strReference, true);
510
		ref.setProblemEnds(strReference.length());
511
		ref.addParsingProblem(ParserProblem.CheckDetailOrYear);
512
		nameToBeFilled.addParsingProblem(ParserProblem.CheckDetailOrYear);
513
		nameToBeFilled.setNomenclaturalReference(ref);
514
		return ref;
515
	}
516

    
517
	/**
518
	 * Parses the referenceTitlePart, including the author volume and edition.
519
	 * @param reference
520
	 * @param year
521
	 * @return
522
	 */
523
	public INomenclaturalReference parseReferenceTitle(String strReference, String year, boolean isInReference){
524
		IBook result = null;
525

    
526
		Matcher refSineDetailMatcher = referenceSineDetailPattern.matcher(strReference);
527
		if (! refSineDetailMatcher.matches()){
528
			//TODO ?
529
		}
530

    
531
		Matcher articleMatcher = getMatcher(pArticleReference, strReference);
532
		Matcher bookMatcher = getMatcher(pBookReference, strReference);
533

    
534
		Matcher softArticleMatcher = getMatcher(pSoftArticleReference, strReference);
535
		Matcher bookSectionMatcher = getMatcher(pBookSectionReference, strReference);
536

    
537

    
538
		if(isInReference == false){
539
			if (bookMatcher.matches() ){
540
				result = parseBook(strReference);
541
			}else{
542
				logger.warn("Non-InRef must be book but does not match book: "+ strReference);
543
				result = ReferenceFactory.newBook();
544
				makeUnparsableRefTitle(result, strReference);
545
			}
546
		}else{  //inRef
547
			if (articleMatcher.matches()){
548
				//article without separators like ","
549
				result = parseArticle(strReference);
550
			}else if (softArticleMatcher.matches()){
551
				result = parseArticle(strReference);
552
			}else if (bookSectionMatcher.matches()){
553
				result = parseBookSection(strReference);
554
			}else{
555
				result =  ReferenceFactory.newGeneric();
556
				makeUnparsableRefTitle(result, "in " + strReference);
557
			}
558
		}
559
		//make year
560
		if (makeYear(result, year) == false){
561
			//TODO
562
			logger.warn("Year could not be parsed");
563
		}
564
		result.setProblemStarts(0);
565
		result.setProblemEnds(strReference.length());
566
		return result;
567
	}
568

    
569
	private void makeUnparsableRefTitle(INomenclaturalReference result, String reference){
570
	    //need to set both to protected otherwise titleCache is created from atomized parts
571
	    result.setTitleCache(reference, true);
572
		result.setAbbrevTitleCache(reference, true);
573
		result.addParsingProblem(ParserProblem.UnparsableReferenceTitle);
574
	}
575

    
576
	/**
577
	 * Parses a single date string. If the string is not parsable a StringNotParsableException is thrown
578
	 * @param singleDateString
579
	 * @return
580
	 * @throws StringNotParsableException
581
	 */
582
	private static Partial parseSingleDate(String singleDateString)
583
			throws StringNotParsableException{
584
		Partial dt = new Partial();
585
		if (CdmUtils.isNumeric(singleDateString)){
586
			try {
587
				Integer year = Integer.valueOf(singleDateString.trim());
588
				if (year > 1750 && year < 2050){
589
					dt = dt.with(DateTimeFieldType.year(), year);
590
				}else{
591
					dt = null;
592
				}
593
			} catch (NumberFormatException e) {
594
				logger.debug("Not a Integer format in getCalendar()");
595
				throw new StringNotParsableException(singleDateString + "is not parsable as a single Date");
596
			}
597
		}
598
		return dt;
599
	}
600

    
601

    
602
	/**
603
	 * Parses the publication date part.
604
	 * @param nomRef
605
	 * @param year
606
	 * @return If the string is not parsable <code>false</code>
607
	 * is returned. <code>True</code> otherwise
608
	 */
609
	private boolean makeYear(INomenclaturalReference nomRef, String year){
610
		boolean result = true;
611
		if (year == null){
612
			return false;
613
		}
614
		if ("".equals(year.trim())){
615
			return true;
616
		}
617
		TimePeriod datePublished = TimePeriodParser.parseString(year);
618

    
619
		if (nomRef.getType().equals(ReferenceType.BookSection)){
620
			handleBookSectionYear((IBookSection)nomRef, datePublished);
621
		}else if (nomRef instanceof Reference){
622
			((Reference)nomRef).setDatePublished(datePublished);
623
		}else{
624
			throw new ClassCastException("nom Ref is not of type Reference but " + (nomRef == null? "(null)" : nomRef.getClass()));
625
		}
626
		return result;
627
	}
628

    
629
	private String makeVolume(IVolumeReference nomRef, String strReference){
630
		//volume
631
		String volPart = null;
632
		String pVolPhrase = volumeSeparator +  volume + end;
633
		Matcher volPhraseMatcher = getMatcher(pVolPhrase, strReference);
634
		if (volPhraseMatcher.find()){
635
			volPart = volPhraseMatcher.group(0);
636
			strReference = strReference.substring(0, strReference.length() - volPart.length());
637
			volPart = volPart.replaceFirst(pStart + volumeSeparator, "").trim();
638
			nomRef.setVolume(volPart);
639
		}
640
		return strReference;
641
	}
642

    
643
	private String makeEdition(IBook book, String strReference){
644
		//volume
645
		String editionPart = null;
646
		Matcher editionPhraseMatcher = getMatcher(pEditionPart, strReference);
647

    
648
		Matcher editionVolumeMatcher = getMatcher(pEditionVolPart, strReference);
649
		boolean isEditionAndVol = editionVolumeMatcher.find();
650

    
651
		if (editionPhraseMatcher.find()){
652
			editionPart = editionPhraseMatcher.group(0);
653
			int pos = strReference.indexOf(editionPart);
654
			int posEnd = pos + editionPart.length();
655
			if (isEditionAndVol){
656
				posEnd++;  //delete also comma
657
			}
658
			strReference = strReference.substring(0, pos) + strReference.substring(posEnd);
659
			editionPart = editionPart.replaceFirst(pStart + editionSeparator, "").trim();
660
			book.setEdition(editionPart);
661
		}
662
		return strReference;
663
	}
664

    
665
	private IBook parseBook(String reference){
666
		IBook result = ReferenceFactory.newBook();
667
		reference = makeEdition(result, reference);
668
		reference = makeVolume(result, reference);
669
		result.setAbbrevTitle(reference);
670
		return result;
671
	}
672

    
673

    
674
	private Reference parseArticle(String reference){
675
		//if (articlePatter)
676
		//(type, author, title, volume, editor, series;
677
		Reference result = ReferenceFactory.newArticle();
678
		reference = makeVolume(result, reference);
679
		Reference inJournal = ReferenceFactory.newJournal();
680
		inJournal.setAbbrevTitle(reference);
681
		result.setInReference(inJournal);
682
		return result;
683
	}
684

    
685
	private Reference parseBookSection(String reference){
686
		Reference result = ReferenceFactory.newBookSection();
687

    
688
		Pattern authorPattern = Pattern.compile("^" + authorTeam + referenceAuthorSeparator);
689
		Matcher authorMatcher = authorPattern.matcher(reference);
690
		boolean find = authorMatcher.find();
691
		if (find){
692
			String authorString = authorMatcher.group(0).trim();
693
			String bookString = reference.substring(authorString.length()).trim();
694
			authorString = authorString.substring(0, authorString.length() -1);
695

    
696
			TeamOrPersonBase<?> authorTeam = author(authorString);
697
			IBook inBook = parseBook(bookString);
698
			inBook.setAuthorship(authorTeam);
699
			result.setInBook(inBook);
700
		}else{
701
			logger.warn("Unexpected non matching book section author part");
702
			//TODO do we want to record a 'problem' here?
703
			result.setTitleCache(reference, true);
704
			result.setAbbrevTitleCache(reference, true);
705
		}
706

    
707
		return result;
708
	}
709

    
710
	/**
711
	 * If the publication date of a book section and it's inBook do differ this is usually
712
	 * caused by the fact that a book has been published during a period, because originally
713
	 * it consisted of several parts that only later where put together to one book.
714
	 * If so, the book section's publication date may be a point in time (year or month of year)
715
	 * whereas the books publication date may be a period of several years.
716
	 * Therefore a valid nomenclatural reference string should use the book sections
717
	 * publication date rather then the book's publication date.<BR>
718
	 * This method in general adds the publication date to the book section.
719
	 * An exception exists if the publication date is a period. Then the parser
720
	 * assumes that the nomenclatural reference string does not follow the above rule but
721
	 * the books publication date is set.
722
	 * @param bookSection
723
	 * @param datePublished
724
	 */
725
	private void handleBookSectionYear(IBookSection bookSection, TimePeriod datePublished){
726
		if (datePublished == null || datePublished.getStart() == null || bookSection == null){
727
			return;
728
		}
729
		if (datePublished.isPeriod() && bookSection.getInBook() != null){
730
			bookSection.getInBook().setDatePublished(datePublished);
731
		}else{
732
			bookSection.setDatePublished(datePublished);
733
		}
734
	}
735

    
736
	@Override
737
    public INonViralName parseFullName(String fullNameString){
738
		return parseFullName(fullNameString, null, null);
739
	}
740

    
741
	@Override
742
    public INonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
743

    
744
		if (fullNameString == null){
745
			return null;
746
		}else{
747
			INonViralName result = getNonViralNameInstance(fullNameString, nomCode, rank);
748
			parseFullName(result, fullNameString, rank, false);
749
			return result;
750
		}
751
	}
752

    
753
	@Override
754
	public void parseFullName(INonViralName nameToBeFilledOrig, String fullNameStringOrig, Rank rank, boolean makeEmpty) {
755
	    INonViralName nameToBeFilled = nameToBeFilledOrig;
756

    
757
	    //TODO prol. etc.
758
		boolean hasCheckRankProblem = false; //was rank guessed in a previous parsing process?
759
		if (nameToBeFilled == null){
760
			throw new IllegalArgumentException("NameToBeFilled must not be null in name parser");
761
		}else{
762
			hasCheckRankProblem = nameToBeFilled.hasProblem(ParserProblem.CheckRank);
763
			nameToBeFilled.removeParsingProblem(ParserProblem.CheckRank);
764
		}
765
		String authorString = null;
766
		if (fullNameStringOrig == null){
767
			return;
768
		}
769
		if (makeEmpty){
770
			makeEmpty(nameToBeFilled);
771
		}
772

    
773
		String fullNameString = fullNameStringOrig.replaceAll(oWs , " ").trim();
774

    
775
		fullNameString = removeHybridBlanks(fullNameString);
776
		fullNameString = removeSpNovBlanks(fullNameString);
777
		String[] epi = pattern.split(fullNameString);
778
		try {
779
	    	//cultivars //TODO 2 implement cultivars
780
//		    if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
781
//		    	result = parseCultivar(fullName);
782
//		    }
783

    
784
		    if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
785
		    	//supraGeneric
786
				if (rank != null && ! hasCheckRankProblem  && (rank.isSupraGeneric()|| rank.isGenus())){
787
					nameToBeFilled.setRank(rank);
788
					nameToBeFilled.setGenusOrUninomial(epi[0]);
789
				}
790
				//genus or guess rank
791
				else {
792
					rank = guessUninomialRank(nameToBeFilled, epi[0]);
793
					nameToBeFilled.setRank(rank);
794
					nameToBeFilled.setGenusOrUninomial(epi[0]);
795
					nameToBeFilled.addParsingProblem(ParserProblem.CheckRank);
796
					nameToBeFilled.setProblemStarts(0);
797
					nameToBeFilled.setProblemEnds(epi[0].length());
798
				}
799
				authorString = fullNameString.substring(epi[0].length());
800
			}
801
			 //infra genus
802
			 else if (infraGenusPattern.matcher(fullNameString).matches()){
803
				Rank infraGenericRank;
804
				if ("[unranked]".equals(epi[1])){
805
					infraGenericRank = Rank.INFRAGENERICTAXON();
806
				}else{
807
				    String infraGenericRankMarker = epi[1];
808
				    if (infraGenericRankMarker.startsWith(notho)){  //#3868
809
                        nameToBeFilled.setBinomHybrid(true);
810
                        infraGenericRankMarker = infraGenericRankMarker.substring(notho.length());
811
                    }else if(infraGenericRankMarker.startsWith("n")){
812
                        nameToBeFilled.setBinomHybrid(true);
813
                        infraGenericRankMarker = infraGenericRankMarker.substring(1);
814
                    }
815
                    infraGenericRank = Rank.getRankByIdInVoc(infraGenericRankMarker, nameToBeFilledOrig.getNomenclaturalCode());
816
				}
817
				nameToBeFilled.setRank(infraGenericRank);
818
				nameToBeFilled.setGenusOrUninomial(epi[0]);
819
				nameToBeFilled.setInfraGenericEpithet(epi[2]);
820
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
821
			}
822
			 //aggr. or group
823
			 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
824
				nameToBeFilled.setRank(Rank.getRankByIdInVoc(epi[2]));
825
				nameToBeFilled.setGenusOrUninomial(epi[0]);
826
				nameToBeFilled.setSpecificEpithet(epi[1]);
827
			}
828
		     //species
829
			 else if (speciesPattern.matcher(fullNameString).matches()){
830
				nameToBeFilled.setRank(Rank.SPECIES());
831
				nameToBeFilled.setGenusOrUninomial(epi[0]);
832
				nameToBeFilled.setSpecificEpithet(normalizeSpNov(epi[1]));
833
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
834
			}
835
		    //species with infra generic epithet
836
			 else if (speciesWithInfraGenPattern.matcher(fullNameString).matches()){
837
			     nameToBeFilled.setRank(Rank.SPECIES());
838
	             nameToBeFilled.setGenusOrUninomial(epi[0]);
839
                 nameToBeFilled.setInfraGenericEpithet(epi[2]);
840
	             nameToBeFilled.setSpecificEpithet(epi[4]);
841
	             authorString = fullNameString.substring(epi[0].length() + 2 + epi[2].length() + 2 + epi[4].length());
842
			 }
843
			 //autonym
844
			 else if (autonymPattern.matcher(fullNameString.replace(UTF8.HYBRID.toString(), "")).matches()){
845
			    String infraSpecRankMarker = epi[epi.length - 2];
846
			    boolean isTriHybrid = false;
847
			    if (infraSpecRankMarker.startsWith(notho)){  //#3868
848
                    nameToBeFilled.setTrinomHybrid(true);
849
                    infraSpecRankMarker = infraSpecRankMarker.substring(notho.length());
850
                    isTriHybrid = true;
851
                }else if(infraSpecRankMarker.startsWith("n")){
852
                    nameToBeFilled.setTrinomHybrid(true);
853
                    infraSpecRankMarker = infraSpecRankMarker.substring(1);
854
                    isTriHybrid = true;
855
                }
856
			    if (epi[1].startsWith(UTF8.HYBRID.toString())) {
857
			        nameToBeFilled.setBinomHybrid(true);
858
                }
859
			    nameToBeFilled.setRank(Rank.getRankByIdInVoc(infraSpecRankMarker));
860
				nameToBeFilled.setGenusOrUninomial(epi[0]);
861
				nameToBeFilled.setSpecificEpithet(epi[1]);
862
				nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
863
				int lenSpecies = 2 + epi[0].length()+epi[1].length();
864
				int lenInfraSpecies =  2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
865
				authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
866
			}
867
			 //infraSpecies
868
			 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
869
				String infraSpecRankMarker = epi[2];
870
				String infraSpecEpi = epi[3];
871
				if ("tax.".equals(infraSpecRankMarker)){
872
					infraSpecRankMarker += " " +  epi[3];
873
					infraSpecEpi = epi[4];
874
				}
875
				Rank infraSpecificRank;
876
				if ("[unranked]".equals(infraSpecRankMarker)){
877
					infraSpecificRank = Rank.INFRASPECIFICTAXON();
878
				}else{
879
					String localInfraSpecRankMarker;
880
					if (infraSpecRankMarker.startsWith(notho)){  //#3868
881
	                    nameToBeFilled.setTrinomHybrid(true);
882
	                    localInfraSpecRankMarker = infraSpecRankMarker.substring(notho.length());
883
					}else if(infraSpecRankMarker.startsWith("n")){
884
	                    nameToBeFilled.setTrinomHybrid(true);
885
	                    localInfraSpecRankMarker = infraSpecRankMarker.substring(1);
886
                    }else{
887
                        localInfraSpecRankMarker = infraSpecRankMarker;
888
                    }
889
				    infraSpecificRank = Rank.getRankByIdInVoc(localInfraSpecRankMarker);
890
				}
891
				nameToBeFilled.setRank(infraSpecificRank);
892
				nameToBeFilled.setGenusOrUninomial(epi[0]);
893
				nameToBeFilled.setSpecificEpithet(epi[1]);
894
				nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
895
				authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankMarker.length() + 1 + infraSpecEpi.length());
896

    
897
			 }
898
		      //infraSpecies without marker
899
			 else if (zooInfraSpeciesPattern.matcher(fullNameString).matches()){
900
					String infraSpecEpi = epi[2];
901
					Rank infraSpecificRank = Rank.SUBSPECIES();
902
					nameToBeFilled.setRank(infraSpecificRank);
903
					nameToBeFilled.setGenusOrUninomial(epi[0]);
904
					nameToBeFilled.setSpecificEpithet(epi[1]);
905
					nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
906
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecEpi.length());
907

    
908
			 }//old infraSpecies
909
			 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
910
				boolean implemented = false;
911
				if (implemented){
912
					nameToBeFilled.setRank(Rank.getRankByNameOrIdInVoc(epi[2]));
913
					nameToBeFilled.setGenusOrUninomial(epi[0]);
914
					nameToBeFilled.setSpecificEpithet(epi[1]);
915
					//TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
916
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
917
				}else{
918
					nameToBeFilled.addParsingProblem(ParserProblem.OldInfraSpeciesNotSupported);
919
					nameToBeFilled.setTitleCache(fullNameString, true);
920
					// FIXME Quick fix, otherwise search would not deilver results for unparsable names
921
					nameToBeFilled.setNameCache(fullNameString,true);
922
					// END
923
					logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
924
				}
925
			}
926
		     //hybrid formula
927
			 else if (hybridFormulaPattern.matcher(fullNameString).matches()){
928
				 Set<HybridRelationship> existingRelations = new HashSet<>();
929
				 Set<HybridRelationship> notToBeDeleted = new HashSet<>();
930

    
931
				 for ( HybridRelationship rel : nameToBeFilled.getHybridChildRelations()){
932
				     existingRelations.add(rel);
933
				 }
934

    
935
			     String firstNameString = "";
936
				 String secondNameString = "";
937
				 boolean isFirstName = true;
938
				 for (String str : epi){
939
					 if (str.matches(hybridSign)){
940
						 isFirstName = false;
941
					 }else if(isFirstName){
942
						 firstNameString += " " + str;
943
					 }else {
944
						 secondNameString += " " + str;
945
					 }
946
				 }
947
				 firstNameString = firstNameString.trim();
948
				 secondNameString = secondNameString.trim();
949
				 nameToBeFilled.setHybridFormula(true);
950
				 NomenclaturalCode code = nameToBeFilled.getNomenclaturalCode();
951
				 INonViralName firstName = this.parseFullName(firstNameString, code, rank);
952
				 if (secondNameString.matches(abbrevHybridSecondPart)){
953
				     secondNameString = extendSecondHybridPart(firstName, secondNameString);
954
				 }
955
				 INonViralName secondName = this.parseFullName(secondNameString, code, rank);
956
				 HybridRelationship firstRel = nameToBeFilled.addHybridParent(firstName, HybridRelationshipType.FIRST_PARENT(), null);
957
				 HybridRelationship second = nameToBeFilled.addHybridParent(secondName, HybridRelationshipType.SECOND_PARENT(), null);
958
				 checkRelationExist(firstRel, existingRelations, notToBeDeleted);
959
				 checkRelationExist(second, existingRelations, notToBeDeleted);
960

    
961
				 Rank newRank;
962
				 Rank firstRank = firstName.getRank();
963
				 Rank secondRank = secondName.getRank();
964

    
965
				 if (firstRank == null || (secondRank != null && firstRank.isHigher(secondRank))){
966
					 newRank = secondRank;
967
				 }else{
968
					 newRank = firstRank;
969
				 }
970
				 nameToBeFilled.setRank(newRank);
971
				 //remove not existing hybrid relation
972
				 if (makeEmpty){
973
		            Set<HybridRelationship> tmpChildRels = new HashSet<HybridRelationship>();
974
		            tmpChildRels.addAll(nameToBeFilled.getHybridChildRelations());
975
		            for (HybridRelationship rel : tmpChildRels){
976
		                if (! notToBeDeleted.contains(rel)){
977
		                    nameToBeFilled.removeHybridRelationship(rel);
978
		                }
979
		            }
980
				 }
981
			 }
982
		    //none
983
			else{
984
				nameToBeFilled.addParsingProblem(ParserProblem.UnparsableNamePart);
985
				nameToBeFilled.setTitleCache(fullNameString, true);
986
				// FIXME Quick fix, otherwise search would not deilver results for unparsable names
987
				nameToBeFilled.setNameCache(fullNameString, true);
988
				// END
989
				logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
990
		    }
991
		    //hybrid bits
992
		    handleHybridBits(nameToBeFilled);
993
		    if (!nameToBeFilled.isHybridFormula()){
994
		        Set<HybridRelationship> hybridChildRelations = new HashSet<>();
995
		        hybridChildRelations.addAll(nameToBeFilled.getHybridChildRelations());
996

    
997
		        for (HybridRelationship hybridRelationship: hybridChildRelations){
998
		        	nameToBeFilled.removeHybridRelationship(hybridRelationship);
999
		        }
1000
		    }
1001

    
1002
			//authors
1003
		    if (StringUtils.isNotBlank(authorString) ){
1004
				handleAuthors(nameToBeFilled, fullNameString, authorString);
1005
			}
1006
		    return;
1007
		} catch (UnknownCdmTypeException e) {
1008
			nameToBeFilled.addParsingProblem(ParserProblem.RankNotSupported);
1009
			nameToBeFilled.setTitleCache(fullNameString, true);
1010
			// FIXME Quick fix, otherwise search would not deilver results for unparsable names
1011
			nameToBeFilled.setNameCache(fullNameString,true);
1012
			// END
1013
			logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " +  fullNameString);
1014
			//return result;
1015
			return;
1016
		}
1017
	}
1018

    
1019
	/**
1020
     * @param string
1021
     * @return
1022
     */
1023
    private String normalizeSpNov(String epi) {
1024
        if (spNovPattern.matcher(epi).matches()){
1025
            epi = epi.replace(".", ". ").replace("\\s+", " ").trim();
1026
        }
1027
        return epi;
1028
    }
1029

    
1030
    /**
1031
     * @param firstName
1032
     * @param secondNameString
1033
     * @return
1034
     */
1035
    private String extendSecondHybridPart(INonViralName firstName, String secondNameString) {
1036
        //first letter of genus given
1037
        if (secondNameString.matches("^" + abbrevHybridGenus + ".*")){
1038
            if (StringUtils.isNotBlank(firstName.getGenusOrUninomial())){
1039
                if (secondNameString.substring(0,1).equals(firstName.getGenusOrUninomial().substring(0, 1))){
1040
                    secondNameString = secondNameString.replaceAll("^" + abbrevHybridGenus, firstName.getGenusOrUninomial() + " ");
1041
                }
1042
            }
1043
        }else if (secondNameString.matches(abbrevHybridSecondPartOnlyInfraSpecies)){
1044
            secondNameString = CdmUtils.concat(" " , firstName.getGenusOrUninomial(), firstName.getSpecificEpithet(), secondNameString);
1045
        }else if (true){  //there will be further alternatives in future maybe
1046
            secondNameString = CdmUtils.concat(" " , firstName.getGenusOrUninomial(), secondNameString);
1047
        }
1048
        return secondNameString;
1049
    }
1050

    
1051
    /**
1052
     * Checks if a hybrid relation exists in the Set of existing relations
1053
     * and <BR>
1054
     *  if it does not adds it to relations not to be deleted <BR>
1055
     *  if it does adds the existing relations to the relations not to be deleted
1056
     *
1057
     * @param firstRel
1058
     * @param existingRelations
1059
     * @param notToBeDeleted
1060
     */
1061
    private void checkRelationExist(
1062
            HybridRelationship newRelation,
1063
            Set<HybridRelationship> existingRelations,
1064
            Set<HybridRelationship> notToBeDeleted) {
1065
        HybridRelationship relToKeep = newRelation;
1066
        for (HybridRelationship existingRelation : existingRelations){
1067
            if (existingRelation.equals(newRelation)){
1068
                relToKeep = existingRelation;
1069
                break;
1070
            }
1071
        }
1072
        notToBeDeleted.add(relToKeep);
1073
    }
1074

    
1075
    private void handleHybridBits(INonViralName nameToBeFilled) {
1076
		//uninomial
1077
		String uninomial = CdmUtils.Nz(nameToBeFilled.getGenusOrUninomial());
1078
		boolean isUninomialHybrid = uninomial.startsWith(hybridSign);
1079
		if (isUninomialHybrid){
1080
			nameToBeFilled.setMonomHybrid(true);
1081
			nameToBeFilled.setGenusOrUninomial(uninomial.replace(hybridSign, ""));
1082
		}
1083
		//infrageneric
1084
		String infrageneric = CdmUtils.Nz(nameToBeFilled.getInfraGenericEpithet());
1085
		boolean isInfraGenericHybrid = infrageneric.startsWith(hybridSign);
1086
		if (isInfraGenericHybrid){
1087
			nameToBeFilled.setBinomHybrid(true);
1088
			nameToBeFilled.setInfraGenericEpithet(infrageneric.replace(hybridSign, ""));
1089
		}
1090
		//species Epi
1091
		String speciesEpi = CdmUtils.Nz(nameToBeFilled.getSpecificEpithet());
1092
		boolean isSpeciesHybrid = speciesEpi.startsWith(hybridSign);
1093
		if (isSpeciesHybrid){
1094
			if (StringUtils.isBlank(infrageneric)){
1095
				nameToBeFilled.setBinomHybrid(true);
1096
			}else{
1097
				nameToBeFilled.setTrinomHybrid(true);
1098
			}
1099
			nameToBeFilled.setSpecificEpithet(speciesEpi.replace(hybridSign, ""));
1100
		}
1101
		//infra species
1102
		String infraSpeciesEpi = CdmUtils.Nz(nameToBeFilled.getInfraSpecificEpithet());
1103
		boolean isInfraSpeciesHybrid = infraSpeciesEpi.startsWith(hybridSign);
1104
		if (isInfraSpeciesHybrid){
1105
			nameToBeFilled.setTrinomHybrid(true);
1106
			nameToBeFilled.setInfraSpecificEpithet(infraSpeciesEpi.replace(hybridSign, ""));
1107
		}
1108

    
1109
	}
1110

    
1111
	private String removeHybridBlanks(String fullNameString) {
1112
		String result = fullNameString
1113
		        .replaceAll(oWs + "[xX]" + oWs + "(?=[A-Z])", " " + hybridSign + " ")
1114
		        .replaceAll(hybridFull, " " + hybridSign).trim();
1115
		if (result.contains(hybridSign + " ") &&
1116
		        result.matches("^" + capitalEpiWord + oWs + hybridSign + oWs + nonCapitalEpiWord + ".*")){
1117
		    result = result.replaceFirst(hybridSign + oWs, hybridSign);
1118
		}
1119
		return result;
1120
	}
1121

    
1122

    
1123
    private String removeSpNovBlanks(String fullNameString) {
1124
        Matcher spNovMatcher = spNovPattern.matcher(fullNameString);
1125
        if (spNovMatcher.find()){
1126
            String spNov = spNovMatcher.group(0);
1127
            String spNovShort = spNov.replaceAll("\\s", "");
1128
            if (spNov.length() != spNovShort.length()){
1129
                fullNameString = fullNameString.replace(spNov, spNovShort);
1130
            }
1131
        }
1132
        return fullNameString;
1133
    }
1134

    
1135

    
1136
	/**
1137
	 * Author parser for external use
1138
	 * @param nonViralName
1139
	 * @param authorString
1140
	 * @throws StringNotParsableException
1141
	 */
1142
	@Override
1143
	public void parseAuthors(INonViralName nonViralNameOrig, String authorString) throws StringNotParsableException{
1144
	    INonViralName nonViralName = CdmBase.deproxy(nonViralNameOrig);
1145
	    TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
1146
		Integer[] years = new Integer[4];
1147
		NomenclaturalCode code = nonViralName.getNameType();
1148
		fullAuthors(authorString, authors, years, code);
1149
		nonViralName.setCombinationAuthorship(authors[0]);
1150
		nonViralName.setExCombinationAuthorship(authors[1]);
1151
		nonViralName.setBasionymAuthorship(authors[2]);
1152
		nonViralName.setExBasionymAuthorship(authors[3]);
1153
		if (nonViralName.isZoological()){
1154
			IZoologicalName zooName = (IZoologicalName)nonViralName;
1155
			zooName.setPublicationYear(years[0]);
1156
			zooName.setOriginalPublicationYear(years[2]);
1157
		}
1158
	}
1159

    
1160
	/**
1161
	 * @param nameToBeFilled
1162
	 * @param fullNameString
1163
	 * @param authorString
1164
	 */
1165
	public void handleAuthors(INonViralName nameToBeFilled, String fullNameString, String authorString) {
1166
	    TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
1167
		Integer[] years = new Integer[4];
1168
		try {
1169
			NomenclaturalCode code = nameToBeFilled.getNameType();
1170
			fullAuthors(authorString, authors, years, code);
1171
		} catch (StringNotParsableException e) {
1172
			nameToBeFilled.addParsingProblem(ParserProblem.UnparsableAuthorPart);
1173
			nameToBeFilled.setTitleCache(fullNameString, true);
1174
			// FIXME Quick fix, otherwise search would not deliver results for unparsable names
1175
			nameToBeFilled.setNameCache(fullNameString, true);
1176
			// END
1177
			logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
1178
		}
1179
		nameToBeFilled.setCombinationAuthorship(authors[0]);
1180
		nameToBeFilled.setExCombinationAuthorship(authors[1]);
1181
		nameToBeFilled.setBasionymAuthorship(authors[2]);
1182
		nameToBeFilled.setExBasionymAuthorship(authors[3]);
1183
		if (nameToBeFilled.isZoological()){
1184
			IZoologicalName zooName = (IZoologicalName)nameToBeFilled;
1185
			zooName.setPublicationYear(years[0]);
1186
			zooName.setOriginalPublicationYear(years[2]);
1187
		}
1188
	}
1189

    
1190
	/**
1191
	 * Guesses the rank of uninomial depending on the typical endings for ranks
1192
	 * @param nameToBeFilled
1193
	 * @param string
1194
	 */
1195
	private Rank guessUninomialRank(INonViralName nameToBeFilled, String uninomial) {
1196
		Rank result = Rank.GENUS();
1197
		if (nameToBeFilled.isBotanical()){
1198
			if (false){
1199
				//
1200
			}else if (uninomial.endsWith("phyta") || uninomial.endsWith("mycota") ){  //plants, fungi
1201
				result = Rank.SECTION_BOTANY();
1202
			}else if (uninomial.endsWith("bionta")){
1203
				result = Rank.SUBKINGDOM();  //TODO
1204
			}else if (uninomial.endsWith("phytina")|| uninomial.endsWith("mycotina")  ){  //plants, fungi
1205
				result = Rank.SUBSECTION_BOTANY();
1206
			}else if (uninomial.endsWith("opsida") || uninomial.endsWith("phyceae") || uninomial.endsWith("mycetes")){  //plants, algae, fungi
1207
				result = Rank.CLASS();
1208
			}else if (uninomial.endsWith("idae") || uninomial.endsWith("phycidae") || uninomial.endsWith("mycetidae")){ //plants, algae, fungi
1209
				result = Rank.SUBCLASS();
1210
			}else if (uninomial.endsWith("ales")){
1211
				result = Rank.ORDER();
1212
			}else if (uninomial.endsWith("ineae")){
1213
				result = Rank.SUBORDER();
1214
			}else if (uninomial.endsWith("aceae")){
1215
					result = Rank.FAMILY();
1216
			}else if (uninomial.endsWith("oideae")){
1217
				result = Rank.SUBFAMILY();
1218
			}else if (uninomial.endsWith("eae")){
1219
				result = Rank.TRIBE();
1220
			}else if (uninomial.endsWith("inae")){
1221
				result = Rank.SUBTRIBE();
1222
			}else if (uninomial.endsWith("ota")){
1223
				result = Rank.KINGDOM();  //TODO
1224
			}
1225
		}else if (nameToBeFilled.isZoological()){
1226
			if (false){
1227
				//
1228
			}else if (uninomial.endsWith("oideae")){
1229
				result = Rank.SUPERFAMILY();
1230
			}else if (uninomial.endsWith("idae")){
1231
					result = Rank.FAMILY();
1232
			}else if (uninomial.endsWith("inae")){
1233
				result = Rank.SUBFAMILY();
1234
			}else if (uninomial.endsWith("inae")){
1235
				result = Rank.SUBFAMILY();
1236
			}else if (uninomial.endsWith("ini")){
1237
				result = Rank.TRIBE();
1238
			}else if (uninomial.endsWith("ina")){
1239
				result = Rank.SUBTRIBE();
1240
			}
1241
		}else{
1242
			//
1243
		}
1244
		return result;
1245
	}
1246

    
1247
	/**
1248
	 * Parses the fullAuthorString
1249
	 * @param fullAuthorString
1250
	 * @return array of Teams containing the Team[0],
1251
	 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
1252
	 */
1253
	protected void fullAuthors (String fullAuthorStringOrig, TeamOrPersonBase<?>[] authors,
1254
	        Integer[] years, NomenclaturalCode code)
1255
			throws StringNotParsableException{
1256
		if (fullAuthorStringOrig == null || code == null){
1257
			return;
1258
		}
1259
		String fullAuthorString = fullAuthorStringOrig.trim();
1260

    
1261
		//Botanic
1262
		if ( code.isBotanical() ){
1263
			if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
1264
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1265
			}
1266
		}
1267
		//Zoo
1268
		else if ( code.isZoological() ){
1269
			if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
1270
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1271
			}
1272
		}else {
1273
			//TODO
1274
			logger.warn ("Full author String parsable only for defined BotanicalNames or ZoologicalNames but this is " + code.getMessage());
1275
			throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1276
		}
1277
		fullAuthorsChecked(fullAuthorString, authors, years);
1278
	}
1279

    
1280
	/*
1281
	 * like fullTeams but without trim and match check
1282
	 */
1283
	protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years){
1284
		int authorShipStart = 0;
1285
		Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
1286

    
1287
		if (basionymMatcher.find(0)){
1288

    
1289
			String basString = basionymMatcher.group();
1290
			basString = basString.replaceFirst(basStart, "");
1291
			basString = basString.replaceAll(basEnd, "").trim();
1292
			authorShipStart = basionymMatcher.end(1);
1293

    
1294
			TeamOrPersonBase<?>[] basAuthors = new TeamOrPersonBase[2];
1295
			Integer[] basYears = new Integer[2];
1296
			authorsAndEx(basString, basAuthors, basYears);
1297
			authors[2]= basAuthors[0];
1298
			years[2] = basYears[0];
1299
			authors[3]= basAuthors[1];
1300
			years[3] = basYears[1];
1301
		}
1302
		if (fullAuthorString.length() >= authorShipStart){
1303
			TeamOrPersonBase<?>[] combinationAuthors = new TeamOrPersonBase[2];
1304
			Integer[] combinationYears = new Integer[2];
1305
			authorsAndEx(fullAuthorString.substring(authorShipStart), combinationAuthors, combinationYears);
1306
			authors[0]= combinationAuthors[0] ;
1307
			years[0] = combinationYears[0];
1308
			authors[1]= combinationAuthors[1];
1309
			years[1] = combinationYears[1];
1310
		}
1311
	}
1312

    
1313

    
1314
	/**
1315
	 * Parses the author and ex-author String
1316
	 * @param authorShipStringOrig String representing the author and the ex-author team
1317
	 * @return array of Teams containing the Team[0] and the ExTeam[1]
1318
	 */
1319
	protected void authorsAndEx (String authorShipStringOrig, TeamOrPersonBase<?>[] authors, Integer[] years){
1320
		//TODO noch allgemeiner am Anfang durch Replace etc.
1321
		String authorShipString = authorShipStringOrig.trim();
1322
		authorShipString = authorShipString.replaceFirst(oWs + "ex" + oWs, " ex. " );
1323

    
1324
		//int authorEnd = authorTeamString.length();
1325
		int authorBegin = 0;
1326

    
1327
		Matcher exAuthorMatcher = exAuthorPattern.matcher(authorShipString);
1328
		if (exAuthorMatcher.find(0)){
1329
			authorBegin = exAuthorMatcher.end(0);
1330
			int exAuthorEnd = exAuthorMatcher.start(0);
1331
			String exString = authorShipString.substring(0, exAuthorEnd).trim();
1332
			authors [1] = author(exString);
1333
		}
1334
		zooOrBotanicAuthor(authorShipString.substring(authorBegin), authors, years );
1335
	}
1336

    
1337
	/**
1338
	 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
1339
	 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies
1340
	 * to the year in case of an zoological name.
1341
	 * @param authorString
1342
	 * @param team
1343
	 * @param year
1344
	 */
1345
	protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase<?>[] team, Integer[] year){
1346
		if (authorString == null){
1347
			return;
1348
		}else if ((authorString = authorString.trim()).length() == 0){
1349
			return;
1350
		}
1351
		Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
1352
		if (zooAuthorAddidtionMatcher.find()){
1353
			int index = zooAuthorAddidtionMatcher.start(0);
1354
			String strYear = authorString.substring(index);
1355
			strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
1356
			year[0] = Integer.valueOf(strYear);
1357
			authorString = authorString.substring(0, index).trim();
1358
		}
1359
		team[0] = author(authorString);
1360
	}
1361

    
1362

    
1363
	/**
1364
	 * Parses an authorTeam String and returns the team.
1365
	 * !!! TODO (atomization not yet implemented)
1366
	 * @param authorString String representing the author
1367
	 * @return an Team
1368
	 */
1369
	public TeamOrPersonBase<?> author (String authorString){
1370
		if (authorString == null){
1371
			return null;
1372
		}else if ((authorString = authorString.trim()).length() == 0){
1373
			return null;
1374
		}else if (! finalTeamSplitterPattern.matcher(authorString).find() && ! authorIsAlwaysTeam){
1375
			//1 Person
1376
			Person result = Person.NewInstance();
1377
			result.setNomenclaturalTitle(authorString);
1378
			return result;
1379
		}else{
1380
			return parsedTeam(authorString);
1381
		}
1382

    
1383
	}
1384

    
1385
	/**
1386
	 * Parses an authorString (reprsenting a team into the single authors and add
1387
	 * them to the return Team.
1388
	 * @param authorString
1389
	 * @return Team
1390
	 */
1391
	protected Team parsedTeam(String authorString){
1392
		Team result = Team.NewInstance();
1393
		String[] authors = authorString.split(notFinalTeamSplitter);
1394
		for (int i = 0; i < authors.length; i++){
1395
		    String author = authors[i];
1396
		    if ("al.".equals(author.trim()) && i == authors.length - 1){  //final al. is handled as hasMoreMembers
1397
			    result.setHasMoreMembers(true);
1398
			}else{
1399
			    Person person = Person.NewInstance();
1400
			    person.setNomenclaturalTitle(author);
1401
			    result.addTeamMember(person);
1402
			}
1403
		}
1404
		return result;
1405
	}
1406

    
1407

    
1408
//	// Parsing of the given full name that has been identified as a cultivar already somwhere else.
1409
//	// The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
1410
	public IBotanicalName parseCultivar(String fullName) throws StringNotParsableException{
1411
		ICultivarPlantName result = null;
1412
		    String[] words = oWsPattern.split(fullName);
1413

    
1414
		    /* ---------------------------------------------------------------------------------
1415
		     * cultivar
1416
		     * ---------------------------------------------------------------------------------*/
1417
			if (fullName.indexOf(" '") != 0){
1418
				//TODO location of 'xx' is probably not arbitrary
1419
				Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
1420
				if (cultivarMatcher.find()){
1421
					String namePart = fullName.replaceFirst(cultivar, "");
1422

    
1423
					String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
1424
					//OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
1425

    
1426
					result = (ICultivarPlantName)parseFullName(namePart);
1427
					result.setCultivarName(cultivarPart);
1428
				}
1429
			}else if (fullName.indexOf(" cv.") != 0){
1430
				// cv. is old form (not official)
1431
				throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
1432
			}
1433

    
1434
		    /* ---------------------------------------------------------------------------------
1435
		     * cultivar group
1436
		     * ---------------------------------------------------------------------------------
1437
		     */
1438
			// TODO in work
1439
			//Ann. this is not the official way of noting cultivar groups
1440
		    String group = oWs + "Group" + oWs + capitalEpiWord + end;
1441
			Pattern groupRE = Pattern.compile(group);
1442
			Matcher groupMatcher = groupRE.matcher(fullName);
1443
			if (groupMatcher.find()){
1444
		    	if (! words[words.length - 2].equals("group")){
1445
		            throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
1446
		        }else{
1447

    
1448
		        	String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
1449
		        	//OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
1450

    
1451
		        	String cultivarPart = words[words.length -1];
1452
		        	result = (ICultivarPlantName)parseFullName(namePart);
1453
		        	if (result != null){
1454
		        		result.setCultivarName(cultivarPart);
1455

    
1456
		        		//OLD: result.setCultivarGroupName(cultivarPart);
1457
		        	}
1458
		        }
1459

    
1460
		    }
1461
//		    // ---------------------------------------------------------------------------------
1462
//		    if ( result = "" ){
1463
//		        return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
1464
//		    }else{
1465
//		        return result;
1466
	//	    }
1467
			return result; //TODO
1468
	}
1469

    
1470

    
1471
	private void makeEmpty(INonViralName name){
1472
	    TaxonName nameToBeFilled = TaxonName.castAndDeproxy(name);
1473
		nameToBeFilled.setRank(null);
1474
		nameToBeFilled.setTitleCache(null, false);
1475
		nameToBeFilled.setFullTitleCache(null, false);
1476
		nameToBeFilled.setNameCache(null, false);
1477

    
1478
		nameToBeFilled.setAppendedPhrase(null);
1479
		nameToBeFilled.setBasionymAuthorship(null);
1480
		nameToBeFilled.setCombinationAuthorship(null);
1481
		nameToBeFilled.setExBasionymAuthorship(null);
1482
		nameToBeFilled.setExCombinationAuthorship(null);
1483
		nameToBeFilled.setAuthorshipCache(null, false);
1484

    
1485

    
1486
		//delete problems except check rank
1487
		makeProblemEmpty(nameToBeFilled);
1488

    
1489
		// TODO ?
1490
		//nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
1491

    
1492

    
1493
		nameToBeFilled.setGenusOrUninomial(null);
1494
		nameToBeFilled.setInfraGenericEpithet(null);
1495
		nameToBeFilled.setSpecificEpithet(null);
1496
		nameToBeFilled.setInfraSpecificEpithet(null);
1497

    
1498
		nameToBeFilled.setNomenclaturalMicroReference(null);
1499
		nameToBeFilled.setNomenclaturalReference(null);
1500

    
1501
		nameToBeFilled.setHybridFormula(false);
1502
		nameToBeFilled.setMonomHybrid(false);
1503
		nameToBeFilled.setBinomHybrid(false);
1504
		nameToBeFilled.setTrinomHybrid(false);
1505

    
1506
		nameToBeFilled.setAnamorphic(false);
1507

    
1508
		nameToBeFilled.setBreed(null);
1509
		nameToBeFilled.setOriginalPublicationYear(null);
1510

    
1511
		//nom status handled in nom status parser, otherwise we loose additional information like reference etc.
1512
		//hybrid relationships handled in hybrid formula and at end of fullNameParser
1513
	}
1514
}
(4-4/8)