Project

General

Profile

Download (58.4 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.strategy.parser;
11

    
12
import java.util.HashSet;
13
import java.util.Set;
14
import java.util.regex.Matcher;
15
import java.util.regex.Pattern;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19
import org.joda.time.DateTimeFieldType;
20
import org.joda.time.Partial;
21

    
22
import eu.etaxonomy.cdm.common.CdmUtils;
23
import eu.etaxonomy.cdm.common.UTF8;
24
import eu.etaxonomy.cdm.model.agent.Person;
25
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.IParsable;
29
import eu.etaxonomy.cdm.model.common.TimePeriod;
30
import eu.etaxonomy.cdm.model.name.HybridRelationship;
31
import eu.etaxonomy.cdm.model.name.HybridRelationshipType;
32
import eu.etaxonomy.cdm.model.name.IBotanicalName;
33
import eu.etaxonomy.cdm.model.name.ICultivarPlantName;
34
import eu.etaxonomy.cdm.model.name.INonViralName;
35
import eu.etaxonomy.cdm.model.name.IZoologicalName;
36
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
38
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
39
import eu.etaxonomy.cdm.model.name.Rank;
40
import eu.etaxonomy.cdm.model.name.TaxonName;
41
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
42
import eu.etaxonomy.cdm.model.reference.IBook;
43
import eu.etaxonomy.cdm.model.reference.IBookSection;
44
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
45
import eu.etaxonomy.cdm.model.reference.IVolumeReference;
46
import eu.etaxonomy.cdm.model.reference.Reference;
47
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
48
import eu.etaxonomy.cdm.model.reference.ReferenceType;
49
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
50
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
51

    
52

    
53
/**
54
 * @author a.mueller
55
 *
56
 */
57
public class NonViralNameParserImpl
58
            extends NonViralNameParserImplRegExBase
59
            implements INonViralNameParser<INonViralName> {
60
	private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
61

    
62
	// good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
63

    
64
	final static boolean MAKE_EMPTY = true;
65
	final static boolean MAKE_NOT_EMPTY = false;
66

    
67
	private final boolean authorIsAlwaysTeam = false;
68
	private boolean removeSpaceAfterDot = false;
69

    
70
    public static NonViralNameParserImpl NewInstance(){
71
		return new NonViralNameParserImpl();
72
	}
73

    
74
	@Override
75
    public INonViralName parseSimpleName(String simpleName){
76
		return parseSimpleName(simpleName, null, null);
77
	}
78

    
79
	@Override
80
    public INonViralName parseSimpleName(String simpleName, NomenclaturalCode code, Rank rank){
81
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
82
		return parseFullName(simpleName, code, rank);
83
	}
84

    
85
	public void parseSimpleName(INonViralName nameToBeFilled, String simpleNameString, Rank rank, boolean makeEmpty){
86
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
87
		parseFullName(nameToBeFilled, simpleNameString, rank, makeEmpty);
88
	}
89

    
90
	public INonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
91
		return getNonViralNameInstance(fullString, code, null);
92
	}
93

    
94
	public INonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
95
		INonViralName result = null;
96
		if(code ==null) {
97
			boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
98
			boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
99
			boolean isBacteriologicalName = false;
100
			boolean isCultivatedPlantName = false;
101
			if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
102
				if (isBotanicalName){
103
					result = TaxonNameFactory.NewBotanicalInstance(rank);
104
				}else{
105
					result = TaxonNameFactory.NewCultivarInstance(rank);
106
				}
107
			}else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
108
				result = TaxonNameFactory.NewZoologicalInstance(rank);
109
			}else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
110
				result = TaxonNameFactory.NewBacterialInstance(rank);
111
			}else {
112
				result =  TaxonNameFactory.NewNonViralInstance(rank);
113
			}
114
		} else {
115
			switch (code) {
116
			case ICNAFP:
117
				result = TaxonNameFactory.NewBotanicalInstance(rank);
118
				break;
119
			case ICZN:
120
				result = TaxonNameFactory.NewZoologicalInstance(rank);
121
				break;
122
			case ICNCP:
123
				logger.warn("ICNCP parsing not yet implemented");
124
				result = TaxonNameFactory.NewCultivarInstance(rank);
125
				break;
126
			case ICNB:
127
				logger.warn("ICNB not yet implemented");
128
				result = TaxonNameFactory.NewBacterialInstance(rank);
129
				break;
130
			case ICVCN:
131
				logger.error("Viral name is not a NonViralName !!");
132
				break;
133
			default:
134
				// FIXME Unreachable code
135
				logger.error("Unknown Nomenclatural Code !!");
136
			}
137
		}
138
		return result;
139
	}
140

    
141
	@Override
142
    public TaxonName parseReferencedName(String fullReferenceString) {
143
		return parseReferencedName(fullReferenceString, null, null);
144
	}
145

    
146
	@Override
147
    public TaxonName parseReferencedName(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
148
		if (fullReferenceString == null){
149
			return null;
150
		}else{
151
		    INonViralName result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
152
			parseReferencedName(result, fullReferenceString, rank, MAKE_EMPTY);
153
			return TaxonName.castAndDeproxy(result);
154
		}
155
	}
156

    
157
	private String standardize(INonViralName nameToBeFilled, String fullReferenceString, boolean makeEmpty){
158
		//Check null and standardize
159
		if (fullReferenceString == null){
160
			//return null;
161
			return null;
162
		}
163
		if (makeEmpty){
164
			makeEmpty(nameToBeFilled);
165
		}
166
		fullReferenceString = fullReferenceString.replaceAll(oWs , " ");
167
		fullReferenceString = fullReferenceString.trim();
168
		if ("".equals(fullReferenceString)){
169
			fullReferenceString = null;
170
		}
171
		return fullReferenceString;
172
	}
173

    
174
	/**
175
	 * Returns the regEx to be used for the full-name depending on the code
176
	 * @param nameToBeFilled
177
	 * @return
178
	 */
179
	private String getCodeSpecificFullNameRegEx(INonViralName nameToBeFilledOrig){
180
	    INonViralName nameToBeFilled = CdmBase.deproxy(nameToBeFilledOrig);
181
		if (nameToBeFilled.isZoological()){
182
			return anyZooFullName;
183
		}else if (nameToBeFilled.isBotanical()) {
184
			return anyBotanicFullName;
185
		}else if (nameToBeFilled.isNonViral()) {
186
			return anyBotanicFullName;  //TODO ?
187
		}else{
188
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
189
			return null;
190
		}
191
	}
192

    
193
	/**
194
	 * Returns the regEx to be used for the fsimple-name depending on the code
195
	 * @param nameToBeFilled
196
	 * @return
197
	 */
198
	private String getCodeSpecificSimpleNameRegEx(INonViralName nameToBeFilled){
199
		nameToBeFilled = CdmBase.deproxy(nameToBeFilled);
200

    
201
		if (nameToBeFilled.isZoological()){
202
			return anyZooName;
203
		}else if (nameToBeFilled.isBotanical()) {
204
		    return anyBotanicName;
205
		}else if (nameToBeFilled.isNonViral()){
206
			return anyZooName;  //TODO ?
207
		}else{
208
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
209
			return null;
210
		}
211
	}
212

    
213
	private Matcher getMatcher(String regEx, String matchString){
214
		Pattern pattern = Pattern.compile(regEx);
215
		Matcher matcher = pattern.matcher(matchString);
216
		return matcher;
217
	}
218

    
219
	@Override
220
    public void parseReferencedName(INonViralName nameToBeFilled, String fullReferenceStringOrig, Rank rank, boolean makeEmpty) {
221
		//standardize
222
		String fullReferenceString = standardize(nameToBeFilled, fullReferenceStringOrig, makeEmpty);
223
		if (fullReferenceString == null){
224
			return;
225
		}
226
		// happens already in standardize(...)
227
//		makeProblemEmpty(nameToBeFilled);
228

    
229
		//make nomenclatural status and replace it by empty string
230
	    fullReferenceString = parseNomStatus(fullReferenceString, nameToBeFilled, makeEmpty);
231
	    nameToBeFilled.setProblemEnds(fullReferenceString.length());
232

    
233
	    //get full name reg
234
		String localFullNameRegEx = getCodeSpecificFullNameRegEx(nameToBeFilled);
235
		//get full name reg
236
		String localSimpleNameRegEx = getCodeSpecificSimpleNameRegEx(nameToBeFilled);
237

    
238
		//separate name and reference part
239
		String nameAndRefSeparatorRegEx = "(^" + localFullNameRegEx + ")("+ referenceSeperator + ")";
240
		Matcher nameAndRefSeparatorMatcher = getMatcher (nameAndRefSeparatorRegEx, fullReferenceString);
241

    
242
		Matcher onlyNameMatcher = getMatcher (localFullNameRegEx, fullReferenceString);
243
		Matcher hybridMatcher = hybridFormulaPattern.matcher(fullReferenceString);
244
		Matcher onlySimpleNameMatcher = getMatcher (localSimpleNameRegEx, fullReferenceString);
245

    
246
		if (onlyNameMatcher.matches()){
247
			makeEmpty = false;
248
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
249
		} else if (nameAndRefSeparatorMatcher.find()){
250
			makeNameWithReference(nameToBeFilled, fullReferenceString, nameAndRefSeparatorMatcher, rank, makeEmpty);
251
		}else if (hybridMatcher.matches() ){
252
		    //I do not remember why we need makeEmpty = false for onlyNameMatcher,
253
		    //but for hybridMatcher we need to remove old Hybrid Relationships if necessary, therefore
254
		    //I removed it from here
255
            parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
256
        }else if (onlySimpleNameMatcher.matches()){
257
			makeEmpty = false;
258
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);	//simpleName not yet implemented
259
		}else{
260
			makeNoFullRefMatch(nameToBeFilled, fullReferenceString, rank);
261
		}
262
		//problem handling. Start and end solved in subroutines
263
		if (! nameToBeFilled.hasProblem()){
264
			makeProblemEmpty(nameToBeFilled);
265
		}
266
	}
267

    
268
	private void makeProblemEmpty(IParsable parsable){
269
		boolean hasCheckRank = parsable.hasProblem(ParserProblem.CheckRank);
270
		parsable.setParsingProblem(0);
271
		if (hasCheckRank){
272
			parsable.addParsingProblem(ParserProblem.CheckRank);
273
		}
274
		parsable.setProblemStarts(-1);
275
		parsable.setProblemEnds(-1);
276
	}
277

    
278
	private void makeNoFullRefMatch(INonViralName nameToBeFilled, String fullReferenceString, Rank rank){
279
	    //try to parse first part as name, but keep in mind full string is not parsable
280
		int start = 0;
281

    
282
		String localFullName = getCodeSpecificFullNameRegEx(nameToBeFilled);
283
		Matcher fullNameMatcher = getMatcher (pStart + localFullName, fullReferenceString);
284
		if (fullNameMatcher.find()){
285
			String fullNameString = fullNameMatcher.group(0);
286
			nameToBeFilled.setProtectedNameCache(false);
287
			parseFullName(nameToBeFilled, fullNameString, rank, false);
288
			String sure = nameToBeFilled.getNameCache();
289
			start = sure.length();
290
		}
291

    
292
//		String localSimpleName = getLocalSimpleName(nameToBeFilled);
293
//		Matcher simpleNameMatcher = getMatcher (start + localSimpleName, fullReferenceString);
294
//		if (simpleNameMatcher.find()){
295
//			String simpleNameString = simpleNameMatcher.group(0);
296
//			parseFullName(nameToBeFilled, simpleNameString, rank, false);
297
//			start = simpleNameString.length();
298
//		}
299

    
300
		//don't parse if name can't be separated
301
		nameToBeFilled.addParsingProblem(ParserProblem.NameReferenceSeparation);
302
		nameToBeFilled.setTitleCache(fullReferenceString, true);
303
		nameToBeFilled.setFullTitleCache(fullReferenceString, true);
304
		// FIXME Quick fix, otherwise search would not deliver results for unparsable names
305
		nameToBeFilled.setNameCache(fullReferenceString, true);
306
		// END
307
		nameToBeFilled.setProblemStarts(start);
308
		nameToBeFilled.setProblemEnds(fullReferenceString.length());
309
		logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");
310
	}
311

    
312
	private void makeNameWithReference(INonViralName nameToBeFilled,
313
			String fullReferenceString,
314
			Matcher nameAndRefSeparatorMatcher,
315
			Rank rank,
316
			boolean makeEmpty){
317

    
318
		String nameAndSeparator = nameAndRefSeparatorMatcher.group(0);
319
	    String name = nameAndRefSeparatorMatcher.group(1);
320
	    String referenceString = fullReferenceString.substring(nameAndRefSeparatorMatcher.end());
321

    
322
	    // is reference an in ref?
323
	    String separator = nameAndSeparator.substring(name.length());
324
		boolean isInReference = separator.matches(inReferenceSeparator);
325

    
326
	    //parse subparts
327

    
328
		int oldProblemEnds = nameToBeFilled.getProblemEnds();
329
		parseFullName(nameToBeFilled, name, rank, makeEmpty);
330
	    nameToBeFilled.setProblemEnds(oldProblemEnds);
331

    
332
		//zoological new combinations should not have a nom. reference to be parsed
333
	    if (nameToBeFilled.isZoological()){
334
			IZoologicalName zooName = (IZoologicalName)CdmBase.deproxy(nameToBeFilled);
335
			//is name new combination?
336
			if (zooName.getBasionymAuthorship() != null || zooName.getOriginalPublicationYear() != null){
337
				ParserProblem parserProblem = ParserProblem.NewCombinationHasPublication;
338
				zooName.addParsingProblem(parserProblem);
339
				nameToBeFilled.setProblemStarts((nameToBeFilled.getProblemStarts()> -1) ? nameToBeFilled.getProblemStarts(): name.length());
340
				nameToBeFilled.setProblemEnds(Math.max(fullReferenceString.length(), nameToBeFilled.getProblemEnds()));
341
			}
342
		}
343

    
344
	    parseReference(nameToBeFilled, referenceString, isInReference);
345
	    INomenclaturalReference ref = nameToBeFilled.getNomenclaturalReference();
346

    
347
	    //problem start
348
	    int start = nameToBeFilled.getProblemStarts();
349
	    int nameLength = name.length();
350
	    int nameAndSeparatorLength = nameAndSeparator.length();
351
	    int fullRefLength = nameToBeFilled.getFullTitleCache().length();
352

    
353
	    if (nameToBeFilled.isProtectedTitleCache() || nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)){
354
	    	start = Math.max(0, start);
355
		}else{
356
			if (ref != null && ref.getParsingProblem()!=0){
357
				start = Math.max(nameAndSeparatorLength, start);
358
		    	//TODO search within ref
359
			}
360
		}
361

    
362
	    //end
363
	    int end = nameToBeFilled.getProblemEnds();
364

    
365
	    if (ref != null && ref.getParsingProblem()!=0){
366
	    	end = Math.min(nameAndSeparatorLength + ref.getProblemEnds(), end);
367
	    }else{
368
	    	if (nameToBeFilled.isProtectedTitleCache() ){
369
	    		end = Math.min(end, nameAndSeparatorLength);
370
	    		//TODO search within name
371
			}
372
	    }
373
	    nameToBeFilled.setProblemStarts(start);
374
	    nameToBeFilled.setProblemEnds(end);
375

    
376
	    //delegate has problem to name
377
	    if (ref != null && ref.getParsingProblem()!=0){
378
	    	nameToBeFilled.addParsingProblems(ref.getParsingProblem());
379
	    }
380

    
381
	    Reference nomRef;
382
		if ( (nomRef = (Reference)nameToBeFilled.getNomenclaturalReference()) != null ){
383
			nomRef.setAuthorship(nameToBeFilled.getCombinationAuthorship());
384
		}
385
	}
386

    
387
	//TODO make it an Array of status
388
	/**
389
	 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonName}.
390
	 * The nomenclatural status part ist deleted from the reference String.
391
	 * @return  String the new (shortend) reference String
392
	 */
393
	public String parseNomStatus(String fullString, INonViralName nameToBeFilled, boolean makeEmpty) {
394
		Set<NomenclaturalStatusType> existingStatusTypeSet = new HashSet<NomenclaturalStatusType>();
395
		Set<NomenclaturalStatusType> newStatusTypeSet = new HashSet<NomenclaturalStatusType>();
396
		for (NomenclaturalStatus existingStatus : nameToBeFilled.getStatus()){
397
			existingStatusTypeSet.add(existingStatus.getType());
398
		}
399

    
400
		String statusString;
401
		Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")");
402
		Matcher hasStatusMatcher = hasStatusPattern.matcher(fullString);
403

    
404
		if (hasStatusMatcher.find()) {
405
			String statusPhrase = hasStatusMatcher.group(0);
406

    
407
			Pattern statusPattern = Pattern.compile(pNomStatus);
408
			Matcher statusMatcher = statusPattern.matcher(statusPhrase);
409
			statusMatcher.find();
410
			statusString = statusMatcher.group(0);
411
			try {
412
			    TaxonName nameToBeFilledCasted =  TaxonName.castAndDeproxy(nameToBeFilled);
413
				NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString, nameToBeFilledCasted);
414
				if (! existingStatusTypeSet.contains(nomStatusType)){
415
					NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
416
					nameToBeFilled.addStatus(nomStatus);
417
				}
418
				newStatusTypeSet.add(nomStatusType);
419
				fullString = fullString.replace(statusPhrase, "");
420
			} catch (UnknownCdmTypeException e) {
421
				//Do nothing
422
			}
423
		}
424
		//remove not existing nom status
425
		if (makeEmpty){
426
			Set<NomenclaturalStatus> tmpStatus = new HashSet<NomenclaturalStatus>();
427
			tmpStatus.addAll(nameToBeFilled.getStatus());
428
			for (NomenclaturalStatus status : tmpStatus){
429
				if (! newStatusTypeSet.contains(status.getType())){
430
					nameToBeFilled.removeStatus(status);
431
				}
432
			}
433
		}
434

    
435
		return fullString;
436
	}
437

    
438

    
439
	private void parseReference(INonViralName nameToBeFilled, String strReference, boolean isInReference){
440

    
441
		INomenclaturalReference ref;
442
		String originalStrReference = strReference;
443

    
444
		//End (just delete end (e.g. '.', may be ambigous for yearPhrase, but no real information gets lost
445
		Matcher endMatcher = getMatcher(referenceEnd + end, strReference);
446
		if (endMatcher.find()){
447
			String endPart = endMatcher.group(0);
448
			strReference = strReference.substring(0, strReference.length() - endPart.length());
449
		}
450

    
451
//		String pDetailYear = ".*" + detailSeparator + detail + fWs + yearSeperator + fWs + yearPhrase + fWs + end;
452
//		Matcher detailYearMatcher = getMatcher(pDetailYear, strReference);
453

    
454
		String strReferenceWithYear = strReference;
455
		//year
456
		String yearPart = null;
457
		String pYearPhrase = yearSeperator + fWs + yearPhrase + fWs + end;
458
		Matcher yearPhraseMatcher = getMatcher(pYearPhrase, strReference);
459
		if (yearPhraseMatcher.find()){
460
			yearPart = yearPhraseMatcher.group(0);
461
			strReference = strReference.substring(0, strReference.length() - yearPart.length());
462
			yearPart = yearPart.replaceFirst(pStart + yearSeperator, "").trim();
463
		}else{
464
			if (nameToBeFilled.isZoological()){
465
				IZoologicalName zooName = (IZoologicalName)CdmBase.deproxy(nameToBeFilled);
466
				yearPart = String.valueOf(zooName.getPublicationYear());
467
				//continue
468
			}else{
469
				ref = makeDetailYearUnparsable(nameToBeFilled,strReference);
470
				ref.setDatePublished(TimePeriodParser.parseString(yearPart));
471
				return;
472
			}
473
		}
474

    
475

    
476
		//detail
477
		String pDetailPhrase = detailSeparator + fWs + detail + fWs + end;
478
		Matcher detailPhraseMatcher = getMatcher(pDetailPhrase, strReference);
479
		if (detailPhraseMatcher.find()){
480
			String detailPart = detailPhraseMatcher.group(0);
481
			strReference = strReference.substring(0, strReference.length() - detailPart.length());
482
			detailPart = detailPart.replaceFirst(pStart + detailSeparator, "").trim();
483
			nameToBeFilled.setNomenclaturalMicroReference(detailPart);
484
		}else{
485
			makeDetailYearUnparsable(nameToBeFilled, strReferenceWithYear);
486
			return;
487
		}
488
		//parse title and author
489
		ref = parseReferenceTitle(strReference, yearPart, isInReference);
490
		if (ref.hasProblem()){
491
		    //we need to protect both caches otherwise the titleCache is incorrectly build from atomized parts
492
			ref.setTitleCache( (isInReference ? "in ":"") +  originalStrReference, true);
493
			ref.setAbbrevTitleCache( (isInReference ? "in ":"") +  originalStrReference, true);
494
		}
495
		nameToBeFilled.setNomenclaturalReference(ref);
496
		int end = Math.min(strReference.length(), ref.getProblemEnds());
497
		ref.setProblemEnds(end);
498
	}
499

    
500
	/**
501
	 * @param nameToBeFilled
502
	 * @param strReference
503
	 * @return
504
	 */
505
	private Reference makeDetailYearUnparsable(INonViralName nameToBeFilled, String strReference) {
506
		Reference ref;
507

    
508
		ref = ReferenceFactory.newGeneric();
509
		ref.setTitleCache(strReference, true);
510
        ref.setAbbrevTitleCache(strReference, true);
511
		ref.setProblemEnds(strReference.length());
512
		ref.addParsingProblem(ParserProblem.CheckDetailOrYear);
513
		nameToBeFilled.addParsingProblem(ParserProblem.CheckDetailOrYear);
514
		nameToBeFilled.setNomenclaturalReference(ref);
515
		return ref;
516
	}
517

    
518
	/**
519
	 * Parses the referenceTitlePart, including the author volume and edition.
520
	 * @param reference
521
	 * @param year
522
	 * @return
523
	 */
524
	public INomenclaturalReference parseReferenceTitle(String strReference, String year, boolean isInReference){
525
		IBook result = null;
526

    
527
		Matcher refSineDetailMatcher = referenceSineDetailPattern.matcher(strReference);
528
		if (! refSineDetailMatcher.matches()){
529
			//TODO ?
530
		}
531

    
532
		Matcher articleMatcher = getMatcher(pArticleReference, strReference);
533
		Matcher bookMatcher = getMatcher(pBookReference, strReference);
534

    
535
		Matcher softArticleMatcher = getMatcher(pSoftArticleReference, strReference);
536
		Matcher bookSectionMatcher = getMatcher(pBookSectionReference, strReference);
537

    
538

    
539
		if(isInReference == false){
540
			if (bookMatcher.matches() ){
541
				result = parseBook(strReference);
542
			}else{
543
				logger.warn("Non-InRef must be book but does not match book: "+ strReference);
544
				result = ReferenceFactory.newBook();
545
				makeUnparsableRefTitle(result, strReference);
546
			}
547
		}else{  //inRef
548
			if (articleMatcher.matches()){
549
				//article without separators like ","
550
				result = parseArticle(strReference);
551
			}else if (softArticleMatcher.matches()){
552
				result = parseArticle(strReference);
553
			}else if (bookSectionMatcher.matches()){
554
				result = parseBookSection(strReference);
555
			}else{
556
				result =  ReferenceFactory.newGeneric();
557
				makeUnparsableRefTitle(result, "in " + strReference);
558
			}
559
		}
560
		//make year
561
		if (makeYear(result, year) == false){
562
			//TODO
563
			logger.warn("Year could not be parsed");
564
		}
565
		result.setProblemStarts(0);
566
		result.setProblemEnds(strReference.length());
567
		return result;
568
	}
569

    
570
	private void makeUnparsableRefTitle(INomenclaturalReference result, String reference){
571
	    //need to set both to protected otherwise titleCache is created from atomized parts
572
	    result.setTitleCache(reference, true);
573
		result.setAbbrevTitleCache(reference, true);
574
		result.addParsingProblem(ParserProblem.UnparsableReferenceTitle);
575
	}
576

    
577
	/**
578
	 * Parses a single date string. If the string is not parsable a StringNotParsableException is thrown
579
	 * @param singleDateString
580
	 * @return
581
	 * @throws StringNotParsableException
582
	 */
583
	private static Partial parseSingleDate(String singleDateString)
584
			throws StringNotParsableException{
585
		Partial dt = new Partial();
586
		if (CdmUtils.isNumeric(singleDateString)){
587
			try {
588
				Integer year = Integer.valueOf(singleDateString.trim());
589
				if (year > 1750 && year < 2050){
590
					dt = dt.with(DateTimeFieldType.year(), year);
591
				}else{
592
					dt = null;
593
				}
594
			} catch (NumberFormatException e) {
595
				logger.debug("Not a Integer format in getCalendar()");
596
				throw new StringNotParsableException(singleDateString + "is not parsable as a single Date");
597
			}
598
		}
599
		return dt;
600
	}
601

    
602

    
603
	/**
604
	 * Parses the publication date part.
605
	 * @param nomRef
606
	 * @param year
607
	 * @return If the string is not parsable <code>false</code>
608
	 * is returned. <code>True</code> otherwise
609
	 */
610
	private boolean makeYear(INomenclaturalReference nomRef, String year){
611
		boolean result = true;
612
		if (year == null){
613
			return false;
614
		}
615
		if ("".equals(year.trim())){
616
			return true;
617
		}
618
		TimePeriod datePublished = TimePeriodParser.parseString(year);
619

    
620
		if (nomRef.getType().equals(ReferenceType.BookSection)){
621
			handleBookSectionYear((IBookSection)nomRef, datePublished);
622
		}else if (nomRef instanceof Reference){
623
			((Reference)nomRef).setDatePublished(datePublished);
624
		}else{
625
			throw new ClassCastException("nom Ref is not of type Reference but " + (nomRef == null? "(null)" : nomRef.getClass()));
626
		}
627
		return result;
628
	}
629

    
630
	private String makeVolume(IVolumeReference nomRef, String strReference){
631
		//volume
632
		String volPart = null;
633
		String pVolPhrase = volumeSeparator +  volume + end;
634
		Matcher volPhraseMatcher = getMatcher(pVolPhrase, strReference);
635
		if (volPhraseMatcher.find()){
636
			volPart = volPhraseMatcher.group(0);
637
			strReference = strReference.substring(0, strReference.length() - volPart.length());
638
			volPart = volPart.replaceFirst(pStart + volumeSeparator, "").trim();
639
			nomRef.setVolume(volPart);
640
		}
641
		return strReference;
642
	}
643

    
644
	private String makeEdition(IBook book, String strReference){
645
		//volume
646
		String editionPart = null;
647
		Matcher editionPhraseMatcher = getMatcher(pEditionPart, strReference);
648

    
649
		Matcher editionVolumeMatcher = getMatcher(pEditionVolPart, strReference);
650
		boolean isEditionAndVol = editionVolumeMatcher.find();
651

    
652
		if (editionPhraseMatcher.find()){
653
			editionPart = editionPhraseMatcher.group(0);
654
			int pos = strReference.indexOf(editionPart);
655
			int posEnd = pos + editionPart.length();
656
			if (isEditionAndVol){
657
				posEnd++;  //delete also comma
658
			}
659
			strReference = strReference.substring(0, pos) + strReference.substring(posEnd);
660
			editionPart = editionPart.replaceFirst(pStart + editionSeparator, "").trim();
661
			book.setEdition(editionPart);
662
		}
663
		return strReference;
664
	}
665

    
666
	private IBook parseBook(String reference){
667
		IBook result = ReferenceFactory.newBook();
668
		reference = makeEdition(result, reference);
669
		reference = makeVolume(result, reference);
670
		result.setAbbrevTitle(reference);
671
		return result;
672
	}
673

    
674

    
675
	private Reference parseArticle(String reference){
676
		//if (articlePatter)
677
		//(type, author, title, volume, editor, series;
678
		Reference result = ReferenceFactory.newArticle();
679
		reference = makeVolume(result, reference);
680
		Reference inJournal = ReferenceFactory.newJournal();
681
		inJournal.setAbbrevTitle(reference);
682
		result.setInReference(inJournal);
683
		return result;
684
	}
685

    
686
	private Reference parseBookSection(String reference){
687
		Reference result = ReferenceFactory.newBookSection();
688

    
689
		Pattern authorPattern = Pattern.compile("^" + authorTeam + referenceAuthorSeparator);
690
		Matcher authorMatcher = authorPattern.matcher(reference);
691
		boolean find = authorMatcher.find();
692
		if (find){
693
			String authorString = authorMatcher.group(0).trim();
694
			String bookString = reference.substring(authorString.length()).trim();
695
			authorString = authorString.substring(0, authorString.length() -1);
696

    
697
			TeamOrPersonBase<?> authorTeam = author(authorString);
698
			IBook inBook = parseBook(bookString);
699
			inBook.setAuthorship(authorTeam);
700
			result.setInBook(inBook);
701
		}else{
702
			logger.warn("Unexpected non matching book section author part");
703
			//TODO do we want to record a 'problem' here?
704
			result.setTitleCache(reference, true);
705
			result.setAbbrevTitleCache(reference, true);
706
		}
707

    
708
		return result;
709
	}
710

    
711
	/**
712
	 * If the publication date of a book section and it's inBook do differ this is usually
713
	 * caused by the fact that a book has been published during a period, because originally
714
	 * it consisted of several parts that only later where put together to one book.
715
	 * If so, the book section's publication date may be a point in time (year or month of year)
716
	 * whereas the books publication date may be a period of several years.
717
	 * Therefore a valid nomenclatural reference string should use the book sections
718
	 * publication date rather then the book's publication date.<BR>
719
	 * This method in general adds the publication date to the book section.
720
	 * An exception exists if the publication date is a period. Then the parser
721
	 * assumes that the nomenclatural reference string does not follow the above rule but
722
	 * the books publication date is set.
723
	 * @param bookSection
724
	 * @param datePublished
725
	 */
726
	private void handleBookSectionYear(IBookSection bookSection, TimePeriod datePublished){
727
		if (datePublished == null || datePublished.getStart() == null || bookSection == null){
728
			return;
729
		}
730
		if (datePublished.isPeriod() && bookSection.getInBook() != null){
731
			bookSection.getInBook().setDatePublished(datePublished);
732
		}else{
733
			bookSection.setDatePublished(datePublished);
734
		}
735
	}
736

    
737
	@Override
738
    public INonViralName parseFullName(String fullNameString){
739
		return parseFullName(fullNameString, null, null);
740
	}
741

    
742
	@Override
743
    public INonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
744

    
745
		if (fullNameString == null){
746
			return null;
747
		}else{
748
			INonViralName result = getNonViralNameInstance(fullNameString, nomCode, rank);
749
			parseFullName(result, fullNameString, rank, false);
750
			return result;
751
		}
752
	}
753

    
754
	@Override
755
	public void parseFullName(INonViralName nameToBeFilledOrig, String fullNameStringOrig, Rank rank, boolean makeEmpty) {
756
	    INonViralName nameToBeFilled = nameToBeFilledOrig;
757

    
758
	    //TODO prol. etc.
759
		boolean hasCheckRankProblem = false; //was rank guessed in a previous parsing process?
760
		if (nameToBeFilled == null){
761
			throw new IllegalArgumentException("NameToBeFilled must not be null in name parser");
762
		}else{
763
			hasCheckRankProblem = nameToBeFilled.hasProblem(ParserProblem.CheckRank);
764
			nameToBeFilled.removeParsingProblem(ParserProblem.CheckRank);
765
		}
766
		String authorString = null;
767
		if (fullNameStringOrig == null){
768
			return;
769
		}
770
		if (makeEmpty){
771
			makeEmpty(nameToBeFilled);
772
		}
773

    
774
		String fullNameString = fullNameStringOrig.replaceAll(oWs , " ").trim();
775

    
776
		fullNameString = removeHybridBlanks(fullNameString);
777
		fullNameString = removeSpNovBlanks(fullNameString);
778
		String[] epi = pattern.split(fullNameString);
779
		try {
780
	    	//cultivars //TODO 2 implement cultivars
781
//		    if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
782
//		    	result = parseCultivar(fullName);
783
//		    }
784

    
785
		    if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
786
		    	//supraGeneric
787
				if (rank != null && ! hasCheckRankProblem  && (rank.isSupraGeneric()|| rank.isGenus())){
788
					nameToBeFilled.setRank(rank);
789
					nameToBeFilled.setGenusOrUninomial(epi[0]);
790
				}
791
				//genus or guess rank
792
				else {
793
					rank = guessUninomialRank(nameToBeFilled, epi[0]);
794
					nameToBeFilled.setRank(rank);
795
					nameToBeFilled.setGenusOrUninomial(epi[0]);
796
					nameToBeFilled.addParsingProblem(ParserProblem.CheckRank);
797
					nameToBeFilled.setProblemStarts(0);
798
					nameToBeFilled.setProblemEnds(epi[0].length());
799
				}
800
				authorString = fullNameString.substring(epi[0].length());
801
			}
802
			 //infra genus
803
			 else if (infraGenusPattern.matcher(fullNameString).matches()){
804
				Rank infraGenericRank;
805
				if ("[unranked]".equals(epi[1])){
806
					infraGenericRank = Rank.INFRAGENERICTAXON();
807
				}else{
808
				    String infraGenericRankMarker = epi[1];
809
				    if (infraGenericRankMarker.startsWith(notho)){  //#3868
810
                        nameToBeFilled.setBinomHybrid(true);
811
                        infraGenericRankMarker = infraGenericRankMarker.substring(notho.length());
812
                    }else if(infraGenericRankMarker.startsWith("n")){
813
                        nameToBeFilled.setBinomHybrid(true);
814
                        infraGenericRankMarker = infraGenericRankMarker.substring(1);
815
                    }
816
                    infraGenericRank = Rank.getRankByIdInVoc(infraGenericRankMarker, nameToBeFilledOrig.getNomenclaturalCode());
817
				}
818
				nameToBeFilled.setRank(infraGenericRank);
819
				nameToBeFilled.setGenusOrUninomial(epi[0]);
820
				nameToBeFilled.setInfraGenericEpithet(epi[2]);
821
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
822
			}
823
			 //aggr. or group
824
			 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
825
				nameToBeFilled.setRank(Rank.getRankByIdInVoc(epi[2]));
826
				nameToBeFilled.setGenusOrUninomial(epi[0]);
827
				nameToBeFilled.setSpecificEpithet(epi[1]);
828
			}
829
		     //species
830
			 else if (speciesPattern.matcher(fullNameString).matches()){
831
				nameToBeFilled.setRank(Rank.SPECIES());
832
				nameToBeFilled.setGenusOrUninomial(epi[0]);
833
				nameToBeFilled.setSpecificEpithet(normalizeSpNov(epi[1]));
834
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
835
			}
836
		    //species with infra generic epithet
837
			 else if (speciesWithInfraGenPattern.matcher(fullNameString).matches()){
838
			     nameToBeFilled.setRank(Rank.SPECIES());
839
	             nameToBeFilled.setGenusOrUninomial(epi[0]);
840
                 nameToBeFilled.setInfraGenericEpithet(epi[2]);
841
	             nameToBeFilled.setSpecificEpithet(epi[4]);
842
	             authorString = fullNameString.substring(epi[0].length() + 2 + epi[2].length() + 2 + epi[4].length());
843
			 }
844
			 //autonym
845
			 else if (autonymPattern.matcher(fullNameString.replace(UTF8.HYBRID.toString(), "")).matches()){
846
			    String infraSpecRankMarker = epi[epi.length - 2];
847
			    boolean isTriHybrid = false;
848
			    if (infraSpecRankMarker.startsWith(notho)){  //#3868
849
                    nameToBeFilled.setTrinomHybrid(true);
850
                    infraSpecRankMarker = infraSpecRankMarker.substring(notho.length());
851
                    isTriHybrid = true;
852
                }else if(infraSpecRankMarker.startsWith("n")){
853
                    nameToBeFilled.setTrinomHybrid(true);
854
                    infraSpecRankMarker = infraSpecRankMarker.substring(1);
855
                    isTriHybrid = true;
856
                }
857
			    if (epi[1].startsWith(UTF8.HYBRID.toString())) {
858
			        nameToBeFilled.setBinomHybrid(true);
859
                }
860
			    nameToBeFilled.setRank(Rank.getRankByIdInVoc(infraSpecRankMarker));
861
				nameToBeFilled.setGenusOrUninomial(epi[0]);
862
				nameToBeFilled.setSpecificEpithet(epi[1]);
863
				nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
864
				int lenSpecies = 2 + epi[0].length()+epi[1].length();
865
				int lenInfraSpecies =  2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
866
				authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
867
			}
868
			 //infraSpecies
869
			 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
870
				String infraSpecRankMarker = epi[2];
871
				String infraSpecEpi = epi[3];
872
				if ("tax.".equals(infraSpecRankMarker)){
873
					infraSpecRankMarker += " " +  epi[3];
874
					infraSpecEpi = epi[4];
875
				}
876
				Rank infraSpecificRank;
877
				if ("[unranked]".equals(infraSpecRankMarker)){
878
					infraSpecificRank = Rank.INFRASPECIFICTAXON();
879
				}else{
880
					String localInfraSpecRankMarker;
881
					if (infraSpecRankMarker.startsWith(notho)){  //#3868
882
	                    nameToBeFilled.setTrinomHybrid(true);
883
	                    localInfraSpecRankMarker = infraSpecRankMarker.substring(notho.length());
884
					}else if(infraSpecRankMarker.startsWith("n")){
885
	                    nameToBeFilled.setTrinomHybrid(true);
886
	                    localInfraSpecRankMarker = infraSpecRankMarker.substring(1);
887
                    }else{
888
                        localInfraSpecRankMarker = infraSpecRankMarker;
889
                    }
890
				    infraSpecificRank = Rank.getRankByIdInVoc(localInfraSpecRankMarker);
891
				}
892
				nameToBeFilled.setRank(infraSpecificRank);
893
				nameToBeFilled.setGenusOrUninomial(epi[0]);
894
				nameToBeFilled.setSpecificEpithet(epi[1]);
895
				nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
896
				authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankMarker.length() + 1 + infraSpecEpi.length());
897

    
898
			 }
899
		      //infraSpecies without marker
900
			 else if (zooInfraSpeciesPattern.matcher(fullNameString).matches()){
901
					String infraSpecEpi = epi[2];
902
					Rank infraSpecificRank = Rank.SUBSPECIES();
903
					nameToBeFilled.setRank(infraSpecificRank);
904
					nameToBeFilled.setGenusOrUninomial(epi[0]);
905
					nameToBeFilled.setSpecificEpithet(epi[1]);
906
					nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
907
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecEpi.length());
908

    
909
			 }//old infraSpecies
910
			 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
911
				boolean implemented = false;
912
				if (implemented){
913
					nameToBeFilled.setRank(Rank.getRankByNameOrIdInVoc(epi[2]));
914
					nameToBeFilled.setGenusOrUninomial(epi[0]);
915
					nameToBeFilled.setSpecificEpithet(epi[1]);
916
					//TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
917
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
918
				}else{
919
					nameToBeFilled.addParsingProblem(ParserProblem.OldInfraSpeciesNotSupported);
920
					nameToBeFilled.setTitleCache(fullNameString, true);
921
					// FIXME Quick fix, otherwise search would not deilver results for unparsable names
922
					nameToBeFilled.setNameCache(fullNameString,true);
923
					// END
924
					logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
925
				}
926
			}
927
		     //hybrid formula
928
			 else if (hybridFormulaPattern.matcher(fullNameString).matches()){
929
				 Set<HybridRelationship> existingRelations = new HashSet<>();
930
				 Set<HybridRelationship> notToBeDeleted = new HashSet<>();
931

    
932
				 for ( HybridRelationship rel : nameToBeFilled.getHybridChildRelations()){
933
				     existingRelations.add(rel);
934
				 }
935

    
936
			     String firstNameString = "";
937
				 String secondNameString = "";
938
				 boolean isFirstName = true;
939
				 for (String str : epi){
940
					 if (str.matches(hybridSign)){
941
						 isFirstName = false;
942
					 }else if(isFirstName){
943
						 firstNameString += " " + str;
944
					 }else {
945
						 secondNameString += " " + str;
946
					 }
947
				 }
948
				 firstNameString = firstNameString.trim();
949
				 secondNameString = secondNameString.trim();
950
				 nameToBeFilled.setHybridFormula(true);
951
				 NomenclaturalCode code = nameToBeFilled.getNomenclaturalCode();
952
				 INonViralName firstName = this.parseFullName(firstNameString, code, rank);
953
				 if (secondNameString.matches(abbrevHybridSecondPart)){
954
				     secondNameString = extendSecondHybridPart(firstName, secondNameString);
955
				 }
956
				 INonViralName secondName = this.parseFullName(secondNameString, code, rank);
957
				 HybridRelationship firstRel = nameToBeFilled.addHybridParent(firstName, HybridRelationshipType.FIRST_PARENT(), null);
958
				 HybridRelationship second = nameToBeFilled.addHybridParent(secondName, HybridRelationshipType.SECOND_PARENT(), null);
959
				 checkRelationExist(firstRel, existingRelations, notToBeDeleted);
960
				 checkRelationExist(second, existingRelations, notToBeDeleted);
961

    
962
				 Rank newRank;
963
				 Rank firstRank = firstName.getRank();
964
				 Rank secondRank = secondName.getRank();
965

    
966
				 if (firstRank == null || (secondRank != null && firstRank.isHigher(secondRank))){
967
					 newRank = secondRank;
968
				 }else{
969
					 newRank = firstRank;
970
				 }
971
				 nameToBeFilled.setRank(newRank);
972
				 //remove not existing hybrid relation
973
				 if (makeEmpty){
974
		            Set<HybridRelationship> tmpChildRels = new HashSet<HybridRelationship>();
975
		            tmpChildRels.addAll(nameToBeFilled.getHybridChildRelations());
976
		            for (HybridRelationship rel : tmpChildRels){
977
		                if (! notToBeDeleted.contains(rel)){
978
		                    nameToBeFilled.removeHybridRelationship(rel);
979
		                }
980
		            }
981
				 }
982
			 }
983
		    //none
984
			else{
985
				nameToBeFilled.addParsingProblem(ParserProblem.UnparsableNamePart);
986
				nameToBeFilled.setTitleCache(fullNameString, true);
987
				// FIXME Quick fix, otherwise search would not deilver results for unparsable names
988
				nameToBeFilled.setNameCache(fullNameString, true);
989
				// END
990
				logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
991
		    }
992
		    //hybrid bits
993
		    handleHybridBits(nameToBeFilled);
994
		    if (!nameToBeFilled.isHybridFormula()){
995
		        Set<HybridRelationship> hybridChildRelations = new HashSet<>();
996
		        hybridChildRelations.addAll(nameToBeFilled.getHybridChildRelations());
997

    
998
		        for (HybridRelationship hybridRelationship: hybridChildRelations){
999
		        	nameToBeFilled.removeHybridRelationship(hybridRelationship);
1000
		        }
1001
		    }
1002

    
1003
			//authors
1004
		    if (StringUtils.isNotBlank(authorString) ){
1005
				handleAuthors(nameToBeFilled, fullNameString, authorString);
1006
			}
1007
		    return;
1008
		} catch (UnknownCdmTypeException e) {
1009
			nameToBeFilled.addParsingProblem(ParserProblem.RankNotSupported);
1010
			nameToBeFilled.setTitleCache(fullNameString, true);
1011
			// FIXME Quick fix, otherwise search would not deilver results for unparsable names
1012
			nameToBeFilled.setNameCache(fullNameString,true);
1013
			// END
1014
			logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " +  fullNameString);
1015
			//return result;
1016
			return;
1017
		}
1018
	}
1019

    
1020
	/**
1021
     * @param string
1022
     * @return
1023
     */
1024
    private String normalizeSpNov(String epi) {
1025
        if (spNovPattern.matcher(epi).matches()){
1026
            epi = epi.replace(".", ". ").replace("\\s+", " ").trim();
1027
        }
1028
        return epi;
1029
    }
1030

    
1031
    /**
1032
     * @param firstName
1033
     * @param secondNameString
1034
     * @return
1035
     */
1036
    private String extendSecondHybridPart(INonViralName firstName, String secondNameString) {
1037
        //first letter of genus given
1038
        if (secondNameString.matches("^" + abbrevHybridGenus + ".*")){
1039
            if (StringUtils.isNotBlank(firstName.getGenusOrUninomial())){
1040
                if (secondNameString.substring(0,1).equals(firstName.getGenusOrUninomial().substring(0, 1))){
1041
                    secondNameString = secondNameString.replaceAll("^" + abbrevHybridGenus, firstName.getGenusOrUninomial() + " ");
1042
                }
1043
            }
1044
        }else if (secondNameString.matches(abbrevHybridSecondPartOnlyInfraSpecies)){
1045
            secondNameString = CdmUtils.concat(" " , firstName.getGenusOrUninomial(), firstName.getSpecificEpithet(), secondNameString);
1046
        }else if (true){  //there will be further alternatives in future maybe
1047
            secondNameString = CdmUtils.concat(" " , firstName.getGenusOrUninomial(), secondNameString);
1048
        }
1049
        return secondNameString;
1050
    }
1051

    
1052
    /**
1053
     * Checks if a hybrid relation exists in the Set of existing relations
1054
     * and <BR>
1055
     *  if it does not adds it to relations not to be deleted <BR>
1056
     *  if it does adds the existing relations to the relations not to be deleted
1057
     *
1058
     * @param firstRel
1059
     * @param existingRelations
1060
     * @param notToBeDeleted
1061
     */
1062
    private void checkRelationExist(
1063
            HybridRelationship newRelation,
1064
            Set<HybridRelationship> existingRelations,
1065
            Set<HybridRelationship> notToBeDeleted) {
1066
        HybridRelationship relToKeep = newRelation;
1067
        for (HybridRelationship existingRelation : existingRelations){
1068
            if (existingRelation.equals(newRelation)){
1069
                relToKeep = existingRelation;
1070
                break;
1071
            }
1072
        }
1073
        notToBeDeleted.add(relToKeep);
1074
    }
1075

    
1076
    private void handleHybridBits(INonViralName nameToBeFilled) {
1077
		//uninomial
1078
		String uninomial = CdmUtils.Nz(nameToBeFilled.getGenusOrUninomial());
1079
		boolean isUninomialHybrid = uninomial.startsWith(hybridSign);
1080
		if (isUninomialHybrid){
1081
			nameToBeFilled.setMonomHybrid(true);
1082
			nameToBeFilled.setGenusOrUninomial(uninomial.replace(hybridSign, ""));
1083
		}
1084
		//infrageneric
1085
		String infrageneric = CdmUtils.Nz(nameToBeFilled.getInfraGenericEpithet());
1086
		boolean isInfraGenericHybrid = infrageneric.startsWith(hybridSign);
1087
		if (isInfraGenericHybrid){
1088
			nameToBeFilled.setBinomHybrid(true);
1089
			nameToBeFilled.setInfraGenericEpithet(infrageneric.replace(hybridSign, ""));
1090
		}
1091
		//species Epi
1092
		String speciesEpi = CdmUtils.Nz(nameToBeFilled.getSpecificEpithet());
1093
		boolean isSpeciesHybrid = speciesEpi.startsWith(hybridSign);
1094
		if (isSpeciesHybrid){
1095
			if (StringUtils.isBlank(infrageneric)){
1096
				nameToBeFilled.setBinomHybrid(true);
1097
			}else{
1098
				nameToBeFilled.setTrinomHybrid(true);
1099
			}
1100
			nameToBeFilled.setSpecificEpithet(speciesEpi.replace(hybridSign, ""));
1101
		}
1102
		//infra species
1103
		String infraSpeciesEpi = CdmUtils.Nz(nameToBeFilled.getInfraSpecificEpithet());
1104
		boolean isInfraSpeciesHybrid = infraSpeciesEpi.startsWith(hybridSign);
1105
		if (isInfraSpeciesHybrid){
1106
			nameToBeFilled.setTrinomHybrid(true);
1107
			nameToBeFilled.setInfraSpecificEpithet(infraSpeciesEpi.replace(hybridSign, ""));
1108
		}
1109

    
1110
	}
1111

    
1112
	private String removeHybridBlanks(String fullNameString) {
1113
		String result = fullNameString
1114
		        .replaceAll(oWs + "[xX]" + oWs + "(?=[A-Z])", " " + hybridSign + " ")
1115
		        .replaceAll(hybridFull, " " + hybridSign).trim();
1116
		if (result.contains(hybridSign + " ") &&
1117
		        result.matches("^" + capitalEpiWord + oWs + hybridSign + oWs + nonCapitalEpiWord + ".*")){
1118
		    result = result.replaceFirst(hybridSign + oWs, hybridSign);
1119
		}
1120
		return result;
1121
	}
1122

    
1123

    
1124
    private String removeSpNovBlanks(String fullNameString) {
1125
        Matcher spNovMatcher = spNovPattern.matcher(fullNameString);
1126
        if (spNovMatcher.find()){
1127
            String spNov = spNovMatcher.group(0);
1128
            String spNovShort = spNov.replaceAll("\\s", "");
1129
            if (spNov.length() != spNovShort.length()){
1130
                fullNameString = fullNameString.replace(spNov, spNovShort);
1131
            }
1132
        }
1133
        return fullNameString;
1134
    }
1135

    
1136

    
1137
	/**
1138
	 * Author parser for external use
1139
	 * @param nonViralName
1140
	 * @param authorString
1141
	 * @throws StringNotParsableException
1142
	 */
1143
	@Override
1144
	public void parseAuthors(INonViralName nonViralNameOrig, String authorString) throws StringNotParsableException{
1145
	    INonViralName nonViralName = CdmBase.deproxy(nonViralNameOrig);
1146
	    TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
1147
		Integer[] years = new Integer[4];
1148
		NomenclaturalCode code = nonViralName.getNameType();
1149
		fullAuthors(authorString, authors, years, code);
1150
		nonViralName.setCombinationAuthorship(authors[0]);
1151
		nonViralName.setExCombinationAuthorship(authors[1]);
1152
		nonViralName.setBasionymAuthorship(authors[2]);
1153
		nonViralName.setExBasionymAuthorship(authors[3]);
1154
		if (nonViralName.isZoological()){
1155
			IZoologicalName zooName = (IZoologicalName)nonViralName;
1156
			zooName.setPublicationYear(years[0]);
1157
			zooName.setOriginalPublicationYear(years[2]);
1158
		}
1159
	}
1160

    
1161
	/**
1162
	 * @param nameToBeFilled
1163
	 * @param fullNameString
1164
	 * @param authorString
1165
	 */
1166
	public void handleAuthors(INonViralName nameToBeFilled, String fullNameString, String authorString) {
1167
	    TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
1168
		Integer[] years = new Integer[4];
1169
		try {
1170
			NomenclaturalCode code = nameToBeFilled.getNameType();
1171
			fullAuthors(authorString, authors, years, code);
1172
		} catch (StringNotParsableException e) {
1173
			nameToBeFilled.addParsingProblem(ParserProblem.UnparsableAuthorPart);
1174
			nameToBeFilled.setTitleCache(fullNameString, true);
1175
			// FIXME Quick fix, otherwise search would not deliver results for unparsable names
1176
			nameToBeFilled.setNameCache(fullNameString, true);
1177
			// END
1178
			logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
1179
		}
1180
		nameToBeFilled.setCombinationAuthorship(authors[0]);
1181
		nameToBeFilled.setExCombinationAuthorship(authors[1]);
1182
		nameToBeFilled.setBasionymAuthorship(authors[2]);
1183
		nameToBeFilled.setExBasionymAuthorship(authors[3]);
1184
		if (nameToBeFilled.isZoological()){
1185
			IZoologicalName zooName = (IZoologicalName)nameToBeFilled;
1186
			zooName.setPublicationYear(years[0]);
1187
			zooName.setOriginalPublicationYear(years[2]);
1188
		}
1189
	}
1190

    
1191
	/**
1192
	 * Guesses the rank of uninomial depending on the typical endings for ranks
1193
	 * @param nameToBeFilled
1194
	 * @param string
1195
	 */
1196
	private Rank guessUninomialRank(INonViralName nameToBeFilled, String uninomial) {
1197
		Rank result = Rank.GENUS();
1198
		if (nameToBeFilled.isBotanical()){
1199
			if (false){
1200
				//
1201
			}else if (uninomial.endsWith("phyta") || uninomial.endsWith("mycota") ){  //plants, fungi
1202
				result = Rank.SECTION_BOTANY();
1203
			}else if (uninomial.endsWith("bionta")){
1204
				result = Rank.SUBKINGDOM();  //TODO
1205
			}else if (uninomial.endsWith("phytina")|| uninomial.endsWith("mycotina")  ){  //plants, fungi
1206
				result = Rank.SUBSECTION_BOTANY();
1207
			}else if (uninomial.endsWith("opsida") || uninomial.endsWith("phyceae") || uninomial.endsWith("mycetes")){  //plants, algae, fungi
1208
				result = Rank.CLASS();
1209
			}else if (uninomial.endsWith("idae") || uninomial.endsWith("phycidae") || uninomial.endsWith("mycetidae")){ //plants, algae, fungi
1210
				result = Rank.SUBCLASS();
1211
			}else if (uninomial.endsWith("ales")){
1212
				result = Rank.ORDER();
1213
			}else if (uninomial.endsWith("ineae")){
1214
				result = Rank.SUBORDER();
1215
			}else if (uninomial.endsWith("aceae")){
1216
					result = Rank.FAMILY();
1217
			}else if (uninomial.endsWith("oideae")){
1218
				result = Rank.SUBFAMILY();
1219
			}else if (uninomial.endsWith("eae")){
1220
				result = Rank.TRIBE();
1221
			}else if (uninomial.endsWith("inae")){
1222
				result = Rank.SUBTRIBE();
1223
			}else if (uninomial.endsWith("ota")){
1224
				result = Rank.KINGDOM();  //TODO
1225
			}
1226
		}else if (nameToBeFilled.isZoological()){
1227
			if (false){
1228
				//
1229
			}else if (uninomial.endsWith("oideae")){
1230
				result = Rank.SUPERFAMILY();
1231
			}else if (uninomial.endsWith("idae")){
1232
					result = Rank.FAMILY();
1233
			}else if (uninomial.endsWith("inae")){
1234
				result = Rank.SUBFAMILY();
1235
			}else if (uninomial.endsWith("inae")){
1236
				result = Rank.SUBFAMILY();
1237
			}else if (uninomial.endsWith("ini")){
1238
				result = Rank.TRIBE();
1239
			}else if (uninomial.endsWith("ina")){
1240
				result = Rank.SUBTRIBE();
1241
			}
1242
		}else{
1243
			//
1244
		}
1245
		return result;
1246
	}
1247

    
1248
	/**
1249
	 * Parses the fullAuthorString
1250
	 * @param fullAuthorString
1251
	 * @return array of Teams containing the Team[0],
1252
	 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
1253
	 */
1254
	protected void fullAuthors (String fullAuthorStringOrig, TeamOrPersonBase<?>[] authors,
1255
	        Integer[] years, NomenclaturalCode code)
1256
			throws StringNotParsableException{
1257
		if (fullAuthorStringOrig == null || code == null){
1258
			return;
1259
		}
1260
		String fullAuthorString = fullAuthorStringOrig.trim();
1261

    
1262
		//Botanic
1263
		if ( code.isBotanical() ){
1264
			if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
1265
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1266
			}
1267
		}
1268
		//Zoo
1269
		else if ( code.isZoological() ){
1270
			if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
1271
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1272
			}
1273
		}else {
1274
			//TODO
1275
			logger.warn ("Full author String parsable only for defined BotanicalNames or ZoologicalNames but this is " + code.getMessage());
1276
			throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1277
		}
1278
		fullAuthorsChecked(fullAuthorString, authors, years);
1279
	}
1280

    
1281
	/*
1282
	 * like fullTeams but without trim and match check
1283
	 */
1284
	protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years){
1285
		int authorShipStart = 0;
1286
		Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
1287

    
1288
		if (basionymMatcher.find(0)){
1289

    
1290
			String basString = basionymMatcher.group();
1291
			basString = basString.replaceFirst(basStart, "");
1292
			basString = basString.replaceAll(basEnd, "").trim();
1293
			authorShipStart = basionymMatcher.end(1);
1294

    
1295
			TeamOrPersonBase<?>[] basAuthors = new TeamOrPersonBase[2];
1296
			Integer[] basYears = new Integer[2];
1297
			authorsAndEx(basString, basAuthors, basYears);
1298
			authors[2]= basAuthors[0];
1299
			years[2] = basYears[0];
1300
			authors[3]= basAuthors[1];
1301
			years[3] = basYears[1];
1302
		}
1303
		if (fullAuthorString.length() >= authorShipStart){
1304
			TeamOrPersonBase<?>[] combinationAuthors = new TeamOrPersonBase[2];
1305
			Integer[] combinationYears = new Integer[2];
1306
			authorsAndEx(fullAuthorString.substring(authorShipStart), combinationAuthors, combinationYears);
1307
			authors[0]= combinationAuthors[0] ;
1308
			years[0] = combinationYears[0];
1309
			authors[1]= combinationAuthors[1];
1310
			years[1] = combinationYears[1];
1311
		}
1312
	}
1313

    
1314

    
1315
	/**
1316
	 * Parses the author and ex-author String
1317
	 * @param authorShipStringOrig String representing the author and the ex-author team
1318
	 * @return array of Teams containing the Team[0] and the ExTeam[1]
1319
	 */
1320
	protected void authorsAndEx (String authorShipStringOrig, TeamOrPersonBase<?>[] authors, Integer[] years){
1321
		//TODO noch allgemeiner am Anfang durch Replace etc.
1322
		String authorShipString = authorShipStringOrig.trim();
1323
		authorShipString = authorShipString.replaceFirst(oWs + "ex" + oWs, " ex. " );
1324

    
1325
		//int authorEnd = authorTeamString.length();
1326
		int authorBegin = 0;
1327

    
1328
		Matcher exAuthorMatcher = exAuthorPattern.matcher(authorShipString);
1329
		if (exAuthorMatcher.find(0)){
1330
			authorBegin = exAuthorMatcher.end(0);
1331
			int exAuthorEnd = exAuthorMatcher.start(0);
1332
			String exString = authorShipString.substring(0, exAuthorEnd).trim();
1333
			authors [1] = author(exString);
1334
		}
1335
		zooOrBotanicAuthor(authorShipString.substring(authorBegin), authors, years );
1336
	}
1337

    
1338
	/**
1339
	 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
1340
	 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies
1341
	 * to the year in case of an zoological name.
1342
	 * @param authorString
1343
	 * @param team
1344
	 * @param year
1345
	 */
1346
	protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase<?>[] team, Integer[] year){
1347
		if (authorString == null){
1348
			return;
1349
		}else if ((authorString = authorString.trim()).length() == 0){
1350
			return;
1351
		}
1352
		Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
1353
		if (zooAuthorAddidtionMatcher.find()){
1354
			int index = zooAuthorAddidtionMatcher.start(0);
1355
			String strYear = authorString.substring(index);
1356
			strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
1357
			year[0] = Integer.valueOf(strYear);
1358
			authorString = authorString.substring(0, index).trim();
1359
		}
1360
		team[0] = author(authorString);
1361
	}
1362

    
1363

    
1364
	/**
1365
	 * Parses an author (person or team) string and returns the Person or Team.
1366
	 * @param authorString String representing the author
1367
	 * @return a person or team
1368
	 */
1369
	public TeamOrPersonBase<?> author (String authorString){
1370
		if (authorString == null){
1371
			return null;
1372
		}else if ((authorString = authorString.trim()).length() == 0){
1373
			return null;
1374
		}else if (! finalTeamSplitterPattern.matcher(authorString).find() && ! authorIsAlwaysTeam){
1375
			//1 Person
1376
			Person result = Person.NewInstance();
1377
			authorString = normalizeNomenclaturalPersonString(authorString);
1378
			result.setNomenclaturalTitle(authorString);
1379
			return result;
1380
		}else{
1381
			return parsedTeam(authorString);
1382
		}
1383

    
1384
	}
1385

    
1386
	/**
1387
	 * Parses an authorString (representing a team into the single authors and add
1388
	 * them to the return Team.
1389
	 * @param authorString
1390
	 * @return Team
1391
	 */
1392
	protected Team parsedTeam(String authorString){
1393
		Team result = Team.NewInstance();
1394
		String[] authors = authorString.split(notFinalTeamSplitter);
1395
		for (int i = 0; i < authors.length; i++){
1396
		    String author = authors[i];
1397
		    if ("al.".equals(author.trim()) && i == authors.length - 1){  //final al. is handled as hasMoreMembers
1398
			    result.setHasMoreMembers(true);
1399
			}else{
1400
			    Person person = Person.NewInstance();
1401
			    author = normalizeNomenclaturalPersonString(author);
1402
	            person.setNomenclaturalTitle(author);
1403
			    result.addTeamMember(person);
1404
			}
1405
		}
1406
		return result;
1407
	}
1408

    
1409

    
1410
/**
1411
     * @param author
1412
     * @return
1413
     */
1414
    private String normalizeNomenclaturalPersonString(String author) {
1415
        if (removeSpaceAfterDot){
1416
            author = author.replaceAll("\\.\\s", ".");
1417
        }
1418
        return author;
1419
    }
1420

    
1421
    //	// Parsing of the given full name that has been identified as a cultivar already somwhere else.
1422
//	// The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
1423
	public IBotanicalName parseCultivar(String fullName) throws StringNotParsableException{
1424
		ICultivarPlantName result = null;
1425
		    String[] words = oWsPattern.split(fullName);
1426

    
1427
		    /* ---------------------------------------------------------------------------------
1428
		     * cultivar
1429
		     * ---------------------------------------------------------------------------------*/
1430
			if (fullName.indexOf(" '") != 0){
1431
				//TODO location of 'xx' is probably not arbitrary
1432
				Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
1433
				if (cultivarMatcher.find()){
1434
					String namePart = fullName.replaceFirst(cultivar, "");
1435

    
1436
					String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
1437
					//OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
1438

    
1439
					result = (ICultivarPlantName)parseFullName(namePart);
1440
					result.setCultivarName(cultivarPart);
1441
				}
1442
			}else if (fullName.indexOf(" cv.") != 0){
1443
				// cv. is old form (not official)
1444
				throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
1445
			}
1446

    
1447
		    /* ---------------------------------------------------------------------------------
1448
		     * cultivar group
1449
		     * ---------------------------------------------------------------------------------
1450
		     */
1451
			// TODO in work
1452
			//Ann. this is not the official way of noting cultivar groups
1453
		    String group = oWs + "Group" + oWs + capitalEpiWord + end;
1454
			Pattern groupRE = Pattern.compile(group);
1455
			Matcher groupMatcher = groupRE.matcher(fullName);
1456
			if (groupMatcher.find()){
1457
		    	if (! words[words.length - 2].equals("group")){
1458
		            throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
1459
		        }else{
1460

    
1461
		        	String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
1462
		        	//OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
1463

    
1464
		        	String cultivarPart = words[words.length -1];
1465
		        	result = (ICultivarPlantName)parseFullName(namePart);
1466
		        	if (result != null){
1467
		        		result.setCultivarName(cultivarPart);
1468

    
1469
		        		//OLD: result.setCultivarGroupName(cultivarPart);
1470
		        	}
1471
		        }
1472

    
1473
		    }
1474
//		    // ---------------------------------------------------------------------------------
1475
//		    if ( result = "" ){
1476
//		        return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
1477
//		    }else{
1478
//		        return result;
1479
	//	    }
1480
			return result; //TODO
1481
	}
1482

    
1483

    
1484
	private void makeEmpty(INonViralName name){
1485
	    TaxonName nameToBeFilled = TaxonName.castAndDeproxy(name);
1486
		nameToBeFilled.setRank(null);
1487
		nameToBeFilled.setTitleCache(null, false);
1488
		nameToBeFilled.setFullTitleCache(null, false);
1489
		nameToBeFilled.setNameCache(null, false);
1490

    
1491
		nameToBeFilled.setAppendedPhrase(null);
1492
		nameToBeFilled.setBasionymAuthorship(null);
1493
		nameToBeFilled.setCombinationAuthorship(null);
1494
		nameToBeFilled.setExBasionymAuthorship(null);
1495
		nameToBeFilled.setExCombinationAuthorship(null);
1496
		nameToBeFilled.setAuthorshipCache(null, false);
1497

    
1498

    
1499
		//delete problems except check rank
1500
		makeProblemEmpty(nameToBeFilled);
1501

    
1502
		// TODO ?
1503
		//nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
1504

    
1505

    
1506
		nameToBeFilled.setGenusOrUninomial(null);
1507
		nameToBeFilled.setInfraGenericEpithet(null);
1508
		nameToBeFilled.setSpecificEpithet(null);
1509
		nameToBeFilled.setInfraSpecificEpithet(null);
1510

    
1511
		nameToBeFilled.setNomenclaturalMicroReference(null);
1512
		nameToBeFilled.setNomenclaturalReference(null);
1513

    
1514
		nameToBeFilled.setHybridFormula(false);
1515
		nameToBeFilled.setMonomHybrid(false);
1516
		nameToBeFilled.setBinomHybrid(false);
1517
		nameToBeFilled.setTrinomHybrid(false);
1518

    
1519
		nameToBeFilled.setAnamorphic(false);
1520

    
1521
		nameToBeFilled.setBreed(null);
1522
		nameToBeFilled.setOriginalPublicationYear(null);
1523

    
1524
		//nom status handled in nom status parser, otherwise we loose additional information like reference etc.
1525
		//hybrid relationships handled in hybrid formula and at end of fullNameParser
1526
	}
1527

    
1528

    
1529
    /**
1530
     * If <code>true</code> author names are parsed such that spaces after the abbreviated
1531
     * firstname are removed (IPNI style). see #7094
1532
     */
1533
    public boolean isRemoveSpaceAfterDot() {
1534
        return removeSpaceAfterDot;
1535
    }
1536
    /**
1537
     * @see #isRemoveSpaceAfterDot()
1538
     */
1539
    public void setRemoveSpaceAfterDot(boolean removeSpaceAfterDot) {
1540
        this.removeSpaceAfterDot = removeSpaceAfterDot;
1541
    }
1542
}
(4-4/8)