Project

General

Profile

Download (55.3 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.strategy.parser;
11

    
12
import java.util.HashSet;
13
import java.util.Set;
14
import java.util.regex.Matcher;
15
import java.util.regex.Pattern;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19
import org.joda.time.DateTimeFieldType;
20
import org.joda.time.Partial;
21

    
22
import eu.etaxonomy.cdm.common.CdmUtils;
23
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
24
import eu.etaxonomy.cdm.model.agent.Person;
25
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.IParsable;
29
import eu.etaxonomy.cdm.model.common.TimePeriod;
30
import eu.etaxonomy.cdm.model.name.BotanicalName;
31
import eu.etaxonomy.cdm.model.name.CultivarPlantName;
32
import eu.etaxonomy.cdm.model.name.HybridRelationship;
33
import eu.etaxonomy.cdm.model.name.HybridRelationshipType;
34
import eu.etaxonomy.cdm.model.name.INonViralName;
35
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
36
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
37
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
38
import eu.etaxonomy.cdm.model.name.NonViralName;
39
import eu.etaxonomy.cdm.model.name.Rank;
40
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
41
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
42
import eu.etaxonomy.cdm.model.name.ZoologicalName;
43
import eu.etaxonomy.cdm.model.reference.IBook;
44
import eu.etaxonomy.cdm.model.reference.IBookSection;
45
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
46
import eu.etaxonomy.cdm.model.reference.IVolumeReference;
47
import eu.etaxonomy.cdm.model.reference.Reference;
48
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
49
import eu.etaxonomy.cdm.model.reference.ReferenceType;
50
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
51
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
52

    
53

    
54
/**
55
 * @author a.mueller
56
 *
57
 */
58
public class NonViralNameParserImpl extends NonViralNameParserImplRegExBase implements INonViralNameParser<INonViralName> {
59
	private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
60

    
61
	// good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
62

    
63
	final static boolean MAKE_EMPTY = true;
64
	final static boolean MAKE_NOT_EMPTY = false;
65

    
66
	private final boolean authorIsAlwaysTeam = false;
67

    
68
	public static NonViralNameParserImpl NewInstance(){
69
		return new NonViralNameParserImpl();
70
	}
71

    
72
	@Override
73
    public NonViralName parseSimpleName(String simpleName){
74
		return parseSimpleName(simpleName, null, null);
75
	}
76

    
77
	@Override
78
    public NonViralName parseSimpleName(String simpleName, NomenclaturalCode code, Rank rank){
79
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
80
		return parseFullName(simpleName, code, rank);
81
	}
82

    
83
	public void parseSimpleName(NonViralName nameToBeFilled, String simpleNameString, Rank rank, boolean makeEmpty){
84
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
85
		parseFullName(nameToBeFilled, simpleNameString, rank, makeEmpty);
86
	}
87

    
88
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
89
		return getNonViralNameInstance(fullString, code, null);
90
	}
91

    
92
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
93
		NonViralName<?> result = null;
94
		if(code ==null) {
95
			boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
96
			boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
97
			boolean isBacteriologicalName = false;
98
			boolean isCultivatedPlantName = false;
99
			if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
100
				if (isBotanicalName){
101
					result = TaxonNameFactory.NewBotanicalInstance(rank);
102
				}else{
103
					result = TaxonNameFactory.NewCultivarInstance(rank);
104
				}
105
			}else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
106
				result = TaxonNameFactory.NewZoologicalInstance(rank);
107
			}else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
108
				result = TaxonNameFactory.NewBacterialInstance(rank);
109
			}else {
110
				result =  TaxonNameFactory.NewNonViralInstance(rank);
111
			}
112
		} else {
113
			switch (code) {
114
			case ICNAFP:
115
				result = TaxonNameFactory.NewBotanicalInstance(rank);
116
				break;
117
			case ICZN:
118
				result = TaxonNameFactory.NewZoologicalInstance(rank);
119
				break;
120
			case ICNCP:
121
				logger.warn("ICNCP parsing not yet implemented");
122
				result = TaxonNameFactory.NewCultivarInstance(rank);
123
				break;
124
			case ICNB:
125
				logger.warn("ICNB not yet implemented");
126
				result = TaxonNameFactory.NewBacterialInstance(rank);
127
				break;
128
			case ICVCN:
129
				logger.error("Viral name is not a NonViralName !!");
130
				break;
131
			default:
132
				// FIXME Unreachable code
133
				logger.error("Unknown Nomenclatural Code !!");
134
			}
135
		}
136
		return result;
137
	}
138

    
139
	@Override
140
    public NonViralName parseReferencedName(String fullReferenceString) {
141
		return parseReferencedName(fullReferenceString, null, null);
142
	}
143

    
144
	@Override
145
    public NonViralName parseReferencedName(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
146
		if (fullReferenceString == null){
147
			return null;
148
		}else{
149
			NonViralName<?> result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
150
			parseReferencedName(result, fullReferenceString, rank, MAKE_EMPTY);
151
			return result;
152
		}
153
	}
154

    
155
	private String standardize(INonViralName nameToBeFilled, String fullReferenceString, boolean makeEmpty){
156
		//Check null and standardize
157
		if (fullReferenceString == null){
158
			//return null;
159
			return null;
160
		}
161
		if (makeEmpty){
162
			makeEmpty(nameToBeFilled);
163
		}
164
		fullReferenceString = fullReferenceString.replaceAll(oWs , " ");
165
		fullReferenceString = fullReferenceString.trim();
166
		if ("".equals(fullReferenceString)){
167
			fullReferenceString = null;
168
		}
169
		return fullReferenceString;
170
	}
171

    
172
	/**
173
	 * Returns the regEx to be used for the full-name depending on the code
174
	 * @param nameToBeFilled
175
	 * @return
176
	 */
177
	private String getCodeSpecificFullNameRegEx(INonViralName nameToBeFilledOrig){
178
	    NonViralName<?> nameToBeFilled = HibernateProxyHelper.deproxy(nameToBeFilledOrig, NonViralName.class);
179
		if (nameToBeFilled instanceof ZoologicalName){
180
			return anyZooFullName;
181
		}else if (nameToBeFilled instanceof BotanicalName) {
182
			return anyBotanicFullName;
183
		}else if (nameToBeFilled instanceof NonViralName) {
184
			return anyBotanicFullName;  //TODO ?
185
		}else{
186
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
187
			return null;
188
		}
189
	}
190

    
191
	/**
192
	 * Returns the regEx to be used for the fsimple-name depending on the code
193
	 * @param nameToBeFilled
194
	 * @return
195
	 */
196
	private String getCodeSpecificSimpleNameRegEx(INonViralName nameToBeFilled){
197
		nameToBeFilled = HibernateProxyHelper.deproxy(nameToBeFilled, NonViralName.class);
198

    
199
		if (nameToBeFilled instanceof ZoologicalName){
200
			return anyZooName;
201
		}else if (nameToBeFilled instanceof NonViralName){
202
			return anyZooName;  //TODO ?
203
		}else if (nameToBeFilled instanceof BotanicalName) {
204
			return anyBotanicName;
205
		}else{
206
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
207
			return null;
208
		}
209
	}
210

    
211
	private Matcher getMatcher(String regEx, String matchString){
212
		Pattern pattern = Pattern.compile(regEx);
213
		Matcher matcher = pattern.matcher(matchString);
214
		return matcher;
215
	}
216

    
217
	@Override
218
    public void parseReferencedName(INonViralName nameToBeFilled, String fullReferenceStringOrig, Rank rank, boolean makeEmpty) {
219
		//standardize
220
		String fullReferenceString = standardize(nameToBeFilled, fullReferenceStringOrig, makeEmpty);
221
		if (fullReferenceString == null){
222
			return;
223
		}
224
		// happens already in standardize(...)
225
//		makeProblemEmpty(nameToBeFilled);
226

    
227
		//make nomenclatural status and replace it by empty string
228
	    fullReferenceString = parseNomStatus(fullReferenceString, nameToBeFilled, makeEmpty);
229
	    nameToBeFilled.setProblemEnds(fullReferenceString.length());
230

    
231
	    //get full name reg
232
		String localFullNameRegEx = getCodeSpecificFullNameRegEx(nameToBeFilled);
233
		//get full name reg
234
		String localSimpleNameRegEx = getCodeSpecificSimpleNameRegEx(nameToBeFilled);
235

    
236
		//separate name and reference part
237
		String nameAndRefSeparatorRegEx = "(^" + localFullNameRegEx + ")("+ referenceSeperator + ")";
238
		Matcher nameAndRefSeparatorMatcher = getMatcher (nameAndRefSeparatorRegEx, fullReferenceString);
239

    
240
		Matcher onlyNameMatcher = getMatcher (localFullNameRegEx, fullReferenceString);
241
		Matcher hybridMatcher = hybridFormulaPattern.matcher(fullReferenceString);
242
		Matcher onlySimpleNameMatcher = getMatcher (localSimpleNameRegEx, fullReferenceString);
243

    
244
		if (onlyNameMatcher.matches()){
245
			makeEmpty = false;
246
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
247
		} else if (nameAndRefSeparatorMatcher.find()){
248
			makeNameWithReference(nameToBeFilled, fullReferenceString, nameAndRefSeparatorMatcher, rank, makeEmpty);
249
		}else if (hybridMatcher.matches() ){
250
		    //I do not remember why we need makeEmpty = false for onlyNameMatcher,
251
		    //but for hybridMatcher we need to remove old Hybrid Relationships if necessary, therefore
252
		    //I removed it from here
253
            parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
254
        }else if (onlySimpleNameMatcher.matches()){
255
			makeEmpty = false;
256
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);	//simpleName not yet implemented
257
		}else{
258
			makeNoFullRefMatch(nameToBeFilled, fullReferenceString, rank);
259
		}
260
		//problem handling. Start and end solved in subroutines
261
		if (! nameToBeFilled.hasProblem()){
262
			makeProblemEmpty(nameToBeFilled);
263
		}
264
	}
265

    
266
	private void makeProblemEmpty(IParsable parsable){
267
		boolean hasCheckRank = parsable.hasProblem(ParserProblem.CheckRank);
268
		parsable.setParsingProblem(0);
269
		if (hasCheckRank){
270
			parsable.addParsingProblem(ParserProblem.CheckRank);
271
		}
272
		parsable.setProblemStarts(-1);
273
		parsable.setProblemEnds(-1);
274
	}
275

    
276
	private void makeNoFullRefMatch(INonViralName nameToBeFilled, String fullReferenceString, Rank rank){
277
	    //try to parse first part as name, but keep in mind full string is not parsable
278
		int start = 0;
279

    
280
		String localFullName = getCodeSpecificFullNameRegEx(nameToBeFilled);
281
		Matcher fullNameMatcher = getMatcher (pStart + localFullName, fullReferenceString);
282
		if (fullNameMatcher.find()){
283
			String fullNameString = fullNameMatcher.group(0);
284
			nameToBeFilled.setProtectedNameCache(false);
285
			parseFullName(nameToBeFilled, fullNameString, rank, false);
286
			String sure = nameToBeFilled.getNameCache();
287
			start = sure.length();
288
		}
289

    
290
//		String localSimpleName = getLocalSimpleName(nameToBeFilled);
291
//		Matcher simpleNameMatcher = getMatcher (start + localSimpleName, fullReferenceString);
292
//		if (simpleNameMatcher.find()){
293
//			String simpleNameString = simpleNameMatcher.group(0);
294
//			parseFullName(nameToBeFilled, simpleNameString, rank, false);
295
//			start = simpleNameString.length();
296
//		}
297

    
298
		//don't parse if name can't be separated
299
		nameToBeFilled.addParsingProblem(ParserProblem.NameReferenceSeparation);
300
		nameToBeFilled.setTitleCache(fullReferenceString, true);
301
		nameToBeFilled.setFullTitleCache(fullReferenceString, true);
302
		// FIXME Quick fix, otherwise search would not deliver results for unparsable names
303
		nameToBeFilled.setNameCache(fullReferenceString, true);
304
		// END
305
		nameToBeFilled.setProblemStarts(start);
306
		nameToBeFilled.setProblemEnds(fullReferenceString.length());
307
		logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");
308
	}
309

    
310
	private void makeNameWithReference(INonViralName nameToBeFilled,
311
			String fullReferenceString,
312
			Matcher nameAndRefSeparatorMatcher,
313
			Rank rank,
314
			boolean makeEmpty){
315

    
316
		String nameAndSeparator = nameAndRefSeparatorMatcher.group(0);
317
	    String name = nameAndRefSeparatorMatcher.group(1);
318
	    String referenceString = fullReferenceString.substring(nameAndRefSeparatorMatcher.end());
319

    
320
	    // is reference an in ref?
321
	    String separator = nameAndSeparator.substring(name.length());
322
		boolean isInReference = separator.matches(inReferenceSeparator);
323

    
324
	    //parse subparts
325

    
326
		int oldProblemEnds = nameToBeFilled.getProblemEnds();
327
		parseFullName(nameToBeFilled, name, rank, makeEmpty);
328
	    nameToBeFilled.setProblemEnds(oldProblemEnds);
329

    
330
		//zoological new combinations should not have a nom. reference to be parsed
331
	    if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
332
			ZoologicalName zooName = CdmBase.deproxy(nameToBeFilled, ZoologicalName.class);
333
			//is name new combination?
334
			if (zooName.getBasionymAuthorship() != null || zooName.getOriginalPublicationYear() != null){
335
				ParserProblem parserProblem = ParserProblem.NewCombinationHasPublication;
336
				zooName.addParsingProblem(parserProblem);
337
				nameToBeFilled.setProblemStarts((nameToBeFilled.getProblemStarts()> -1) ? nameToBeFilled.getProblemStarts(): name.length());
338
				nameToBeFilled.setProblemEnds(Math.max(fullReferenceString.length(), nameToBeFilled.getProblemEnds()));
339
			}
340
		}
341

    
342
	    parseReference(nameToBeFilled, referenceString, isInReference);
343
	    INomenclaturalReference ref = nameToBeFilled.getNomenclaturalReference();
344

    
345
	    //problem start
346
	    int start = nameToBeFilled.getProblemStarts();
347
	    int nameLength = name.length();
348
	    int nameAndSeparatorLength = nameAndSeparator.length();
349
	    int fullRefLength = nameToBeFilled.getFullTitleCache().length();
350

    
351
	    if (nameToBeFilled.isProtectedTitleCache() || nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)){
352
	    	start = Math.max(0, start);
353
		}else{
354
			if (ref != null && ref.getParsingProblem()!=0){
355
				start = Math.max(nameAndSeparatorLength, start);
356
		    	//TODO search within ref
357
			}
358
		}
359

    
360
	    //end
361
	    int end = nameToBeFilled.getProblemEnds();
362

    
363
	    if (ref != null && ref.getParsingProblem()!=0){
364
	    	end = Math.min(nameAndSeparatorLength + ref.getProblemEnds(), end);
365
	    }else{
366
	    	if (nameToBeFilled.isProtectedTitleCache() ){
367
	    		end = Math.min(end, nameAndSeparatorLength);
368
	    		//TODO search within name
369
			}
370
	    }
371
	    nameToBeFilled.setProblemStarts(start);
372
	    nameToBeFilled.setProblemEnds(end);
373

    
374
	    //delegate has problem to name
375
	    if (ref != null && ref.getParsingProblem()!=0){
376
	    	nameToBeFilled.addParsingProblems(ref.getParsingProblem());
377
	    }
378

    
379
	    Reference nomRef;
380
		if ( (nomRef = (Reference)nameToBeFilled.getNomenclaturalReference()) != null ){
381
			nomRef.setAuthorship(nameToBeFilled.getCombinationAuthorship());
382
		}
383
	}
384

    
385
	//TODO make it an Array of status
386
	/**
387
	 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
388
	 * The nomenclatural status part ist deleted from the reference String.
389
	 * @return  String the new (shortend) reference String
390
	 */
391
	public String parseNomStatus(String fullString, INonViralName nameToBeFilled, boolean makeEmpty) {
392
		Set<NomenclaturalStatusType> existingStatusTypeSet = new HashSet<NomenclaturalStatusType>();
393
		Set<NomenclaturalStatusType> newStatusTypeSet = new HashSet<NomenclaturalStatusType>();
394
		for (NomenclaturalStatus existingStatus : nameToBeFilled.getStatus()){
395
			existingStatusTypeSet.add(existingStatus.getType());
396
		}
397

    
398
		String statusString;
399
		Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")");
400
		Matcher hasStatusMatcher = hasStatusPattern.matcher(fullString);
401

    
402
		if (hasStatusMatcher.find()) {
403
			String statusPhrase = hasStatusMatcher.group(0);
404

    
405
			Pattern statusPattern = Pattern.compile(pNomStatus);
406
			Matcher statusMatcher = statusPattern.matcher(statusPhrase);
407
			statusMatcher.find();
408
			statusString = statusMatcher.group(0);
409
			try {
410
			    TaxonNameBase<?,?> nameToBeFilledCasted =  TaxonNameBase.castAndDeproxy(nameToBeFilled);
411
				NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString, nameToBeFilledCasted);
412
				if (! existingStatusTypeSet.contains(nomStatusType)){
413
					NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
414
					nameToBeFilled.addStatus(nomStatus);
415
				}
416
				newStatusTypeSet.add(nomStatusType);
417
				fullString = fullString.replace(statusPhrase, "");
418
			} catch (UnknownCdmTypeException e) {
419
				//Do nothing
420
			}
421
		}
422
		//remove not existing nom status
423
		if (makeEmpty){
424
			Set<NomenclaturalStatus> tmpStatus = new HashSet<NomenclaturalStatus>();
425
			tmpStatus.addAll(nameToBeFilled.getStatus());
426
			for (NomenclaturalStatus status : tmpStatus){
427
				if (! newStatusTypeSet.contains(status.getType())){
428
					nameToBeFilled.removeStatus(status);
429
				}
430
			}
431
		}
432

    
433
		return fullString;
434
	}
435

    
436

    
437
	private void parseReference(INonViralName nameToBeFilled, String strReference, boolean isInReference){
438

    
439
		INomenclaturalReference ref;
440
		String originalStrReference = strReference;
441

    
442
		//End (just delete end (e.g. '.', may be ambigous for yearPhrase, but no real information gets lost
443
		Matcher endMatcher = getMatcher(referenceEnd + end, strReference);
444
		if (endMatcher.find()){
445
			String endPart = endMatcher.group(0);
446
			strReference = strReference.substring(0, strReference.length() - endPart.length());
447
		}
448

    
449
//		String pDetailYear = ".*" + detailSeparator + detail + fWs + yearSeperator + fWs + yearPhrase + fWs + end;
450
//		Matcher detailYearMatcher = getMatcher(pDetailYear, strReference);
451

    
452
		String strReferenceWithYear = strReference;
453
		//year
454
		String yearPart = null;
455
		String pYearPhrase = yearSeperator + fWs + yearPhrase + fWs + end;
456
		Matcher yearPhraseMatcher = getMatcher(pYearPhrase, strReference);
457
		if (yearPhraseMatcher.find()){
458
			yearPart = yearPhraseMatcher.group(0);
459
			strReference = strReference.substring(0, strReference.length() - yearPart.length());
460
			yearPart = yearPart.replaceFirst(pStart + yearSeperator, "").trim();
461
		}else{
462
			if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
463
				ZoologicalName zooName = CdmBase.deproxy(nameToBeFilled, ZoologicalName.class);
464
				yearPart = String.valueOf(zooName.getPublicationYear());
465
				//continue
466
			}else{
467
				ref = makeDetailYearUnparsable(nameToBeFilled,strReference);
468
				ref.setDatePublished(TimePeriodParser.parseString(yearPart));
469
				return;
470
			}
471
		}
472

    
473

    
474
		//detail
475
		String pDetailPhrase = detailSeparator + fWs + detail + fWs + end;
476
		Matcher detailPhraseMatcher = getMatcher(pDetailPhrase, strReference);
477
		if (detailPhraseMatcher.find()){
478
			String detailPart = detailPhraseMatcher.group(0);
479
			strReference = strReference.substring(0, strReference.length() - detailPart.length());
480
			detailPart = detailPart.replaceFirst(pStart + detailSeparator, "").trim();
481
			nameToBeFilled.setNomenclaturalMicroReference(detailPart);
482
		}else{
483
			makeDetailYearUnparsable(nameToBeFilled, strReferenceWithYear);
484
			return;
485
		}
486
		//parse title and author
487
		ref = parseReferenceTitle(strReference, yearPart, isInReference);
488
		if (ref.hasProblem()){
489
		    //we need to protect both caches otherwise the titleCache is incorrectly build from atomized parts
490
			ref.setTitleCache( (isInReference ? "in ":"") +  originalStrReference, true);
491
			ref.setAbbrevTitleCache( (isInReference ? "in ":"") +  originalStrReference, true);
492
		}
493
		nameToBeFilled.setNomenclaturalReference(ref);
494
		int end = Math.min(strReference.length(), ref.getProblemEnds());
495
		ref.setProblemEnds(end);
496
	}
497

    
498
	/**
499
	 * @param nameToBeFilled
500
	 * @param strReference
501
	 * @return
502
	 */
503
	private Reference makeDetailYearUnparsable(INonViralName nameToBeFilled, String strReference) {
504
		Reference ref;
505

    
506
		ref = ReferenceFactory.newGeneric();
507
		ref.setTitleCache(strReference, true);
508
        ref.setAbbrevTitleCache(strReference, true);
509
		ref.setProblemEnds(strReference.length());
510
		ref.addParsingProblem(ParserProblem.CheckDetailOrYear);
511
		nameToBeFilled.addParsingProblem(ParserProblem.CheckDetailOrYear);
512
		nameToBeFilled.setNomenclaturalReference(ref);
513
		return ref;
514
	}
515

    
516
	/**
517
	 * Parses the referenceTitlePart, including the author volume and edition.
518
	 * @param reference
519
	 * @param year
520
	 * @return
521
	 */
522
	public INomenclaturalReference parseReferenceTitle(String strReference, String year, boolean isInReference){
523
		IBook result = null;
524

    
525
		Matcher refSineDetailMatcher = referenceSineDetailPattern.matcher(strReference);
526
		if (! refSineDetailMatcher.matches()){
527
			//TODO ?
528
		}
529

    
530
		Matcher articleMatcher = getMatcher(pArticleReference, strReference);
531
		Matcher bookMatcher = getMatcher(pBookReference, strReference);
532

    
533
		Matcher softArticleMatcher = getMatcher(pSoftArticleReference, strReference);
534
		Matcher bookSectionMatcher = getMatcher(pBookSectionReference, strReference);
535

    
536

    
537
		if(isInReference == false){
538
			if (bookMatcher.matches() ){
539
				result = parseBook(strReference);
540
			}else{
541
				logger.warn("Non-InRef must be book but does not match book: "+ strReference);
542
				result = ReferenceFactory.newBook();
543
				makeUnparsableRefTitle(result, strReference);
544
			}
545
		}else{  //inRef
546
			if (articleMatcher.matches()){
547
				//article without separators like ","
548
				result = parseArticle(strReference);
549
			}else if (softArticleMatcher.matches()){
550
				result = parseArticle(strReference);
551
			}else if (bookSectionMatcher.matches()){
552
				result = parseBookSection(strReference);
553
			}else{
554
				result =  ReferenceFactory.newGeneric();
555
				makeUnparsableRefTitle(result, "in " + strReference);
556
			}
557
		}
558
		//make year
559
		if (makeYear(result, year) == false){
560
			//TODO
561
			logger.warn("Year could not be parsed");
562
		}
563
		result.setProblemStarts(0);
564
		result.setProblemEnds(strReference.length());
565
		return result;
566
	}
567

    
568
	private void makeUnparsableRefTitle(INomenclaturalReference result, String reference){
569
	    //need to set both to protected otherwise titleCache is created from atomized parts
570
	    result.setTitleCache(reference, true);
571
		result.setAbbrevTitleCache(reference, true);
572
		result.addParsingProblem(ParserProblem.UnparsableReferenceTitle);
573
	}
574

    
575
	/**
576
	 * Parses a single date string. If the string is not parsable a StringNotParsableException is thrown
577
	 * @param singleDateString
578
	 * @return
579
	 * @throws StringNotParsableException
580
	 */
581
	private static Partial parseSingleDate(String singleDateString)
582
			throws StringNotParsableException{
583
		Partial dt = new Partial();
584
		if (CdmUtils.isNumeric(singleDateString)){
585
			try {
586
				Integer year = Integer.valueOf(singleDateString.trim());
587
				if (year > 1750 && year < 2050){
588
					dt = dt.with(DateTimeFieldType.year(), year);
589
				}else{
590
					dt = null;
591
				}
592
			} catch (NumberFormatException e) {
593
				logger.debug("Not a Integer format in getCalendar()");
594
				throw new StringNotParsableException(singleDateString + "is not parsable as a single Date");
595
			}
596
		}
597
		return dt;
598
	}
599

    
600

    
601
	/**
602
	 * Parses the publication date part.
603
	 * @param nomRef
604
	 * @param year
605
	 * @return If the string is not parsable <code>false</code>
606
	 * is returned. <code>True</code> otherwise
607
	 */
608
	private boolean makeYear(INomenclaturalReference nomRef, String year){
609
		boolean result = true;
610
		if (year == null){
611
			return false;
612
		}
613
		if ("".equals(year.trim())){
614
			return true;
615
		}
616
		TimePeriod datePublished = TimePeriodParser.parseString(year);
617

    
618
		if (nomRef.getType().equals(ReferenceType.BookSection)){
619
			handleBookSectionYear((IBookSection)nomRef, datePublished);
620
		}else if (nomRef instanceof Reference){
621
			((Reference)nomRef).setDatePublished(datePublished);
622
		}else{
623
			throw new ClassCastException("nom Ref is not of type Reference but " + (nomRef == null? "(null)" : nomRef.getClass()));
624
		}
625
		return result;
626
	}
627

    
628
	private String makeVolume(IVolumeReference nomRef, String strReference){
629
		//volume
630
		String volPart = null;
631
		String pVolPhrase = volumeSeparator +  volume + end;
632
		Matcher volPhraseMatcher = getMatcher(pVolPhrase, strReference);
633
		if (volPhraseMatcher.find()){
634
			volPart = volPhraseMatcher.group(0);
635
			strReference = strReference.substring(0, strReference.length() - volPart.length());
636
			volPart = volPart.replaceFirst(pStart + volumeSeparator, "").trim();
637
			nomRef.setVolume(volPart);
638
		}
639
		return strReference;
640
	}
641

    
642
	private String makeEdition(IBook book, String strReference){
643
		//volume
644
		String editionPart = null;
645
		Matcher editionPhraseMatcher = getMatcher(pEditionPart, strReference);
646

    
647
		Matcher editionVolumeMatcher = getMatcher(pEditionVolPart, strReference);
648
		boolean isEditionAndVol = editionVolumeMatcher.find();
649

    
650
		if (editionPhraseMatcher.find()){
651
			editionPart = editionPhraseMatcher.group(0);
652
			int pos = strReference.indexOf(editionPart);
653
			int posEnd = pos + editionPart.length();
654
			if (isEditionAndVol){
655
				posEnd++;  //delete also comma
656
			}
657
			strReference = strReference.substring(0, pos) + strReference.substring(posEnd);
658
			editionPart = editionPart.replaceFirst(pStart + editionSeparator, "").trim();
659
			book.setEdition(editionPart);
660
		}
661
		return strReference;
662
	}
663

    
664
	private IBook parseBook(String reference){
665
		IBook result = ReferenceFactory.newBook();
666
		reference = makeEdition(result, reference);
667
		reference = makeVolume(result, reference);
668
		result.setAbbrevTitle(reference);
669
		return result;
670
	}
671

    
672

    
673
	private Reference parseArticle(String reference){
674
		//if (articlePatter)
675
		//(type, author, title, volume, editor, series;
676
		Reference result = ReferenceFactory.newArticle();
677
		reference = makeVolume(result, reference);
678
		Reference inJournal = ReferenceFactory.newJournal();
679
		inJournal.setAbbrevTitle(reference);
680
		result.setInReference(inJournal);
681
		return result;
682
	}
683

    
684
	private Reference parseBookSection(String reference){
685
		Reference result = ReferenceFactory.newBookSection();
686

    
687
		Pattern authorPattern = Pattern.compile("^" + authorTeam + referenceAuthorSeparator);
688
		Matcher authorMatcher = authorPattern.matcher(reference);
689
		boolean find = authorMatcher.find();
690
		if (find){
691
			String authorString = authorMatcher.group(0).trim();
692
			String bookString = reference.substring(authorString.length()).trim();
693
			authorString = authorString.substring(0, authorString.length() -1);
694

    
695
			TeamOrPersonBase<?> authorTeam = author(authorString);
696
			IBook inBook = parseBook(bookString);
697
			inBook.setAuthorship(authorTeam);
698
			result.setInBook(inBook);
699
		}else{
700
			logger.warn("Unexpected non matching book section author part");
701
			//TODO do we want to record a 'problem' here?
702
			result.setTitleCache(reference, true);
703
			result.setAbbrevTitleCache(reference, true);
704
		}
705

    
706
		return result;
707
	}
708

    
709
	/**
710
	 * If the publication date of a book section and it's inBook do differ this is usually
711
	 * caused by the fact that a book has been published during a period, because originally
712
	 * it consisted of several parts that only later where put together to one book.
713
	 * If so, the book section's publication date may be a point in time (year or month of year)
714
	 * whereas the books publication date may be a period of several years.
715
	 * Therefore a valid nomenclatural reference string should use the book sections
716
	 * publication date rather then the book's publication date.<BR>
717
	 * This method in general adds the publication date to the book section.
718
	 * An exception exists if the publication date is a period. Then the parser
719
	 * assumes that the nomenclatural reference string does not follow the above rule but
720
	 * the books publication date is set.
721
	 * @param bookSection
722
	 * @param datePublished
723
	 */
724
	private void handleBookSectionYear(IBookSection bookSection, TimePeriod datePublished){
725
		if (datePublished == null || datePublished.getStart() == null || bookSection == null){
726
			return;
727
		}
728
		if (datePublished.isPeriod() && bookSection.getInBook() != null){
729
			bookSection.getInBook().setDatePublished(datePublished);
730
		}else{
731
			bookSection.setDatePublished(datePublished);
732
		}
733
	}
734

    
735
	@Override
736
    public NonViralName parseFullName(String fullNameString){
737
		return parseFullName(fullNameString, null, null);
738
	}
739

    
740
	@Override
741
    public NonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
742

    
743
		if (fullNameString == null){
744
			return null;
745
		}else{
746
			NonViralName<?> result = getNonViralNameInstance(fullNameString, nomCode, rank);
747
			parseFullName(result, fullNameString, rank, false);
748
			return result;
749
		}
750
	}
751

    
752
	@Override
753
	public void parseFullName(INonViralName nameToBeFilledOrig, String fullNameStringOrig, Rank rank, boolean makeEmpty) {
754
	    INonViralName nameToBeFilled = nameToBeFilledOrig;
755

    
756
	    //TODO prol. etc.
757
		boolean hasCheckRankProblem = false; //was rank guessed in a previous parsing process?
758
		if (nameToBeFilled == null){
759
			throw new IllegalArgumentException("NameToBeFilled must not be null in name parser");
760
		}else{
761
			hasCheckRankProblem = nameToBeFilled.hasProblem(ParserProblem.CheckRank);
762
			nameToBeFilled.removeParsingProblem(ParserProblem.CheckRank);
763
		}
764
		String authorString = null;
765
		if (fullNameStringOrig == null){
766
			return;
767
		}
768
		if (makeEmpty){
769
			makeEmpty(nameToBeFilled);
770
		}
771

    
772
		String fullNameString = fullNameStringOrig.replaceAll(oWs , " ").trim();
773

    
774
		fullNameString = removeHybridBlanks(fullNameString);
775
		String[] epi = pattern.split(fullNameString);
776
		try {
777
	    	//cultivars //TODO 2 implement cultivars
778
//		    if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
779
//		    	result = parseCultivar(fullName);
780
//		    }
781

    
782
		    if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
783
		    	//supraGeneric
784
				if (rank != null && ! hasCheckRankProblem  && (rank.isSupraGeneric()|| rank.isGenus())){
785
					nameToBeFilled.setRank(rank);
786
					nameToBeFilled.setGenusOrUninomial(epi[0]);
787
				}
788
				//genus or guess rank
789
				else {
790
					rank = guessUninomialRank(nameToBeFilled, epi[0]);
791
					nameToBeFilled.setRank(rank);
792
					nameToBeFilled.setGenusOrUninomial(epi[0]);
793
					nameToBeFilled.addParsingProblem(ParserProblem.CheckRank);
794
					nameToBeFilled.setProblemStarts(0);
795
					nameToBeFilled.setProblemEnds(epi[0].length());
796
				}
797
				authorString = fullNameString.substring(epi[0].length());
798
			}
799
			 //infra genus
800
			 else if (infraGenusPattern.matcher(fullNameString).matches()){
801
				Rank infraGenericRank;
802
				if ("[unranked]".equals(epi[1])){
803
					infraGenericRank = Rank.INFRAGENERICTAXON();
804
				}else{
805
				    String infraGenericRankMarker = epi[1];
806
				    if (infraGenericRankMarker.startsWith(notho)){  //#3868
807
                        nameToBeFilled.setBinomHybrid(true);
808
                        infraGenericRankMarker = infraGenericRankMarker.substring(notho.length());
809
                    }else if(infraGenericRankMarker.startsWith("n")){
810
                        nameToBeFilled.setBinomHybrid(true);
811
                        infraGenericRankMarker = infraGenericRankMarker.substring(1);
812
                    }
813
                    infraGenericRank = Rank.getRankByIdInVoc(infraGenericRankMarker, nameToBeFilledOrig.getNomenclaturalCode());
814
				}
815
				nameToBeFilled.setRank(infraGenericRank);
816
				nameToBeFilled.setGenusOrUninomial(epi[0]);
817
				nameToBeFilled.setInfraGenericEpithet(epi[2]);
818
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
819
			}
820
			 //aggr. or group
821
			 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
822
				nameToBeFilled.setRank(Rank.getRankByIdInVoc(epi[2]));
823
				nameToBeFilled.setGenusOrUninomial(epi[0]);
824
				nameToBeFilled.setSpecificEpithet(epi[1]);
825
			}
826
		     //species
827
			 else if (speciesPattern.matcher(fullNameString).matches()){
828
				nameToBeFilled.setRank(Rank.SPECIES());
829
				nameToBeFilled.setGenusOrUninomial(epi[0]);
830
				nameToBeFilled.setSpecificEpithet(epi[1]);
831
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
832
			}
833
		    //species with infra generic epithet
834
			 else if (speciesWithInfraGenPattern.matcher(fullNameString).matches()){
835
			     nameToBeFilled.setRank(Rank.SPECIES());
836
	             nameToBeFilled.setGenusOrUninomial(epi[0]);
837
                 nameToBeFilled.setInfraGenericEpithet(epi[2]);
838
	             nameToBeFilled.setSpecificEpithet(epi[4]);
839
	             authorString = fullNameString.substring(epi[0].length() + 2 + epi[2].length() + 2 + epi[4].length());
840
			 }
841
			 //autonym
842
			 else if (autonymPattern.matcher(fullNameString).matches()){
843
				nameToBeFilled.setRank(Rank.getRankByIdInVoc(epi[epi.length - 2]));
844
				nameToBeFilled.setGenusOrUninomial(epi[0]);
845
				nameToBeFilled.setSpecificEpithet(epi[1]);
846
				nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
847
				int lenSpecies = 2 + epi[0].length()+epi[1].length();
848
				int lenInfraSpecies =  2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
849
				authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
850
			}
851
			 //infraSpecies
852
			 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
853
				String infraSpecRankMarker = epi[2];
854
				String infraSpecEpi = epi[3];
855
				if ("tax.".equals(infraSpecRankMarker)){
856
					infraSpecRankMarker += " " +  epi[3];
857
					infraSpecEpi = epi[4];
858
				}
859
				Rank infraSpecificRank;
860
				if ("[unranked]".equals(infraSpecRankMarker)){
861
					infraSpecificRank = Rank.INFRASPECIFICTAXON();
862
				}else{
863
					String localInfraSpecRankMarker;
864
					if (infraSpecRankMarker.startsWith(notho)){  //#3868
865
	                    nameToBeFilled.setTrinomHybrid(true);
866
	                    localInfraSpecRankMarker = infraSpecRankMarker.substring(notho.length());
867
					}else if(infraSpecRankMarker.startsWith("n")){
868
	                    nameToBeFilled.setTrinomHybrid(true);
869
	                    localInfraSpecRankMarker = infraSpecRankMarker.substring(1);
870
                    }else{
871
                        localInfraSpecRankMarker = infraSpecRankMarker;
872
                    }
873
				    infraSpecificRank = Rank.getRankByIdInVoc(localInfraSpecRankMarker);
874
				}
875
				nameToBeFilled.setRank(infraSpecificRank);
876
				nameToBeFilled.setGenusOrUninomial(epi[0]);
877
				nameToBeFilled.setSpecificEpithet(epi[1]);
878
				nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
879
				authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankMarker.length() + 1 + infraSpecEpi.length());
880

    
881
			 }
882
		      //infraSpecies without marker
883
			 else if (zooInfraSpeciesPattern.matcher(fullNameString).matches()){
884
					String infraSpecEpi = epi[2];
885
					Rank infraSpecificRank = Rank.SUBSPECIES();
886
					nameToBeFilled.setRank(infraSpecificRank);
887
					nameToBeFilled.setGenusOrUninomial(epi[0]);
888
					nameToBeFilled.setSpecificEpithet(epi[1]);
889
					nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
890
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecEpi.length());
891

    
892
			 }//old infraSpecies
893
			 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
894
				boolean implemented = false;
895
				if (implemented){
896
					nameToBeFilled.setRank(Rank.getRankByNameOrIdInVoc(epi[2]));
897
					nameToBeFilled.setGenusOrUninomial(epi[0]);
898
					nameToBeFilled.setSpecificEpithet(epi[1]);
899
					//TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
900
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
901
				}else{
902
					nameToBeFilled.addParsingProblem(ParserProblem.OldInfraSpeciesNotSupported);
903
					nameToBeFilled.setTitleCache(fullNameString, true);
904
					// FIXME Quick fix, otherwise search would not deilver results for unparsable names
905
					nameToBeFilled.setNameCache(fullNameString,true);
906
					// END
907
					logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
908
				}
909
			}
910
		     //hybrid formula
911
			 else if (hybridFormulaPattern.matcher(fullNameString).matches()){
912
				 Set<HybridRelationship> existingRelations = new HashSet<HybridRelationship>();
913
				 Set<HybridRelationship> notToBeDeleted = new HashSet<HybridRelationship>();
914

    
915
				 for ( HybridRelationship rel : nameToBeFilled.getHybridChildRelations()){
916
				     existingRelations.add(rel);
917
				 }
918

    
919
			     String firstNameString = "";
920
				 String secondNameString = "";
921
				 boolean isFirstName = true;
922
				 for (String str : epi){
923
					 if (str.matches(hybridSign)){
924
						 isFirstName = false;
925
					 }else if(isFirstName){
926
						 firstNameString += " " + str;
927
					 }else {
928
						 secondNameString += " " + str;
929
					 }
930
				 }
931
				 nameToBeFilled.setHybridFormula(true);
932
				 NomenclaturalCode code = nameToBeFilled.getNomenclaturalCode();
933
				 NonViralName<?> firstName = this.parseFullName(firstNameString.trim(), code, rank);
934
				 NonViralName<?> secondName = this.parseFullName(secondNameString.trim(), code, rank);
935
				 HybridRelationship firstRel = nameToBeFilled.addHybridParent(firstName, HybridRelationshipType.FIRST_PARENT(), null);
936
				 HybridRelationship second = nameToBeFilled.addHybridParent(secondName, HybridRelationshipType.SECOND_PARENT(), null);
937
				 checkRelationExist(firstRel, existingRelations, notToBeDeleted);
938
				 checkRelationExist(second, existingRelations, notToBeDeleted);
939

    
940
				 Rank newRank;
941
				 Rank firstRank = firstName.getRank();
942
				 Rank secondRank = secondName.getRank();
943

    
944
				 if (firstRank == null || firstRank.isHigher(secondRank)){
945
					 newRank = secondRank;
946
				 }else{
947
					 newRank = firstRank;
948
				 }
949
				 nameToBeFilled.setRank(newRank);
950
				 //remove not existing hybrid relation
951
				 if (makeEmpty){
952
		            Set<HybridRelationship> tmpChildRels = new HashSet<HybridRelationship>();
953
		            tmpChildRels.addAll(nameToBeFilled.getHybridChildRelations());
954
		            for (HybridRelationship rel : tmpChildRels){
955
		                if (! notToBeDeleted.contains(rel)){
956
		                    nameToBeFilled.removeHybridRelationship(rel);
957
		                }
958
		            }
959
				 }
960
			 }
961
		    //none
962
			else{
963
				nameToBeFilled.addParsingProblem(ParserProblem.UnparsableNamePart);
964
				nameToBeFilled.setTitleCache(fullNameString, true);
965
				// FIXME Quick fix, otherwise search would not deilver results for unparsable names
966
				nameToBeFilled.setNameCache(fullNameString, true);
967
				// END
968
				logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
969
		    }
970
		    //hybrid bits
971
		    handleHybridBits(nameToBeFilled);
972
		    if (!nameToBeFilled.isHybridFormula()){
973
		        Set<HybridRelationship> hybridChildRelations = new HashSet<HybridRelationship>();
974
		        hybridChildRelations.addAll(nameToBeFilled.getHybridChildRelations());
975

    
976
		        for (HybridRelationship hybridRelationship: hybridChildRelations){
977
		        	nameToBeFilled.removeHybridRelationship(hybridRelationship);
978
		        }
979
		    }
980

    
981
			//authors
982
		    if (StringUtils.isNotBlank(authorString) ){
983
				handleAuthors(nameToBeFilled, fullNameString, authorString);
984
			}
985
		    return;
986
		} catch (UnknownCdmTypeException e) {
987
			nameToBeFilled.addParsingProblem(ParserProblem.RankNotSupported);
988
			nameToBeFilled.setTitleCache(fullNameString, true);
989
			// FIXME Quick fix, otherwise search would not deilver results for unparsable names
990
			nameToBeFilled.setNameCache(fullNameString,true);
991
			// END
992
			logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " +  fullNameString);
993
			//return result;
994
			return;
995
		}
996
	}
997

    
998
	/**
999
     * Checks if a hybrid relation exists in the Set of existing relations
1000
     * and <BR>
1001
     *  if it does not adds it to relations not to be deleted <BR>
1002
     *  if it does adds the existing relations to the relations not to be deleted
1003
     *
1004
     * @param firstRel
1005
     * @param existingRelations
1006
     * @param notToBeDeleted
1007
     */
1008
    private void checkRelationExist(
1009
            HybridRelationship newRelation,
1010
            Set<HybridRelationship> existingRelations,
1011
            Set<HybridRelationship> notToBeDeleted) {
1012
        HybridRelationship relToKeep = newRelation;
1013
        for (HybridRelationship existingRelation : existingRelations){
1014
            if (existingRelation.equals(newRelation)){
1015
                relToKeep = existingRelation;
1016
                break;
1017
            }
1018
        }
1019
        notToBeDeleted.add(relToKeep);
1020
    }
1021

    
1022
    private void handleHybridBits(INonViralName nameToBeFilled) {
1023
		//uninomial
1024
		String uninomial = CdmUtils.Nz(nameToBeFilled.getGenusOrUninomial());
1025
		boolean isUninomialHybrid = uninomial.startsWith(hybridSign);
1026
		if (isUninomialHybrid){
1027
			nameToBeFilled.setMonomHybrid(true);
1028
			nameToBeFilled.setGenusOrUninomial(uninomial.replace(hybridSign, ""));
1029
		}
1030
		//infrageneric
1031
		String infrageneric = CdmUtils.Nz(nameToBeFilled.getInfraGenericEpithet());
1032
		boolean isInfraGenericHybrid = infrageneric.startsWith(hybridSign);
1033
		if (isInfraGenericHybrid){
1034
			nameToBeFilled.setBinomHybrid(true);
1035
			nameToBeFilled.setInfraGenericEpithet(infrageneric.replace(hybridSign, ""));
1036
		}
1037
		//species Epi
1038
		String speciesEpi = CdmUtils.Nz(nameToBeFilled.getSpecificEpithet());
1039
		boolean isSpeciesHybrid = speciesEpi.startsWith(hybridSign);
1040
		if (isSpeciesHybrid){
1041
			if (StringUtils.isBlank(infrageneric)){
1042
				nameToBeFilled.setBinomHybrid(true);
1043
			}else{
1044
				nameToBeFilled.setTrinomHybrid(true);
1045
			}
1046
			nameToBeFilled.setSpecificEpithet(speciesEpi.replace(hybridSign, ""));
1047
		}
1048
		//infra species
1049
		String infraSpeciesEpi = CdmUtils.Nz(nameToBeFilled.getInfraSpecificEpithet());
1050
		boolean isInfraSpeciesHybrid = infraSpeciesEpi.startsWith(hybridSign);
1051
		if (isInfraSpeciesHybrid){
1052
			nameToBeFilled.setTrinomHybrid(true);
1053
			nameToBeFilled.setInfraSpecificEpithet(infraSpeciesEpi.replace(hybridSign, ""));
1054
		}
1055

    
1056
	}
1057

    
1058
	private String removeHybridBlanks(String fullNameString) {
1059
		String result = fullNameString
1060
		        .replaceAll(oWs + "[xX]" + oWs + "(?=[A-Z])", " " + hybridSign + " ")
1061
		        .replaceAll(hybridFull, " " + hybridSign).trim();
1062
		if (result.contains(hybridSign + " ") &&
1063
		        result.matches("^" + capitalEpiWord + oWs + hybridSign + oWs + nonCapitalEpiWord + ".*")){
1064
		    result = result.replaceFirst(hybridSign + oWs, hybridSign);
1065
		}
1066
		return result;
1067
	}
1068

    
1069
	/**
1070
	 * Author parser for external use
1071
	 * @param nonViralName
1072
	 * @param authorString
1073
	 * @throws StringNotParsableException
1074
	 */
1075
	@Override
1076
	public void parseAuthors(INonViralName nonViralNameOrig, String authorString) throws StringNotParsableException{
1077
	    INonViralName nonViralName = nonViralNameOrig;
1078
	    TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
1079
		Integer[] years = new Integer[4];
1080
		Class clazz = nonViralName.getClass();
1081
		fullAuthors(authorString, authors, years, clazz);
1082
		nonViralName.setCombinationAuthorship(authors[0]);
1083
		nonViralName.setExCombinationAuthorship(authors[1]);
1084
		nonViralName.setBasionymAuthorship(authors[2]);
1085
		nonViralName.setExBasionymAuthorship(authors[3]);
1086
		if (nonViralName instanceof ZoologicalName){
1087
			ZoologicalName zooName = CdmBase.deproxy(nonViralName, ZoologicalName.class);
1088
			zooName.setPublicationYear(years[0]);
1089
			zooName.setOriginalPublicationYear(years[2]);
1090
		}
1091
	}
1092

    
1093
	/**
1094
	 * @param nameToBeFilled
1095
	 * @param fullNameString
1096
	 * @param authorString
1097
	 */
1098
	public void handleAuthors(INonViralName nameToBeFilled, String fullNameString, String authorString) {
1099
	    TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
1100
		Integer[] years = new Integer[4];
1101
		try {
1102
			Class<? extends INonViralName> clazz = nameToBeFilled.getClass();
1103
			fullAuthors(authorString, authors, years, clazz);
1104
		} catch (StringNotParsableException e) {
1105
			nameToBeFilled.addParsingProblem(ParserProblem.UnparsableAuthorPart);
1106
			nameToBeFilled.setTitleCache(fullNameString, true);
1107
			// FIXME Quick fix, otherwise search would not deliver results for unparsable names
1108
			nameToBeFilled.setNameCache(fullNameString, true);
1109
			// END
1110
			logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");;
1111
		}
1112
		nameToBeFilled.setCombinationAuthorship(authors[0]);
1113
		nameToBeFilled.setExCombinationAuthorship(authors[1]);
1114
		nameToBeFilled.setBasionymAuthorship(authors[2]);
1115
		nameToBeFilled.setExBasionymAuthorship(authors[3]);
1116
		if (nameToBeFilled instanceof ZoologicalName){
1117
			ZoologicalName zooName = (ZoologicalName)nameToBeFilled;
1118
			zooName.setPublicationYear(years[0]);
1119
			zooName.setOriginalPublicationYear(years[2]);
1120
		}
1121
	}
1122

    
1123
	/**
1124
	 * Guesses the rank of uninomial depending on the typical endings for ranks
1125
	 * @param nameToBeFilled
1126
	 * @param string
1127
	 */
1128
	private Rank guessUninomialRank(INonViralName nameToBeFilled, String uninomial) {
1129
		Rank result = Rank.GENUS();
1130
		if (nameToBeFilled.isInstanceOf(BotanicalName.class)){
1131
			if (false){
1132
				//
1133
			}else if (uninomial.endsWith("phyta") || uninomial.endsWith("mycota") ){  //plants, fungi
1134
				result = Rank.SECTION_BOTANY();
1135
			}else if (uninomial.endsWith("bionta")){
1136
				result = Rank.SUBKINGDOM();  //TODO
1137
			}else if (uninomial.endsWith("phytina")|| uninomial.endsWith("mycotina")  ){  //plants, fungi
1138
				result = Rank.SUBSECTION_BOTANY();
1139
			}else if (uninomial.endsWith("opsida") || uninomial.endsWith("phyceae") || uninomial.endsWith("mycetes")){  //plants, algae, fungi
1140
				result = Rank.CLASS();
1141
			}else if (uninomial.endsWith("idae") || uninomial.endsWith("phycidae") || uninomial.endsWith("mycetidae")){ //plants, algae, fungi
1142
				result = Rank.SUBCLASS();
1143
			}else if (uninomial.endsWith("ales")){
1144
				result = Rank.ORDER();
1145
			}else if (uninomial.endsWith("ineae")){
1146
				result = Rank.SUBORDER();
1147
			}else if (uninomial.endsWith("aceae")){
1148
					result = Rank.FAMILY();
1149
			}else if (uninomial.endsWith("oideae")){
1150
				result = Rank.SUBFAMILY();
1151
			}else if (uninomial.endsWith("eae")){
1152
				result = Rank.TRIBE();
1153
			}else if (uninomial.endsWith("inae")){
1154
				result = Rank.SUBTRIBE();
1155
			}else if (uninomial.endsWith("ota")){
1156
				result = Rank.KINGDOM();  //TODO
1157
			}
1158
		}else if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
1159
			if (false){
1160
				//
1161
			}else if (uninomial.endsWith("oideae")){
1162
				result = Rank.SUPERFAMILY();
1163
			}else if (uninomial.endsWith("idae")){
1164
					result = Rank.FAMILY();
1165
			}else if (uninomial.endsWith("inae")){
1166
				result = Rank.SUBFAMILY();
1167
			}else if (uninomial.endsWith("inae")){
1168
				result = Rank.SUBFAMILY();
1169
			}else if (uninomial.endsWith("ini")){
1170
				result = Rank.TRIBE();
1171
			}else if (uninomial.endsWith("ina")){
1172
				result = Rank.SUBTRIBE();
1173
			}
1174
		}else{
1175
			//
1176
		}
1177
		return result;
1178
	}
1179

    
1180
	/**
1181
	 * Parses the fullAuthorString
1182
	 * @param fullAuthorString
1183
	 * @return array of Teams containing the Team[0],
1184
	 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
1185
	 */
1186
	protected void fullAuthors (String fullAuthorStringOrig, TeamOrPersonBase<?>[] authors, Integer[] years, Class<? extends INonViralName> clazz)
1187
			throws StringNotParsableException{
1188
		if (fullAuthorStringOrig == null || clazz == null){
1189
			return;
1190
		}
1191
		String fullAuthorString = fullAuthorStringOrig.trim();
1192

    
1193
		//Botanic
1194
		if ( BotanicalName.class.isAssignableFrom(clazz) ){
1195
			if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
1196
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1197
			}
1198
		}
1199
		//Zoo
1200
		else if ( ZoologicalName.class.isAssignableFrom(clazz) ){
1201
			if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
1202
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1203
			}
1204
		}else {
1205
			//TODO
1206
			logger.warn ("Full author String parsable only for defined BotanicalNames or ZoologicalNames but this is " + clazz.getSimpleName());
1207
			throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1208
		}
1209
		fullAuthorsChecked(fullAuthorString, authors, years);
1210
	}
1211

    
1212
	/*
1213
	 * like fullTeams but without trim and match check
1214
	 */
1215
	protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years){
1216
		int authorShipStart = 0;
1217
		Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
1218

    
1219
		if (basionymMatcher.find(0)){
1220

    
1221
			String basString = basionymMatcher.group();
1222
			basString = basString.replaceFirst(basStart, "");
1223
			basString = basString.replaceAll(basEnd, "").trim();
1224
			authorShipStart = basionymMatcher.end(1);
1225

    
1226
			TeamOrPersonBase<?>[] basAuthors = new TeamOrPersonBase[2];
1227
			Integer[] basYears = new Integer[2];
1228
			authorsAndEx(basString, basAuthors, basYears);
1229
			authors[2]= basAuthors[0];
1230
			years[2] = basYears[0];
1231
			authors[3]= basAuthors[1];
1232
			years[3] = basYears[1];
1233
		}
1234
		if (fullAuthorString.length() >= authorShipStart){
1235
			TeamOrPersonBase<?>[] combinationAuthors = new TeamOrPersonBase[2];
1236
			Integer[] combinationYears = new Integer[2];
1237
			authorsAndEx(fullAuthorString.substring(authorShipStart), combinationAuthors, combinationYears);
1238
			authors[0]= combinationAuthors[0] ;
1239
			years[0] = combinationYears[0];
1240
			authors[1]= combinationAuthors[1];
1241
			years[1] = combinationYears[1];
1242
		}
1243
	}
1244

    
1245

    
1246
	/**
1247
	 * Parses the author and ex-author String
1248
	 * @param authorShipStringOrig String representing the author and the ex-author team
1249
	 * @return array of Teams containing the Team[0] and the ExTeam[1]
1250
	 */
1251
	protected void authorsAndEx (String authorShipStringOrig, TeamOrPersonBase<?>[] authors, Integer[] years){
1252
		//TODO noch allgemeiner am anfang durch Replace etc.
1253
		String authorShipString = authorShipStringOrig.trim();
1254
		authorShipString = authorShipString.replaceFirst(oWs + "ex" + oWs, " ex. " );
1255

    
1256
		//int authorEnd = authorTeamString.length();
1257
		int authorBegin = 0;
1258

    
1259
		Matcher exAuthorMatcher = exAuthorPattern.matcher(authorShipString);
1260
		if (exAuthorMatcher.find(0)){
1261
			authorBegin = exAuthorMatcher.end(0);
1262
			int exAuthorEnd = exAuthorMatcher.start(0);
1263
			String exString = authorShipString.substring(0, exAuthorEnd).trim();
1264
			authors [1] = author(exString);
1265
		}
1266
		zooOrBotanicAuthor(authorShipString.substring(authorBegin), authors, years );
1267
	}
1268

    
1269
	/**
1270
	 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
1271
	 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies
1272
	 * to the year in case of an zoological name.
1273
	 * @param authorString
1274
	 * @param team
1275
	 * @param year
1276
	 */
1277
	protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase<?>[] team, Integer[] year){
1278
		if (authorString == null){
1279
			return;
1280
		}else if ((authorString = authorString.trim()).length() == 0){
1281
			return;
1282
		}
1283
		Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
1284
		if (zooAuthorAddidtionMatcher.find()){
1285
			int index = zooAuthorAddidtionMatcher.start(0);
1286
			String strYear = authorString.substring(index);
1287
			strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
1288
			year[0] = Integer.valueOf(strYear);
1289
			authorString = authorString.substring(0, index).trim();
1290
		}
1291
		team[0] = author(authorString);
1292
	}
1293

    
1294

    
1295
	/**
1296
	 * Parses an authorTeam String and returns the Team
1297
	 * !!! TODO (atomization not yet implemented)
1298
	 * @param authorTeamString String representing the author team
1299
	 * @return an Team
1300
	 */
1301
	protected TeamOrPersonBase<?> author (String authorString){
1302
		if (authorString == null){
1303
			return null;
1304
		}else if ((authorString = authorString.trim()).length() == 0){
1305
			return null;
1306
		}else if (! finalTeamSplitterPattern.matcher(authorString).find() && ! authorIsAlwaysTeam){
1307
			//1 Person
1308
			Person result = Person.NewInstance();
1309
			result.setNomenclaturalTitle(authorString);
1310
			return result;
1311
		}else{
1312
			return parsedTeam(authorString);
1313
		}
1314

    
1315
	}
1316

    
1317
	/**
1318
	 * Parses an authorString (reprsenting a team into the single authors and add
1319
	 * them to the return Team.
1320
	 * @param authorString
1321
	 * @return Team
1322
	 */
1323
	protected Team parsedTeam(String authorString){
1324
		Team result = Team.NewInstance();
1325
		String[] authors = authorString.split(notFinalTeamSplitter);
1326
		for (int i = 0; i < authors.length; i++){
1327
		    String author = authors[i];
1328
		    if ("al.".equals(author.trim()) && i == authors.length - 1){  //final al. is handled as hasMoreMembers
1329
			    result.setHasMoreMembers(true);
1330
			}else{
1331
			    Person person = Person.NewInstance();
1332
			    person.setNomenclaturalTitle(author);
1333
			    result.addTeamMember(person);
1334
			}
1335
		}
1336
		return result;
1337
	}
1338

    
1339

    
1340
//	// Parsing of the given full name that has been identified as a cultivar already somwhere else.
1341
//	// The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
1342
	public BotanicalName parseCultivar(String fullName)	throws StringNotParsableException{
1343
		CultivarPlantName result = null;
1344
		    String[] words = oWsPattern.split(fullName);
1345

    
1346
		    /* ---------------------------------------------------------------------------------
1347
		     * cultivar
1348
		     * ---------------------------------------------------------------------------------*/
1349
			if (fullName.indexOf(" '") != 0){
1350
				//TODO location of 'xx' is probably not arbitrary
1351
				Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
1352
				if (cultivarMatcher.find()){
1353
					String namePart = fullName.replaceFirst(cultivar, "");
1354

    
1355
					String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
1356
					//OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
1357

    
1358
					result = (CultivarPlantName)parseFullName(namePart);
1359
					result.setCultivarName(cultivarPart);
1360
				}
1361
			}else if (fullName.indexOf(" cv.") != 0){
1362
				// cv. is old form (not official)
1363
				throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
1364
			}
1365

    
1366
		    /* ---------------------------------------------------------------------------------
1367
		     * cultivar group
1368
		     * ---------------------------------------------------------------------------------
1369
		     */
1370
			// TODO in work
1371
			//Ann. this is not the official way of noting cultivar groups
1372
		    String group = oWs + "Group" + oWs + capitalEpiWord + end;
1373
			Pattern groupRE = Pattern.compile(group);
1374
			Matcher groupMatcher = groupRE.matcher(fullName);
1375
			if (groupMatcher.find()){
1376
		    	if (! words[words.length - 2].equals("group")){
1377
		            throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
1378
		        }else{
1379

    
1380
		        	String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
1381
		        	//OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
1382

    
1383
		        	String cultivarPart = words[words.length -1];
1384
		        	result = (CultivarPlantName)parseFullName(namePart);
1385
		        	if (result != null){
1386
		        		result.setCultivarName(cultivarPart);
1387

    
1388
		        		//OLD: result.setCultivarGroupName(cultivarPart);
1389
		        	}
1390
		        }
1391

    
1392
		    }
1393
//		    // ---------------------------------------------------------------------------------
1394
//		    if ( result = "" ){
1395
//		        return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
1396
//		    }else{
1397
//		        return result;
1398
	//	    }
1399
			return result; //TODO
1400
	}
1401

    
1402

    
1403
	private void makeEmpty(INonViralName nameToBeFilled){
1404
		nameToBeFilled.setRank(null);
1405
		nameToBeFilled.setTitleCache(null, false);
1406
		nameToBeFilled.setFullTitleCache(null, false);
1407
		nameToBeFilled.setNameCache(null, false);
1408

    
1409
		nameToBeFilled.setAppendedPhrase(null);
1410
		nameToBeFilled.setBasionymAuthorship(null);
1411
		nameToBeFilled.setCombinationAuthorship(null);
1412
		nameToBeFilled.setExBasionymAuthorship(null);
1413
		nameToBeFilled.setExCombinationAuthorship(null);
1414
		nameToBeFilled.setAuthorshipCache(null, false);
1415

    
1416

    
1417
		//delete problems except check rank
1418
		makeProblemEmpty(nameToBeFilled);
1419

    
1420
		// TODO ?
1421
		//nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
1422

    
1423

    
1424
		nameToBeFilled.setGenusOrUninomial(null);
1425
		nameToBeFilled.setInfraGenericEpithet(null);
1426
		nameToBeFilled.setSpecificEpithet(null);
1427
		nameToBeFilled.setInfraSpecificEpithet(null);
1428

    
1429
		nameToBeFilled.setNomenclaturalMicroReference(null);
1430
		nameToBeFilled.setNomenclaturalReference(null);
1431

    
1432
		nameToBeFilled.setHybridFormula(false);
1433
		nameToBeFilled.setMonomHybrid(false);
1434
		nameToBeFilled.setBinomHybrid(false);
1435
		nameToBeFilled.setTrinomHybrid(false);
1436

    
1437
		if (nameToBeFilled.isInstanceOf(BotanicalName.class)){
1438
			BotanicalName botanicalName = (BotanicalName)nameToBeFilled;
1439
			botanicalName.setAnamorphic(false);
1440
		}
1441

    
1442
		if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
1443
			ZoologicalName zoologicalName = (ZoologicalName)nameToBeFilled;
1444
			zoologicalName.setBreed(null);
1445
			zoologicalName.setOriginalPublicationYear(null);
1446
		}
1447

    
1448
		//nom status handled in nom status parser, otherwise we loose additional information like reference etc.
1449
		//hybrid relationships handled in hybrid formula and at end of fullNameParser
1450
	}
1451

    
1452

    
1453

    
1454
}
(3-3/8)