/**
* Copyright (C) 2009 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9

    
10
package eu.etaxonomy.cdm.strategy.parser;
11

    
12
import java.util.HashSet;
13
import java.util.Set;
14
import java.util.regex.Matcher;
15
import java.util.regex.Pattern;
16

    
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.log4j.Logger;
19
import org.joda.time.DateTimeFieldType;
20
import org.joda.time.Partial;
21

    
22
import eu.etaxonomy.cdm.common.CdmUtils;
23
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
24
import eu.etaxonomy.cdm.model.agent.Person;
25
import eu.etaxonomy.cdm.model.agent.Team;
26
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27
import eu.etaxonomy.cdm.model.common.CdmBase;
28
import eu.etaxonomy.cdm.model.common.IParsable;
29
import eu.etaxonomy.cdm.model.common.TimePeriod;
30
import eu.etaxonomy.cdm.model.name.BacterialName;
31
import eu.etaxonomy.cdm.model.name.BotanicalName;
32
import eu.etaxonomy.cdm.model.name.CultivarPlantName;
33
import eu.etaxonomy.cdm.model.name.HybridRelationship;
34
import eu.etaxonomy.cdm.model.name.HybridRelationshipType;
35
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
36
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
37
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
38
import eu.etaxonomy.cdm.model.name.NonViralName;
39
import eu.etaxonomy.cdm.model.name.Rank;
40
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
41
import eu.etaxonomy.cdm.model.name.ZoologicalName;
42
import eu.etaxonomy.cdm.model.reference.IBook;
43
import eu.etaxonomy.cdm.model.reference.IBookSection;
44
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
45
import eu.etaxonomy.cdm.model.reference.IVolumeReference;
46
import eu.etaxonomy.cdm.model.reference.Reference;
47
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
48
import eu.etaxonomy.cdm.model.reference.ReferenceType;
49
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
50
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
51

    
52

    
53
/**
54
 * @author a.mueller
55
 *
56
 */
57
public class NonViralNameParserImpl extends NonViralNameParserImplRegExBase implements INonViralNameParser<NonViralName> {
58
	private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
59

    
60
	// good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
61

    
62
	final static boolean MAKE_EMPTY = true;
63
	final static boolean MAKE_NOT_EMPTY = false;
64

    
65
	private final boolean authorIsAlwaysTeam = false;
66

    
67
	public static NonViralNameParserImpl NewInstance(){
68
		return new NonViralNameParserImpl();
69
	}
70

    
71
	@Override
72
    public NonViralName parseSimpleName(String simpleName){
73
		return parseSimpleName(simpleName, null, null);
74
	}
75

    
76
	@Override
77
    public NonViralName parseSimpleName(String simpleName, NomenclaturalCode code, Rank rank){
78
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
79
		return parseFullName(simpleName, code, rank);
80
	}
81

    
82
	public void parseSimpleName(NonViralName nameToBeFilled, String simpleNameString, Rank rank, boolean makeEmpty){
83
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
84
		parseFullName(nameToBeFilled, simpleNameString, rank, makeEmpty);
85
	}
86

    
87
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
88
		return getNonViralNameInstance(fullString, code, null);
89
	}
90

    
91
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
92
		NonViralName<?> result = null;
93
		if(code ==null) {
94
			boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
95
			boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
96
			boolean isBacteriologicalName = false;
97
			boolean isCultivatedPlantName = false;
98
			if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
99
				if (isBotanicalName){
100
					result = BotanicalName.NewInstance(rank);
101
				}else{
102
					result = CultivarPlantName.NewInstance(rank);
103
				}
104
			}else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
105
				result = ZoologicalName.NewInstance(rank);
106
			}else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
107
				result = BacterialName.NewInstance(rank);
108
			}else {
109
				result =  NonViralName.NewInstance(rank);
110
			}
111
		} else {
112
			switch (code) {
113
			case ICNAFP:
114
				result = BotanicalName.NewInstance(rank);
115
				break;
116
			case ICZN:
117
				result = ZoologicalName.NewInstance(rank);
118
				break;
119
			case ICNCP:
120
				logger.warn("ICNCP parsing not yet implemented");
121
				result = CultivarPlantName.NewInstance(rank);
122
				break;
123
			case ICNB:
124
				logger.warn("ICNB not yet implemented");
125
				result = BacterialName.NewInstance(rank);
126
				break;
127
			case ICVCN:
128
				logger.error("Viral name is not a NonViralName !!");
129
				break;
130
			default:
131
				// FIXME Unreachable code
132
				logger.error("Unknown Nomenclatural Code !!");
133
			}
134
		}
135
		return result;
136
	}
137

    
138
	@Override
139
    public NonViralName parseReferencedName(String fullReferenceString) {
140
		return parseReferencedName(fullReferenceString, null, null);
141
	}
142

    
143
	@Override
144
    public NonViralName parseReferencedName(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
145
		if (fullReferenceString == null){
146
			return null;
147
		}else{
148
			NonViralName<?> result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
149
			parseReferencedName(result, fullReferenceString, rank, MAKE_EMPTY);
150
			return result;
151
		}
152
	}
153

    
154
	private String standardize(NonViralName<?> nameToBeFilled, String fullReferenceString, boolean makeEmpty){
155
		//Check null and standardize
156
		if (fullReferenceString == null){
157
			//return null;
158
			return null;
159
		}
160
		if (makeEmpty){
161
			makeEmpty(nameToBeFilled);
162
		}
163
		fullReferenceString = fullReferenceString.replaceAll(oWs , " ");
164
		fullReferenceString = fullReferenceString.trim();
165
		if ("".equals(fullReferenceString)){
166
			fullReferenceString = null;
167
		}
168
		return fullReferenceString;
169
	}
170

    
171
	/**
172
	 * Returns the regEx to be used for the full-name depending on the code
173
	 * @param nameToBeFilled
174
	 * @return
175
	 */
176
	private String getCodeSpecificFullNameRegEx(NonViralName<?> nameToBeFilledOrig){
177
	    NonViralName<?> nameToBeFilled = HibernateProxyHelper.deproxy(nameToBeFilledOrig, NonViralName.class);
178
		if (nameToBeFilled instanceof ZoologicalName){
179
			return anyZooFullName;
180
		}else if (nameToBeFilled instanceof BotanicalName) {
181
			return anyBotanicFullName;
182
		}else if (nameToBeFilled instanceof NonViralName) {
183
			return anyBotanicFullName;  //TODO ?
184
		}else{
185
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
186
			return null;
187
		}
188
	}
189

    
190
	/**
191
	 * Returns the regEx to be used for the fsimple-name depending on the code
192
	 * @param nameToBeFilled
193
	 * @return
194
	 */
195
	private String getCodeSpecificSimpleNameRegEx(NonViralName<?> nameToBeFilled){
196
		nameToBeFilled = HibernateProxyHelper.deproxy(nameToBeFilled, NonViralName.class);
197

    
198
		if (nameToBeFilled instanceof ZoologicalName){
199
			return anyZooName;
200
		}else if (nameToBeFilled instanceof NonViralName){
201
			return anyZooName;  //TODO ?
202
		}else if (nameToBeFilled instanceof BotanicalName) {
203
			return anyBotanicName;
204
		}else{
205
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
206
			return null;
207
		}
208
	}
209

    
210
	private Matcher getMatcher(String regEx, String matchString){
211
		Pattern pattern = Pattern.compile(regEx);
212
		Matcher matcher = pattern.matcher(matchString);
213
		return matcher;
214
	}
215

    
216
	@Override
217
    public void parseReferencedName(NonViralName nameToBeFilled, String fullReferenceStringOrig, Rank rank, boolean makeEmpty) {
218
		//standardize
219
		String fullReferenceString = standardize(nameToBeFilled, fullReferenceStringOrig, makeEmpty);
220
		if (fullReferenceString == null){
221
			return;
222
		}
223
		// happens already in standardize(...)
224
//		makeProblemEmpty(nameToBeFilled);
225

    
226
		//make nomenclatural status and replace it by empty string
227
	    fullReferenceString = parseNomStatus(fullReferenceString, nameToBeFilled, makeEmpty);
228
	    nameToBeFilled.setProblemEnds(fullReferenceString.length());
229

    
230
	    //get full name reg
231
		String localFullNameRegEx = getCodeSpecificFullNameRegEx(nameToBeFilled);
232
		//get full name reg
233
		String localSimpleNameRegEx = getCodeSpecificSimpleNameRegEx(nameToBeFilled);
234

    
235
		//separate name and reference part
236
		String nameAndRefSeparatorRegEx = "(^" + localFullNameRegEx + ")("+ referenceSeperator + ")";
237
		Matcher nameAndRefSeparatorMatcher = getMatcher (nameAndRefSeparatorRegEx, fullReferenceString);
238

    
239
		Matcher onlyNameMatcher = getMatcher (localFullNameRegEx, fullReferenceString);
240
		Matcher hybridMatcher = hybridFormulaPattern.matcher(fullReferenceString);
241
		Matcher onlySimpleNameMatcher = getMatcher (localSimpleNameRegEx, fullReferenceString);
242

    
243
		if (onlyNameMatcher.matches()){
244
			makeEmpty = false;
245
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
246
		} else if (nameAndRefSeparatorMatcher.find()){
247
			makeNameWithReference(nameToBeFilled, fullReferenceString, nameAndRefSeparatorMatcher, rank, makeEmpty);
248
		}else if (hybridMatcher.matches() ){
249
		    //I do not remember why we need makeEmpty = false for onlyNameMatcher,
250
		    //but for hybridMatcher we need to remove old Hybrid Relationships if necessary, therefore
251
		    //I removed it from here
252
            parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
253
        }else if (onlySimpleNameMatcher.matches()){
254
			makeEmpty = false;
255
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);	//simpleName not yet implemented
256
		}else{
257
			makeNoFullRefMatch(nameToBeFilled, fullReferenceString, rank);
258
		}
259
		//problem handling. Start and end solved in subroutines
260
		if (! nameToBeFilled.hasProblem()){
261
			makeProblemEmpty(nameToBeFilled);
262
		}
263
	}
264

    
265
	private void makeProblemEmpty(IParsable parsable){
266
		boolean hasCheckRank = parsable.hasProblem(ParserProblem.CheckRank);
267
		parsable.setParsingProblem(0);
268
		if (hasCheckRank){
269
			parsable.addParsingProblem(ParserProblem.CheckRank);
270
		}
271
		parsable.setProblemStarts(-1);
272
		parsable.setProblemEnds(-1);
273
	}
274

    
275
	private void makeNoFullRefMatch(NonViralName<?> nameToBeFilled, String fullReferenceString, Rank rank){
276
	    //try to parse first part as name, but keep in mind full string is not parsable
277
		int start = 0;
278

    
279
		String localFullName = getCodeSpecificFullNameRegEx(nameToBeFilled);
280
		Matcher fullNameMatcher = getMatcher (pStart + localFullName, fullReferenceString);
281
		if (fullNameMatcher.find()){
282
			String fullNameString = fullNameMatcher.group(0);
283
			nameToBeFilled.setProtectedNameCache(false);
284
			parseFullName(nameToBeFilled, fullNameString, rank, false);
285
			String sure = nameToBeFilled.getNameCache();
286
			start = sure.length();
287
		}
288

    
289
//		String localSimpleName = getLocalSimpleName(nameToBeFilled);
290
//		Matcher simpleNameMatcher = getMatcher (start + localSimpleName, fullReferenceString);
291
//		if (simpleNameMatcher.find()){
292
//			String simpleNameString = simpleNameMatcher.group(0);
293
//			parseFullName(nameToBeFilled, simpleNameString, rank, false);
294
//			start = simpleNameString.length();
295
//		}
296

    
297
		//don't parse if name can't be separated
298
		nameToBeFilled.addParsingProblem(ParserProblem.NameReferenceSeparation);
299
		nameToBeFilled.setTitleCache(fullReferenceString,true);
300
		nameToBeFilled.setFullTitleCache(fullReferenceString,true);
301
		// FIXME Quick fix, otherwise search would not deliver results for unparsable names
302
		nameToBeFilled.setNameCache(fullReferenceString,true);
303
		// END
304
		nameToBeFilled.setProblemStarts(start);
305
		nameToBeFilled.setProblemEnds(fullReferenceString.length());
306
		logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");
307
	}
308

    
309
	private void makeNameWithReference(NonViralName<?> nameToBeFilled,
310
			String fullReferenceString,
311
			Matcher nameAndRefSeparatorMatcher,
312
			Rank rank,
313
			boolean makeEmpty){
314

    
315
		String nameAndSeparator = nameAndRefSeparatorMatcher.group(0);
316
	    String name = nameAndRefSeparatorMatcher.group(1);
317
	    String referenceString = fullReferenceString.substring(nameAndRefSeparatorMatcher.end());
318

    
319
	    // is reference an in ref?
320
	    String separator = nameAndSeparator.substring(name.length());
321
		boolean isInReference = separator.matches(inReferenceSeparator);
322

    
323
	    //parse subparts
324

    
325
		int oldProblemEnds = nameToBeFilled.getProblemEnds();
326
		parseFullName(nameToBeFilled, name, rank, makeEmpty);
327
	    nameToBeFilled.setProblemEnds(oldProblemEnds);
328

    
329
		//zoological new combinations should not have a nom. reference to be parsed
330
	    if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
331
			ZoologicalName zooName = CdmBase.deproxy(nameToBeFilled, ZoologicalName.class);
332
			//is name new combination?
333
			if (zooName.getBasionymAuthorship() != null || zooName.getOriginalPublicationYear() != null){
334
				ParserProblem parserProblem = ParserProblem.NewCombinationHasPublication;
335
				zooName.addParsingProblem(parserProblem);
336
				nameToBeFilled.setProblemStarts((nameToBeFilled.getProblemStarts()> -1) ? nameToBeFilled.getProblemStarts(): name.length());
337
				nameToBeFilled.setProblemEnds(Math.max(fullReferenceString.length(), nameToBeFilled.getProblemEnds()));
338
			}
339
		}
340

    
341
	    parseReference(nameToBeFilled, referenceString, isInReference);
342
	    INomenclaturalReference ref = nameToBeFilled.getNomenclaturalReference();
343

    
344
	    //problem start
345
	    int start = nameToBeFilled.getProblemStarts();
346
	    int nameLength = name.length();
347
	    int nameAndSeparatorLength = nameAndSeparator.length();
348
	    int fullRefLength = nameToBeFilled.getFullTitleCache().length();
349

    
350
	    if (nameToBeFilled.isProtectedTitleCache() || nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)){
351
	    	start = Math.max(0, start);
352
		}else{
353
			if (ref != null && ref.getParsingProblem()!=0){
354
				start = Math.max(nameAndSeparatorLength, start);
355
		    	//TODO search within ref
356
			}
357
		}
358

    
359
	    //end
360
	    int end = nameToBeFilled.getProblemEnds();
361

    
362
	    if (ref != null && ref.getParsingProblem()!=0){
363
	    	end = Math.min(nameAndSeparatorLength + ref.getProblemEnds(), end);
364
	    }else{
365
	    	if (nameToBeFilled.isProtectedTitleCache() ){
366
	    		end = Math.min(end, nameAndSeparatorLength);
367
	    		//TODO search within name
368
			}
369
	    }
370
	    nameToBeFilled.setProblemStarts(start);
371
	    nameToBeFilled.setProblemEnds(end);
372

    
373
	    //delegate has problem to name
374
	    if (ref != null && ref.getParsingProblem()!=0){
375
	    	nameToBeFilled.addParsingProblems(ref.getParsingProblem());
376
	    }
377

    
378
	    Reference<?> nomRef;
379
		if ( (nomRef = (Reference<?>)nameToBeFilled.getNomenclaturalReference()) != null ){
380
			nomRef.setAuthorship(nameToBeFilled.getCombinationAuthorship());
381
		}
382
	}
383

    
384
	//TODO make it an Array of status
385
	/**
386
	 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
387
	 * The nomenclatural status part ist deleted from the reference String.
388
	 * @return  String the new (shortend) reference String
389
	 */
390
	public String parseNomStatus(String fullString, NonViralName<?> nameToBeFilled, boolean makeEmpty) {
391
		Set<NomenclaturalStatusType> existingStatusTypeSet = new HashSet<NomenclaturalStatusType>();
392
		Set<NomenclaturalStatusType> newStatusTypeSet = new HashSet<NomenclaturalStatusType>();
393
		for (NomenclaturalStatus existingStatus : nameToBeFilled.getStatus()){
394
			existingStatusTypeSet.add(existingStatus.getType());
395
		}
396

    
397
		String statusString;
398
		Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")");
399
		Matcher hasStatusMatcher = hasStatusPattern.matcher(fullString);
400

    
401
		if (hasStatusMatcher.find()) {
402
			String statusPhrase = hasStatusMatcher.group(0);
403

    
404
			Pattern statusPattern = Pattern.compile(pNomStatus);
405
			Matcher statusMatcher = statusPattern.matcher(statusPhrase);
406
			statusMatcher.find();
407
			statusString = statusMatcher.group(0);
408
			try {
409
				NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString, nameToBeFilled);
410
				if (! existingStatusTypeSet.contains(nomStatusType)){
411
					NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
412
					nameToBeFilled.addStatus(nomStatus);
413
				}
414
				newStatusTypeSet.add(nomStatusType);
415
				fullString = fullString.replace(statusPhrase, "");
416
			} catch (UnknownCdmTypeException e) {
417
				//Do nothing
418
			}
419
		}
420
		//remove not existing nom status
421
		if (makeEmpty){
422
			Set<NomenclaturalStatus> tmpStatus = new HashSet<NomenclaturalStatus>();
423
			tmpStatus.addAll(nameToBeFilled.getStatus());
424
			for (NomenclaturalStatus status : tmpStatus){
425
				if (! newStatusTypeSet.contains(status.getType())){
426
					nameToBeFilled.removeStatus(status);
427
				}
428
			}
429
		}
430

    
431
		return fullString;
432
	}
433

    
434

    
435
	private void parseReference(NonViralName<?> nameToBeFilled, String strReference, boolean isInReference){
436

    
437
		INomenclaturalReference ref;
438
		String originalStrReference = strReference;
439

    
440
		//End (just delete end (e.g. '.', may be ambigous for yearPhrase, but no real information gets lost
441
		Matcher endMatcher = getMatcher(referenceEnd + end, strReference);
442
		if (endMatcher.find()){
443
			String endPart = endMatcher.group(0);
444
			strReference = strReference.substring(0, strReference.length() - endPart.length());
445
		}
446

    
447
//		String pDetailYear = ".*" + detailSeparator + detail + fWs + yearSeperator + fWs + yearPhrase + fWs + end;
448
//		Matcher detailYearMatcher = getMatcher(pDetailYear, strReference);
449

    
450
		String strReferenceWithYear = strReference;
451
		//year
452
		String yearPart = null;
453
		String pYearPhrase = yearSeperator + fWs + yearPhrase + fWs + end;
454
		Matcher yearPhraseMatcher = getMatcher(pYearPhrase, strReference);
455
		if (yearPhraseMatcher.find()){
456
			yearPart = yearPhraseMatcher.group(0);
457
			strReference = strReference.substring(0, strReference.length() - yearPart.length());
458
			yearPart = yearPart.replaceFirst(pStart + yearSeperator, "").trim();
459
		}else{
460
			if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
461
				ZoologicalName zooName = CdmBase.deproxy(nameToBeFilled, ZoologicalName.class);
462
				yearPart = String.valueOf(zooName.getPublicationYear());
463
				//continue
464
			}else{
465
				ref = makeDetailYearUnparsable(nameToBeFilled,strReference);
466
				ref.setDatePublished(TimePeriodParser.parseString(yearPart));
467
				return;
468
			}
469
		}
470

    
471

    
472
		//detail
473
		String pDetailPhrase = detailSeparator + fWs + detail + fWs + end;
474
		Matcher detailPhraseMatcher = getMatcher(pDetailPhrase, strReference);
475
		if (detailPhraseMatcher.find()){
476
			String detailPart = detailPhraseMatcher.group(0);
477
			strReference = strReference.substring(0, strReference.length() - detailPart.length());
478
			detailPart = detailPart.replaceFirst(pStart + detailSeparator, "").trim();
479
			nameToBeFilled.setNomenclaturalMicroReference(detailPart);
480
		}else{
481
			makeDetailYearUnparsable(nameToBeFilled, strReferenceWithYear);
482
			return;
483
		}
484
		//parse title and author
485
		ref = parseReferenceTitle(strReference, yearPart, isInReference);
486
		if (ref.hasProblem()){
487
			ref.setTitleCache( (isInReference ? "in ":"") +  originalStrReference, true);
488
			ref.setAbbrevTitleCache( (isInReference ? "in ":"") +  originalStrReference, true);
489
		}
490
		nameToBeFilled.setNomenclaturalReference(ref);
491
		int end = Math.min(strReference.length(), ref.getProblemEnds());
492
		ref.setProblemEnds(end);
493
	}
494

    
495
	/**
496
	 * @param nameToBeFilled
497
	 * @param strReference
498
	 * @return
499
	 */
500
	private INomenclaturalReference makeDetailYearUnparsable(NonViralName<?> nameToBeFilled, String strReference) {
501
		INomenclaturalReference ref;
502
		//ref = Generic.NewInstance();
503

    
504
		ref = ReferenceFactory.newGeneric();
505
		ref.setTitleCache(strReference,true);
506
		ref.setProblemEnds(strReference.length());
507
		ref.addParsingProblem(ParserProblem.CheckDetailOrYear);
508
		nameToBeFilled.addParsingProblem(ParserProblem.CheckDetailOrYear);
509
		nameToBeFilled.setNomenclaturalReference(ref);
510
		return ref;
511
	}
512

    
513
	/**
514
	 * Parses the referenceTitlePart, including the author volume and edition.
515
	 * @param reference
516
	 * @param year
517
	 * @return
518
	 */
519
	private INomenclaturalReference parseReferenceTitle(String strReference, String year, boolean isInReference){
520
		IBook result = null;
521

    
522
		Matcher refSineDetailMatcher = referenceSineDetailPattern.matcher(strReference);
523
		if (! refSineDetailMatcher.matches()){
524
			//TODO ?
525
		}
526

    
527
		Matcher articleMatcher = getMatcher(pArticleReference, strReference);
528
		Matcher bookMatcher = getMatcher(pBookReference, strReference);
529

    
530
		Matcher softArticleMatcher = getMatcher(pSoftArticleReference, strReference);
531
		Matcher bookSectionMatcher = getMatcher(pBookSectionReference, strReference);
532

    
533

    
534
		if(isInReference == false){
535
			if (bookMatcher.matches() ){
536
				result = parseBook(strReference);
537
			}else{
538
				logger.warn("Non-InRef must be book but does not match book");
539
				result = ReferenceFactory.newBook();
540
				makeUnparsableRefTitle(result, strReference);
541
			}
542
		}else{  //inRef
543
			if (articleMatcher.matches()){
544
				//article without separators like ","
545
				result = parseArticle(strReference);
546
			}else if (softArticleMatcher.matches()){
547
				result = parseArticle(strReference);
548
			}else if (bookSectionMatcher.matches()){
549
				result = parseBookSection(strReference);
550
			}else{
551
				result =  ReferenceFactory.newGeneric();
552
				makeUnparsableRefTitle(result, "in " + strReference);
553
			}
554
		}
555
		//make year
556
		if (makeYear(result, year) == false){
557
			//TODO
558
			logger.warn("Year could not be parsed");
559
		}
560
		result.setProblemStarts(0);
561
		result.setProblemEnds(strReference.length());
562
		return result;
563
	}
564

    
565
	private void makeUnparsableRefTitle(INomenclaturalReference result, String reference){
566
		result.setTitleCache(reference,true);
567
		result.setAbbrevTitleCache(reference,true);
568
		result.addParsingProblem(ParserProblem.UnparsableReferenceTitle);
569
	}
570

    
571
	/**
572
	 * Parses a single date string. If the string is not parsable a StringNotParsableException is thrown
573
	 * @param singleDateString
574
	 * @return
575
	 * @throws StringNotParsableException
576
	 */
577
	private static Partial parseSingleDate(String singleDateString)
578
			throws StringNotParsableException{
579
		Partial dt = new Partial();
580
		if (CdmUtils.isNumeric(singleDateString)){
581
			try {
582
				Integer year = Integer.valueOf(singleDateString.trim());
583
				if (year > 1750 && year < 2050){
584
					dt = dt.with(DateTimeFieldType.year(), year);
585
				}else{
586
					dt = null;
587
				}
588
			} catch (NumberFormatException e) {
589
				logger.debug("Not a Integer format in getCalendar()");
590
				throw new StringNotParsableException(singleDateString + "is not parsable as a single Date");
591
			}
592
		}
593
		return dt;
594
	}
595

    
596

    
597
	/**
598
	 * Parses the publication date part.
599
	 * @param nomRef
600
	 * @param year
601
	 * @return If the string is not parsable <code>false</code>
602
	 * is returned. <code>True</code> otherwise
603
	 */
604
	private boolean makeYear(INomenclaturalReference nomRef, String year){
605
		boolean result = true;
606
		if (year == null){
607
			return false;
608
		}
609
		if ("".equals(year.trim())){
610
			return true;
611
		}
612
		TimePeriod datePublished = TimePeriodParser.parseString(year);
613

    
614
		if (nomRef.getType().equals(ReferenceType.BookSection)){
615
			handleBookSectionYear((IBookSection)nomRef, datePublished);
616
		}else if (nomRef instanceof Reference){
617
			((Reference)nomRef).setDatePublished(datePublished);
618
		}else{
619
			throw new ClassCastException("nom Ref is not of type Reference but " + (nomRef == null? "(null)" : nomRef.getClass()));
620
		}
621
		return result;
622
	}
623

    
624
	private String makeVolume(IVolumeReference nomRef, String strReference){
625
		//volume
626
		String volPart = null;
627
		String pVolPhrase = volumeSeparator +  volume + end;
628
		Matcher volPhraseMatcher = getMatcher(pVolPhrase, strReference);
629
		if (volPhraseMatcher.find()){
630
			volPart = volPhraseMatcher.group(0);
631
			strReference = strReference.substring(0, strReference.length() - volPart.length());
632
			volPart = volPart.replaceFirst(pStart + volumeSeparator, "").trim();
633
			nomRef.setVolume(volPart);
634
		}
635
		return strReference;
636
	}
637

    
638
	private String makeEdition(IBook book, String strReference){
639
		//volume
640
		String editionPart = null;
641
		Matcher editionPhraseMatcher = getMatcher(pEditionPart, strReference);
642

    
643
		Matcher editionVolumeMatcher = getMatcher(pEditionVolPart, strReference);
644
		boolean isEditionAndVol = editionVolumeMatcher.find();
645

    
646
		if (editionPhraseMatcher.find()){
647
			editionPart = editionPhraseMatcher.group(0);
648
			int pos = strReference.indexOf(editionPart);
649
			int posEnd = pos + editionPart.length();
650
			if (isEditionAndVol){
651
				posEnd++;  //delete also comma
652
			}
653
			strReference = strReference.substring(0, pos) + strReference.substring(posEnd);
654
			editionPart = editionPart.replaceFirst(pStart + editionSeparator, "").trim();
655
			book.setEdition(editionPart);
656
		}
657
		return strReference;
658
	}
659

    
660
	private IBook parseBook(String reference){
661
		IBook result = ReferenceFactory.newBook();
662
		reference = makeEdition(result, reference);
663
		reference = makeVolume(result, reference);
664
		result.setTitle(reference);
665
		return result;
666
	}
667

    
668

    
669
	private Reference<?> parseArticle(String reference){
670
		//if (articlePatter)
671
		//(type, author, title, volume, editor, series;
672
		Reference<?> result = ReferenceFactory.newArticle();
673
		reference = makeVolume(result, reference);
674
		Reference<?> inJournal = ReferenceFactory.newJournal();
675
		inJournal.setTitle(reference);
676
		result.setInReference(inJournal);
677
		return result;
678
	}
679

    
680
	private Reference<?> parseBookSection(String reference){
681
		Reference<?> result = ReferenceFactory.newBookSection();
682

    
683
		Pattern authorPattern = Pattern.compile("^" + authorTeam + referenceAuthorSeparator);
684
		Matcher authorMatcher = authorPattern.matcher(reference);
685
		boolean find = authorMatcher.find();
686
		if (find){
687
			String authorString = authorMatcher.group(0).trim();
688
			String bookString = reference.substring(authorString.length()).trim();
689
			authorString = authorString.substring(0, authorString.length() -1);
690

    
691
			TeamOrPersonBase<?> authorTeam = author(authorString);
692
			IBook inBook = parseBook(bookString);
693
			inBook.setAuthorship(authorTeam);
694
			result.setInBook(inBook);
695
		}else{
696
			logger.warn("Unexpected non matching book section author part");
697
			//TODO do we want to record a 'problem' here?
698
			result.setTitleCache(reference,true);
699
		}
700

    
701
		return result;
702
	}
703

    
704
	/**
705
	 * If the publication date of a book section and it's inBook do differ this is usually
706
	 * caused by the fact that a book has been published during a period, because originally
707
	 * it consisted of several parts that only later where put together to one book.
708
	 * If so, the book section's publication date may be a point in time (year or month of year)
709
	 * whereas the books publication date may be a period of several years.
710
	 * Therefore a valid nomenclatural reference string should use the book sections
711
	 * publication date rather then the book's publication date.<BR>
712
	 * This method in general adds the publication date to the book section.
713
	 * An exception exists if the publication date is a period. Then the parser
714
	 * assumes that the nomenclatural reference string does not follow the above rule but
715
	 * the books publication date is set.
716
	 * @param bookSection
717
	 * @param datePublished
718
	 */
719
	private void handleBookSectionYear(IBookSection bookSection, TimePeriod datePublished){
720
		if (datePublished == null || datePublished.getStart() == null || bookSection == null){
721
			return;
722
		}
723
		if (datePublished.isPeriod() && bookSection.getInBook() != null){
724
			bookSection.getInBook().setDatePublished(datePublished);
725
		}else{
726
			bookSection.setDatePublished(datePublished);
727
		}
728
	}
729

    
730
	@Override
731
    public NonViralName parseFullName(String fullNameString){
732
		return parseFullName(fullNameString, null, null);
733
	}
734

    
735
	@Override
736
    public NonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
737

    
738
		if (fullNameString == null){
739
			return null;
740
		}else{
741
			NonViralName<?> result = getNonViralNameInstance(fullNameString, nomCode, rank);
742
			parseFullName(result, fullNameString, rank, false);
743
			return result;
744
		}
745
	}
746

    
747
	@Override
748
	public void parseFullName(NonViralName nameToBeFilledOrig, String fullNameStringOrig, Rank rank, boolean makeEmpty) {
749
	    NonViralName<?> nameToBeFilled = nameToBeFilledOrig;
750

    
751
	    //TODO prol. etc.
752
		boolean hasCheckRankProblem = false; //was rank guessed in a previous parsing process?
753
		if (nameToBeFilled == null){
754
			throw new IllegalArgumentException("NameToBeFilled must not be null in name parser");
755
		}else{
756
			hasCheckRankProblem = nameToBeFilled.hasProblem(ParserProblem.CheckRank);
757
			nameToBeFilled.removeParsingProblem(ParserProblem.CheckRank);
758
		}
759
		String authorString = null;
760
		if (fullNameStringOrig == null){
761
			return;
762
		}
763
		if (makeEmpty){
764
			makeEmpty(nameToBeFilled);
765
		}
766

    
767
		String fullNameString = fullNameStringOrig.replaceAll(oWs , " ").trim();
768

    
769
		fullNameString = removeHybridBlanks(fullNameString);
770
		String[] epi = pattern.split(fullNameString);
771
		try {
772
	    	//cultivars //TODO 2 implement cultivars
773
//		    if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
774
//		    	result = parseCultivar(fullName);
775
//		    }
776

    
777
		    if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
778
		    	//supraGeneric
779
				if (rank != null && ! hasCheckRankProblem  && (rank.isSupraGeneric()|| rank.isGenus())){
780
					nameToBeFilled.setRank(rank);
781
					nameToBeFilled.setGenusOrUninomial(epi[0]);
782
				}
783
				//genus or guess rank
784
				else {
785
					rank = guessUninomialRank(nameToBeFilled, epi[0]);
786
					nameToBeFilled.setRank(rank);
787
					nameToBeFilled.setGenusOrUninomial(epi[0]);
788
					nameToBeFilled.addParsingProblem(ParserProblem.CheckRank);
789
					nameToBeFilled.setProblemStarts(0);
790
					nameToBeFilled.setProblemEnds(epi[0].length());
791
				}
792
				authorString = fullNameString.substring(epi[0].length());
793
			}
794
			 //infra genus
795
			 else if (infraGenusPattern.matcher(fullNameString).matches()){
796
				Rank infraGenericRank;
797

    
798

    
799
				if ("[unranked]".equals(epi[1])){
800
					infraGenericRank = Rank.INFRAGENERICTAXON();
801
				}else{
802
					infraGenericRank = Rank.getRankByIdInVoc(epi[1], nameToBeFilledOrig.getNomenclaturalCode());
803
				}
804
				nameToBeFilled.setRank(infraGenericRank);
805
				nameToBeFilled.setGenusOrUninomial(epi[0]);
806
				nameToBeFilled.setInfraGenericEpithet(epi[2]);
807
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
808
			}
809
			 //aggr. or group
810
			 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
811
				nameToBeFilled.setRank(Rank.getRankByIdInVoc(epi[2]));
812
				nameToBeFilled.setGenusOrUninomial(epi[0]);
813
				nameToBeFilled.setSpecificEpithet(epi[1]);
814
			}
815
			 //species
816
			 else if (speciesPattern.matcher(fullNameString).matches()){
817
				nameToBeFilled.setRank(Rank.SPECIES());
818
				nameToBeFilled.setGenusOrUninomial(epi[0]);
819
				nameToBeFilled.setSpecificEpithet(epi[1]);
820
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
821
			}
822
			 //autonym
823
			 else if (autonymPattern.matcher(fullNameString).matches()){
824
				nameToBeFilled.setRank(Rank.getRankByIdInVoc(epi[epi.length - 2]));
825
				nameToBeFilled.setGenusOrUninomial(epi[0]);
826
				nameToBeFilled.setSpecificEpithet(epi[1]);
827
				nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
828
				int lenSpecies = 2 + epi[0].length()+epi[1].length();
829
				int lenInfraSpecies =  2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
830
				authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
831
			}
832
			 //infraSpecies
833
			 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
834
				String infraSpecRankMarker = epi[2];
835
				String infraSpecEpi = epi[3];
836
				if ("tax.".equals(infraSpecRankMarker)){
837
					infraSpecRankMarker += " " +  epi[3];
838
					infraSpecEpi = epi[4];
839
				}
840
				Rank infraSpecificRank;
841
				if ("[unranked]".equals(infraSpecRankMarker)){
842
					infraSpecificRank = Rank.INFRASPECIFICTAXON();
843
				}else{
844
					infraSpecificRank = Rank.getRankByIdInVoc(infraSpecRankMarker);
845
				}
846
				nameToBeFilled.setRank(infraSpecificRank);
847
				nameToBeFilled.setGenusOrUninomial(epi[0]);
848
				nameToBeFilled.setSpecificEpithet(epi[1]);
849
				nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
850
				authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankMarker.length() + 1 + infraSpecEpi.length());
851

    
852
			 }
853
		      //infraSpecies without marker
854
			 else if (zooInfraSpeciesPattern.matcher(fullNameString).matches()){
855
					String infraSpecEpi = epi[2];
856
					Rank infraSpecificRank = Rank.SUBSPECIES();
857
					nameToBeFilled.setRank(infraSpecificRank);
858
					nameToBeFilled.setGenusOrUninomial(epi[0]);
859
					nameToBeFilled.setSpecificEpithet(epi[1]);
860
					nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
861
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecEpi.length());
862

    
863
			 }//old infraSpecies
864
			 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
865
				boolean implemented = false;
866
				if (implemented){
867
					nameToBeFilled.setRank(Rank.getRankByNameOrIdInVoc(epi[2]));
868
					nameToBeFilled.setGenusOrUninomial(epi[0]);
869
					nameToBeFilled.setSpecificEpithet(epi[1]);
870
					//TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
871
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
872
				}else{
873
					nameToBeFilled.addParsingProblem(ParserProblem.OldInfraSpeciesNotSupported);
874
					nameToBeFilled.setTitleCache(fullNameString,true);
875
					// FIXME Quick fix, otherwise search would not deilver results for unparsable names
876
					nameToBeFilled.setNameCache(fullNameString,true);
877
					// END
878
					logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
879
				}
880
			}
881
		     //hybrid formula
882
			 else if (hybridFormulaPattern.matcher(fullNameString).matches()){
883
				 Set<HybridRelationship> existingRelations = new HashSet<HybridRelationship>();
884
				 Set<HybridRelationship> notToBeDeleted = new HashSet<HybridRelationship>();
885

    
886
				 for ( HybridRelationship rel : nameToBeFilled.getHybridChildRelations()){
887
				     existingRelations.add(rel);
888
				 }
889

    
890
			     String firstNameString = "";
891
				 String secondNameString = "";
892
				 boolean isFirstName = true;
893
				 for (String str : epi){
894
					 if (str.matches(hybridSign)){
895
						 isFirstName = false;
896
					 }else if(isFirstName){
897
						 firstNameString += " " + str;
898
					 }else {
899
						 secondNameString += " " + str;
900
					 }
901
				 }
902
				 nameToBeFilled.setHybridFormula(true);
903
				 NomenclaturalCode code = nameToBeFilled.getNomenclaturalCode();
904
				 NonViralName<?> firstName = this.parseFullName(firstNameString.trim(), code, rank);
905
				 NonViralName<?> secondName = this.parseFullName(secondNameString.trim(), code, rank);
906
				 HybridRelationship firstRel = nameToBeFilled.addHybridParent(firstName, HybridRelationshipType.FIRST_PARENT(), null);
907
				 HybridRelationship second = nameToBeFilled.addHybridParent(secondName, HybridRelationshipType.SECOND_PARENT(), null);
908
				 checkRelationExist(firstRel, existingRelations, notToBeDeleted);
909
				 checkRelationExist(second, existingRelations, notToBeDeleted);
910

    
911
				 Rank newRank;
912
				 Rank firstRank = firstName.getRank();
913
				 Rank secondRank = secondName.getRank();
914

    
915
				 if (firstRank == null || firstRank.isHigher(secondRank)){
916
					 newRank = secondRank;
917
				 }else{
918
					 newRank = firstRank;
919
				 }
920
				 nameToBeFilled.setRank(newRank);
921
				 //remove not existing hybrid relation
922
				 if (makeEmpty){
923
		            Set<HybridRelationship> tmpChildRels = new HashSet<HybridRelationship>();
924
		            tmpChildRels.addAll(nameToBeFilled.getHybridChildRelations());
925
		            for (HybridRelationship rel : tmpChildRels){
926
		                if (! notToBeDeleted.contains(rel)){
927
		                    nameToBeFilled.removeHybridRelationship(rel);
928
		                }
929
		            }
930
				 }
931
			 }
932
		    //none
933
			else{
934
				nameToBeFilled.addParsingProblem(ParserProblem.UnparsableNamePart);
935
				nameToBeFilled.setTitleCache(fullNameString,true);
936
				// FIXME Quick fix, otherwise search would not deilver results for unparsable names
937
				nameToBeFilled.setNameCache(fullNameString,true);
938
				// END
939
				logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
940
		    }
941
		    //hybrid bits
942
		    handleHybridBits(nameToBeFilled);
943
		    if (!nameToBeFilled.isHybridFormula()){
944
		        Set<HybridRelationship> hybridChildRelations = new HashSet<HybridRelationship>();
945
		        hybridChildRelations.addAll(nameToBeFilled.getHybridChildRelations());
946

    
947
		        for (HybridRelationship hybridRelationship: hybridChildRelations){
948
		        	nameToBeFilled.removeHybridRelationship(hybridRelationship);
949
		        }
950
		    }
951

    
952
			//authors
953
		    if (StringUtils.isNotBlank(authorString) ){
954
				handleAuthors(nameToBeFilled, fullNameString, authorString);
955
			}
956
		    return;
957
		} catch (UnknownCdmTypeException e) {
958
			nameToBeFilled.addParsingProblem(ParserProblem.RankNotSupported);
959
			nameToBeFilled.setTitleCache(fullNameString,true);
960
			// FIXME Quick fix, otherwise search would not deilver results for unparsable names
961
			nameToBeFilled.setNameCache(fullNameString,true);
962
			// END
963
			logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " +  fullNameString);
964
			//return result;
965
			return;
966
		}
967
	}
968

    
969
	/**
970
     * Checks if a hybrid relation exists in the Set of existing relations
971
     * and <BR>
972
     *  if it does not adds it to relations not to be deleted <BR>
973
     *  if it does adds the existing relations to the relations not to be deleted
974
     *
975
     * @param firstRel
976
     * @param existingRelations
977
     * @param notToBeDeleted
978
     */
979
    private void checkRelationExist(
980
            HybridRelationship newRelation,
981
            Set<HybridRelationship> existingRelations,
982
            Set<HybridRelationship> notToBeDeleted) {
983
        HybridRelationship relToKeep = newRelation;
984
        for (HybridRelationship existingRelation : existingRelations){
985
            if (existingRelation.equals(newRelation)){
986
                relToKeep = existingRelation;
987
                break;
988
            }
989
        }
990
        notToBeDeleted.add(relToKeep);
991
    }
992

    
993
    private void handleHybridBits(NonViralName<?> nameToBeFilled) {
994
		//uninomial
995
		String uninomial = CdmUtils.Nz(nameToBeFilled.getGenusOrUninomial());
996
		boolean isUninomialHybrid = uninomial.startsWith(hybridSign);
997
		if (isUninomialHybrid){
998
			nameToBeFilled.setMonomHybrid(true);
999
			nameToBeFilled.setGenusOrUninomial(uninomial.replace(hybridSign, ""));
1000
		}
1001
		//infrageneric
1002
		String infrageneric = CdmUtils.Nz(nameToBeFilled.getInfraGenericEpithet());
1003
		boolean isInfraGenericHybrid = infrageneric.startsWith(hybridSign);
1004
		if (isInfraGenericHybrid){
1005
			nameToBeFilled.setBinomHybrid(true);
1006
			nameToBeFilled.setInfraGenericEpithet(infrageneric.replace(hybridSign, ""));
1007
		}
1008
		//species Epi
1009
		String speciesEpi = CdmUtils.Nz(nameToBeFilled.getSpecificEpithet());
1010
		boolean isSpeciesHybrid = speciesEpi.startsWith(hybridSign);
1011
		if (isSpeciesHybrid){
1012
			if (StringUtils.isBlank(infrageneric)){
1013
				nameToBeFilled.setBinomHybrid(true);
1014
			}else{
1015
				nameToBeFilled.setTrinomHybrid(true);
1016
			}
1017
			nameToBeFilled.setSpecificEpithet(speciesEpi.replace(hybridSign, ""));
1018
		}
1019
		//infra species
1020
		String infraSpeciesEpi = CdmUtils.Nz(nameToBeFilled.getInfraSpecificEpithet());
1021
		boolean isInfraSpeciesHybrid = infraSpeciesEpi.startsWith(hybridSign);
1022
		if (isInfraSpeciesHybrid){
1023
			nameToBeFilled.setTrinomHybrid(true);
1024
			nameToBeFilled.setInfraSpecificEpithet(infraSpeciesEpi.replace(hybridSign, ""));
1025
		}
1026

    
1027
	}
1028

    
1029
	private String removeHybridBlanks(String fullNameString) {
1030
		String result = fullNameString
1031
		        .replaceAll(oWs + "[xX]" + oWs + "(?=[A-Z])", " " + hybridSign + " ")
1032
		        .replaceAll(hybridFull, " "+hybridSign).trim();
1033
		return result;
1034
	}
1035

    
1036
	/**
1037
	 * Author parser for external use
1038
	 * @param nonViralName
1039
	 * @param authorString
1040
	 * @throws StringNotParsableException
1041
	 */
1042
	@Override
1043
	public void parseAuthors(NonViralName nonViralNameOrig, String authorString) throws StringNotParsableException{
1044
	    NonViralName<?> nonViralName = nonViralNameOrig;
1045
	    TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
1046
		Integer[] years = new Integer[4];
1047
		Class<? extends NonViralName> clazz = nonViralName.getClass();
1048
		fullAuthors(authorString, authors, years, clazz);
1049
		nonViralName.setCombinationAuthorship(authors[0]);
1050
		nonViralName.setExCombinationAuthorship(authors[1]);
1051
		nonViralName.setBasionymAuthorship(authors[2]);
1052
		nonViralName.setExBasionymAuthorship(authors[3]);
1053
		if (nonViralName instanceof ZoologicalName){
1054
			ZoologicalName zooName = CdmBase.deproxy(nonViralName, ZoologicalName.class);
1055
			zooName.setPublicationYear(years[0]);
1056
			zooName.setOriginalPublicationYear(years[2]);
1057
		}
1058
	}
1059

    
1060
	/**
1061
	 * @param nameToBeFilled
1062
	 * @param fullNameString
1063
	 * @param authorString
1064
	 */
1065
	public void handleAuthors(NonViralName nameToBeFilledOrig, String fullNameString, String authorString) {
1066
	    NonViralName<?> nameToBeFilled = nameToBeFilledOrig;
1067
        TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
1068
		Integer[] years = new Integer[4];
1069
		try {
1070
			Class<? extends NonViralName> clazz = nameToBeFilled.getClass();
1071
			fullAuthors(authorString, authors, years, clazz);
1072
		} catch (StringNotParsableException e) {
1073
			nameToBeFilled.addParsingProblem(ParserProblem.UnparsableAuthorPart);
1074
			nameToBeFilled.setTitleCache(fullNameString,true);
1075
			// FIXME Quick fix, otherwise search would not deliver results for unparsable names
1076
			nameToBeFilled.setNameCache(fullNameString,true);
1077
			// END
1078
			logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");;
1079
		}
1080
		nameToBeFilled.setCombinationAuthorship(authors[0]);
1081
		nameToBeFilled.setExCombinationAuthorship(authors[1]);
1082
		nameToBeFilled.setBasionymAuthorship(authors[2]);
1083
		nameToBeFilled.setExBasionymAuthorship(authors[3]);
1084
		if (nameToBeFilled instanceof ZoologicalName){
1085
			ZoologicalName zooName = (ZoologicalName)nameToBeFilled;
1086
			zooName.setPublicationYear(years[0]);
1087
			zooName.setOriginalPublicationYear(years[2]);
1088
		}
1089
	}
1090

    
1091
	/**
1092
	 * Guesses the rank of uninomial depending on the typical endings for ranks
1093
	 * @param nameToBeFilled
1094
	 * @param string
1095
	 */
1096
	private Rank guessUninomialRank(NonViralName nameToBeFilled, String uninomial) {
1097
		Rank result = Rank.GENUS();
1098
		if (nameToBeFilled.isInstanceOf(BotanicalName.class)){
1099
			if (false){
1100
				//
1101
			}else if (uninomial.endsWith("phyta") || uninomial.endsWith("mycota") ){  //plants, fungi
1102
				result = Rank.SECTION_BOTANY();
1103
			}else if (uninomial.endsWith("bionta")){
1104
				result = Rank.SUBKINGDOM();  //TODO
1105
			}else if (uninomial.endsWith("phytina")|| uninomial.endsWith("mycotina")  ){  //plants, fungi
1106
				result = Rank.SUBSECTION_BOTANY();
1107
			}else if (uninomial.endsWith("opsida") || uninomial.endsWith("phyceae") || uninomial.endsWith("mycetes")){  //plants, algae, fungi
1108
				result = Rank.CLASS();
1109
			}else if (uninomial.endsWith("idae") || uninomial.endsWith("phycidae") || uninomial.endsWith("mycetidae")){ //plants, algae, fungi
1110
				result = Rank.SUBCLASS();
1111
			}else if (uninomial.endsWith("ales")){
1112
				result = Rank.ORDER();
1113
			}else if (uninomial.endsWith("ineae")){
1114
				result = Rank.SUBORDER();
1115
			}else if (uninomial.endsWith("aceae")){
1116
					result = Rank.FAMILY();
1117
			}else if (uninomial.endsWith("oideae")){
1118
				result = Rank.SUBFAMILY();
1119
			}else if (uninomial.endsWith("eae")){
1120
				result = Rank.TRIBE();
1121
			}else if (uninomial.endsWith("inae")){
1122
				result = Rank.SUBTRIBE();
1123
			}else if (uninomial.endsWith("ota")){
1124
				result = Rank.KINGDOM();  //TODO
1125
			}
1126
		}else if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
1127
			if (false){
1128
				//
1129
			}else if (uninomial.endsWith("oideae")){
1130
				result = Rank.SUPERFAMILY();
1131
			}else if (uninomial.endsWith("idae")){
1132
					result = Rank.FAMILY();
1133
			}else if (uninomial.endsWith("inae")){
1134
				result = Rank.SUBFAMILY();
1135
			}else if (uninomial.endsWith("inae")){
1136
				result = Rank.SUBFAMILY();
1137
			}else if (uninomial.endsWith("ini")){
1138
				result = Rank.TRIBE();
1139
			}else if (uninomial.endsWith("ina")){
1140
				result = Rank.SUBTRIBE();
1141
			}
1142
		}else{
1143
			//
1144
		}
1145
		return result;
1146
	}
1147

    
1148
	/**
1149
	 * Parses the fullAuthorString
1150
	 * @param fullAuthorString
1151
	 * @return array of Teams containing the Team[0],
1152
	 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
1153
	 */
1154
	protected void fullAuthors (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years, Class<? extends NonViralName> clazz)
1155
			throws StringNotParsableException{
1156
		if (fullAuthorString == null || clazz == null){
1157
			return;
1158
		}
1159
		fullAuthorString = fullAuthorString.trim();
1160

    
1161
		//Botanic
1162
		if ( BotanicalName.class.isAssignableFrom(clazz) ){
1163
			if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
1164
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1165
			}
1166
		}
1167
		//Zoo
1168
		else if ( ZoologicalName.class.isAssignableFrom(clazz) ){
1169
			if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
1170
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1171
			}
1172
		}else {
1173
			//TODO
1174
			logger.warn ("Full author String parsable only for defined BotanicalNames or ZoologicalNames but this is " + clazz.getSimpleName());
1175
			throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1176
		}
1177
		fullAuthorsChecked(fullAuthorString, authors, years);
1178
	}
1179

    
1180
	/*
1181
	 * like fullTeams but without trim and match check
1182
	 */
1183
	protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years){
1184
		int authorShipStart = 0;
1185
		Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
1186

    
1187
		if (basionymMatcher.find(0)){
1188

    
1189
			String basString = basionymMatcher.group();
1190
			basString = basString.replaceFirst(basStart, "");
1191
			basString = basString.replaceAll(basEnd, "").trim();
1192
			authorShipStart = basionymMatcher.end(1) + 1;
1193

    
1194
			TeamOrPersonBase<?>[] basAuthors = new TeamOrPersonBase[2];
1195
			Integer[] basYears = new Integer[2];
1196
			authorsAndEx(basString, basAuthors, basYears);
1197
			authors[2]= basAuthors[0];
1198
			years[2] = basYears[0];
1199
			authors[3]= basAuthors[1];
1200
			years[3] = basYears[1];
1201
		}
1202
		if (fullAuthorString.length() >= authorShipStart){
1203
			TeamOrPersonBase<?>[] combinationAuthors = new TeamOrPersonBase[2];;
1204
			Integer[] combinationYears = new Integer[2];
1205
			authorsAndEx(fullAuthorString.substring(authorShipStart), combinationAuthors, combinationYears);
1206
			authors[0]= combinationAuthors[0] ;
1207
			years[0] = combinationYears[0];
1208
			authors[1]= combinationAuthors[1];
1209
			years[1] = combinationYears[1];
1210
		}
1211
	}
1212

    
1213

    
1214
	/**
1215
	 * Parses the author and ex-author String
1216
	 * @param authorShipStringOrig String representing the author and the ex-author team
1217
	 * @return array of Teams containing the Team[0] and the ExTeam[1]
1218
	 */
1219
	protected void authorsAndEx (String authorShipStringOrig, TeamOrPersonBase<?>[] authors, Integer[] years){
1220
		//TODO noch allgemeiner am anfang durch Replace etc.
1221
		String authorShipString = authorShipStringOrig.trim();
1222
		authorShipString = authorShipString.replaceFirst(oWs + "ex" + oWs, " ex. " );
1223

    
1224
		//int authorEnd = authorTeamString.length();
1225
		int authorBegin = 0;
1226

    
1227
		Matcher exAuthorMatcher = exAuthorPattern.matcher(authorShipString);
1228
		if (exAuthorMatcher.find(0)){
1229
			authorBegin = exAuthorMatcher.end(0);
1230
			int exAuthorEnd = exAuthorMatcher.start(0);
1231
			String exString = authorShipString.substring(0, exAuthorEnd).trim();
1232
			authors [1] = author(exString);
1233
		}
1234
		zooOrBotanicAuthor(authorShipString.substring(authorBegin), authors, years );
1235
	}
1236

    
1237
	/**
1238
	 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
1239
	 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies
1240
	 * to the year in case of an zoological name.
1241
	 * @param authorString
1242
	 * @param team
1243
	 * @param year
1244
	 */
1245
	protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase<?>[] team, Integer[] year){
1246
		if (authorString == null){
1247
			return;
1248
		}else if ((authorString = authorString.trim()).length() == 0){
1249
			return;
1250
		}
1251
		Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
1252
		if (zooAuthorAddidtionMatcher.find()){
1253
			int index = zooAuthorAddidtionMatcher.start(0);
1254
			String strYear = authorString.substring(index);
1255
			strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
1256
			year[0] = Integer.valueOf(strYear);
1257
			authorString = authorString.substring(0, index).trim();
1258
		}
1259
		team[0] = author(authorString);
1260
	}
1261

    
1262

    
1263
	/**
1264
	 * Parses an authorTeam String and returns the Team
1265
	 * !!! TODO (atomization not yet implemented)
1266
	 * @param authorTeamString String representing the author team
1267
	 * @return an Team
1268
	 */
1269
	protected TeamOrPersonBase<?> author (String authorString){
1270
		if (authorString == null){
1271
			return null;
1272
		}else if ((authorString = authorString.trim()).length() == 0){
1273
			return null;
1274
		}else if (! finalTeamSplitterPattern.matcher(authorString).find() && ! authorIsAlwaysTeam){
1275
			//1 Person
1276
			Person result = Person.NewInstance();
1277
			result.setNomenclaturalTitle(authorString);
1278
			return result;
1279
		}else{
1280
			return parsedTeam(authorString);
1281
		}
1282

    
1283
	}
1284

    
1285
	/**
1286
	 * Parses an authorString (reprsenting a team into the single authors and add
1287
	 * them to the return Team.
1288
	 * @param authorString
1289
	 * @return Team
1290
	 */
1291
	protected Team parsedTeam(String authorString){
1292
		Team result = Team.NewInstance();
1293
		String[] authors = authorString.split(notFinalTeamSplitter);
1294
		for (int i = 0; i < authors.length; i++){
1295
		    String author = authors[i];
1296
		    if ("al.".equals(author.trim()) && i == authors.length - 1){  //final al. is handled as hasMoreMembers
1297
			    result.setHasMoreMembers(true);
1298
			}else{
1299
			    Person person = Person.NewInstance();
1300
			    person.setNomenclaturalTitle(author);
1301
			    result.addTeamMember(person);
1302
			}
1303
		}
1304
		return result;
1305
	}
1306

    
1307

    
1308
	//Parsing of the given full name that has been identified as hybrid already somewhere else.
1309
	private void parseHybrid(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty){
1310
	    logger.warn("parseHybrid --> function not yet implemented");
1311

    
1312
//	    String nonHybridName  = fullNameString;
1313
//	    boolean isMonomHybrid = isMonomHybrid(fullNameString);
1314
//	    if (isMonomHybrid){
1315
//	    	nonHybridName.replaceAll(hybrid, "");
1316
//	    }
1317
//
1318
//	    String[] split = nonHybridName.split("\\s");
1319
//	    parseFullName(nameToBeFilled, nonHybridName, rank, makeEmpty);
1320
//
1321
//	    nonHybridName = nonHybridName.replaceAll(hybrid, " ");
1322
//
1323
//	    boolean isBinomHybrid = isBinomHybrid(split);
1324
////	    boolean isTrinomHybrid = isTrinomHybrid(split);
1325
//
1326
//	    nonHybridName = nonHybridName.replaceAll(hybrid, " ");
1327
//
1328
//	    parseFullName(nameToBeFilled, nonHybridName, rank, makeEmpty);
1329
//	    nameToBeFilled.getTitleCache();
1330
//	    nameToBeFilled.setMonomHybrid(isMonomHybrid);
1331
//	    nameToBeFilled.setBinomHybrid(isBinomHybrid);
1332
//	    nameToBeFilled.setBinomHybrid(isTrinomHybrid);
1333

    
1334
	    nameToBeFilled.setTitleCache(fullNameString,true);
1335
	    return;
1336
    }
1337

    
1338
//	private boolean isBinomHybrid(String[] split) {
1339
//		if (){
1340
//
1341
//		}
1342
//		return false;
1343
//	}
1344

    
1345
	private boolean isMonomHybrid(String fullNameString) {
1346
		Matcher matcher = hybridPattern.matcher(fullNameString);
1347
		boolean find = matcher.find();
1348
		int start = matcher.start();
1349
		if (find == true && start == 0){
1350
			return true;
1351
		}else{
1352
			return false;
1353
		}
1354
	}
1355

    
1356
//	// Parsing of the given full name that has been identified as a cultivar already somwhere else.
1357
//	// The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
1358
	public BotanicalName parseCultivar(String fullName)	throws StringNotParsableException{
1359
		CultivarPlantName result = null;
1360
		    String[] words = oWsPattern.split(fullName);
1361

    
1362
		    /* ---------------------------------------------------------------------------------
1363
		     * cultivar
1364
		     * ---------------------------------------------------------------------------------*/
1365
			if (fullName.indexOf(" '") != 0){
1366
				//TODO location of 'xx' is probably not arbitrary
1367
				Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
1368
				if (cultivarMatcher.find()){
1369
					String namePart = fullName.replaceFirst(cultivar, "");
1370

    
1371
					String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
1372
					//OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
1373

    
1374
					result = (CultivarPlantName)parseFullName(namePart);
1375
					result.setCultivarName(cultivarPart);
1376
				}
1377
			}else if (fullName.indexOf(" cv.") != 0){
1378
				// cv. is old form (not official)
1379
				throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
1380
			}
1381

    
1382
		    /* ---------------------------------------------------------------------------------
1383
		     * cultivar group
1384
		     * ---------------------------------------------------------------------------------
1385
		     */
1386
			// TODO in work
1387
			//Ann. this is not the official way of noting cultivar groups
1388
		    String group = oWs + "Group" + oWs + capitalEpiWord + end;
1389
			Pattern groupRE = Pattern.compile(group);
1390
			Matcher groupMatcher = groupRE.matcher(fullName);
1391
			if (groupMatcher.find()){
1392
		    	if (! words[words.length - 2].equals("group")){
1393
		            throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
1394
		        }else{
1395

    
1396
		        	String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
1397
		        	//OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
1398

    
1399
		        	String cultivarPart = words[words.length -1];
1400
		        	result = (CultivarPlantName)parseFullName(namePart);
1401
		        	if (result != null){
1402
		        		result.setCultivarName(cultivarPart);
1403

    
1404
		        		//OLD: result.setCultivarGroupName(cultivarPart);
1405
		        	}
1406
		        }
1407

    
1408
		    }
1409
//		    // ---------------------------------------------------------------------------------
1410
//		    if ( result = "" ){
1411
//		        return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
1412
//		    }else{
1413
//		        return result;
1414
	//	    }
1415
			return result; //TODO
1416
	}
1417

    
1418

    
1419
	private void makeEmpty(NonViralName<?> nameToBeFilled){
1420
		nameToBeFilled.setRank(null);
1421
		nameToBeFilled.setTitleCache(null, false);
1422
		nameToBeFilled.setFullTitleCache(null, false);
1423
		nameToBeFilled.setNameCache(null, false);
1424

    
1425
		nameToBeFilled.setAppendedPhrase(null);
1426
		nameToBeFilled.setBasionymAuthorship(null);
1427
		nameToBeFilled.setCombinationAuthorship(null);
1428
		nameToBeFilled.setExBasionymAuthorship(null);
1429
		nameToBeFilled.setExCombinationAuthorship(null);
1430
		nameToBeFilled.setAuthorshipCache(null, false);
1431

    
1432

    
1433
		//delete problems except check rank
1434
		makeProblemEmpty(nameToBeFilled);
1435

    
1436
		// TODO ?
1437
		//nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
1438

    
1439

    
1440
		nameToBeFilled.setGenusOrUninomial(null);
1441
		nameToBeFilled.setInfraGenericEpithet(null);
1442
		nameToBeFilled.setSpecificEpithet(null);
1443
		nameToBeFilled.setInfraSpecificEpithet(null);
1444

    
1445
		nameToBeFilled.setNomenclaturalMicroReference(null);
1446
		nameToBeFilled.setNomenclaturalReference(null);
1447

    
1448
		nameToBeFilled.setHybridFormula(false);
1449
		nameToBeFilled.setMonomHybrid(false);
1450
		nameToBeFilled.setBinomHybrid(false);
1451
		nameToBeFilled.setTrinomHybrid(false);
1452

    
1453
		if (nameToBeFilled.isInstanceOf(BotanicalName.class)){
1454
			BotanicalName botanicalName = (BotanicalName)nameToBeFilled;
1455
			botanicalName.setAnamorphic(false);
1456
		}
1457

    
1458
		if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
1459
			ZoologicalName zoologicalName = (ZoologicalName)nameToBeFilled;
1460
			zoologicalName.setBreed(null);
1461
			zoologicalName.setOriginalPublicationYear(null);
1462
		}
1463

    
1464
		//nom status handled in nom status parser, otherwise we loose additional information like reference etc.
1465
		//hybrid relationships handled in hybrid formula and at end of fullNameParser
1466
	}
1467

    
1468

    
1469

    
1470
}
(3-3/8)