Project

General

Profile

Download (50.5 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2009 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/ 
9

    
10
package eu.etaxonomy.cdm.strategy.parser;
11

    
12
import java.util.regex.Matcher;
13
import java.util.regex.Pattern;
14

    
15
import org.apache.commons.lang.StringUtils;
16
import org.apache.log4j.Logger;
17
import org.joda.time.DateTimeFieldType;
18
import org.joda.time.Partial;
19

    
20
import eu.etaxonomy.cdm.common.CdmUtils;
21
import eu.etaxonomy.cdm.model.agent.Person;
22
import eu.etaxonomy.cdm.model.agent.Team;
23
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
24
import eu.etaxonomy.cdm.model.common.CdmBase;
25
import eu.etaxonomy.cdm.model.common.IParsable;
26
import eu.etaxonomy.cdm.model.common.TimePeriod;
27
import eu.etaxonomy.cdm.model.name.BacterialName;
28
import eu.etaxonomy.cdm.model.name.BotanicalName;
29
import eu.etaxonomy.cdm.model.name.CultivarPlantName;
30
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
31
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
32
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
33
import eu.etaxonomy.cdm.model.name.NonViralName;
34
import eu.etaxonomy.cdm.model.name.Rank;
35
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
36
import eu.etaxonomy.cdm.model.name.ZoologicalName;
37
import eu.etaxonomy.cdm.model.reference.IBook;
38
import eu.etaxonomy.cdm.model.reference.IBookSection;
39
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
40
import eu.etaxonomy.cdm.model.reference.IVolumeReference;
41
import eu.etaxonomy.cdm.model.reference.Reference;
42
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
43
import eu.etaxonomy.cdm.model.reference.ReferenceType;
44
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
45
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
46

    
47

    
48
/**
49
 * @author a.mueller
50
 *
51
 */
52
public class NonViralNameParserImpl extends NonViralNameParserImplRegExBase implements INonViralNameParser<NonViralName> {
53
	private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
54
	
55
	// good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
56
	
57
	final static boolean MAKE_EMPTY = true;
58
	final static boolean MAKE_NOT_EMPTY = false;
59
	
60
	private boolean authorIsAlwaysTeam = true;
61
	
62
	public static NonViralNameParserImpl NewInstance(){
63
		return new NonViralNameParserImpl();
64
	}
65

    
66
	/* (non-Javadoc)
67
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
68
	 */
69
	public NonViralName parseSimpleName(String simpleName){
70
		return parseSimpleName(simpleName, null, null);
71
	}
72
	
73
	
74
	/* (non-Javadoc)
75
	 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.NomenclaturalCode, eu.etaxonomy.cdm.model.name.Rank)
76
	 */
77
	public NonViralName parseSimpleName(String simpleName, NomenclaturalCode code, Rank rank){
78
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
79
		return parseFullName(simpleName, code, rank);
80
	}
81

    
82
	public void parseSimpleName(NonViralName nameToBeFilled, String simpleNameString, Rank rank, boolean makeEmpty){
83
		//"parseSimpleName() not yet implemented. Uses parseFullName() instead");
84
		parseFullName(nameToBeFilled, simpleNameString, rank, makeEmpty);
85
	}
86

    
87
	
88
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
89
		return getNonViralNameInstance(fullString, code, null);
90
	}
91
	
92
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
93
		NonViralName result = null;
94
		if(code ==null) {
95
			boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
96
			boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
97
			boolean isBacteriologicalName = false;
98
			boolean isCultivatedPlantName = false;
99
			if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
100
				if (isBotanicalName){
101
					result = BotanicalName.NewInstance(rank);
102
				}else{
103
					result = CultivarPlantName.NewInstance(rank);
104
				}
105
			}else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
106
				result = ZoologicalName.NewInstance(rank);
107
			}else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
108
				result = BacterialName.NewInstance(rank);
109
			}else {
110
				result =  NonViralName.NewInstance(rank);
111
			}
112
		} else {
113
			switch (code) {
114
			case ICBN:
115
				result = BotanicalName.NewInstance(rank);
116
				break;
117
			case ICZN:
118
				result = ZoologicalName.NewInstance(rank);
119
				break;
120
			case ICNCP:
121
				logger.warn("ICNCP parsing not yet implemented");
122
				result = CultivarPlantName.NewInstance(rank);
123
				break;
124
			case ICNB:
125
				logger.warn("ICNB not yet implemented");
126
				result = BacterialName.NewInstance(rank);
127
				break;
128
			case ICVCN:
129
				logger.error("Viral name is not a NonViralName !!");
130
				break;
131
			default:
132
				// FIXME Unreachable code
133
				logger.error("Unknown Nomenclatural Code !!");
134
			}
135
		}
136
		return result;
137
	}
138
	
139

    
140
	/* (non-Javadoc)
141
	 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullReference(java.lang.String)
142
	 */
143
	public NonViralName parseReferencedName(String fullReferenceString) {
144
		return parseReferencedName(fullReferenceString, null, null);
145
	}
146
	
147
	/* (non-Javadoc)
148
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
149
	 */
150
	public NonViralName parseReferencedName(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
151
		if (fullReferenceString == null){
152
			return null;
153
		}else{
154
			NonViralName result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
155
			parseReferencedName(result, fullReferenceString, rank, MAKE_EMPTY);
156
			return result;
157
		}
158
	}
159
	
160
	private String standardize(NonViralName nameToBeFilled, String fullReferenceString, boolean makeEmpty){
161
		//Check null and standardize
162
		if (fullReferenceString == null){
163
			//return null;
164
			return null;
165
		}
166
		if (makeEmpty){
167
			makeEmpty(nameToBeFilled);
168
		}
169
		fullReferenceString = fullReferenceString.replaceAll(oWs , " ");
170
		fullReferenceString = fullReferenceString.trim();
171
		if ("".equals(fullReferenceString)){
172
			fullReferenceString = null;
173
		}
174
		return fullReferenceString;
175
	}
176

    
177
	/**
178
	 * Returns the regEx to be used for the full-name depending on the code
179
	 * @param nameToBeFilled
180
	 * @return
181
	 */
182
	private String getLocalFullName(NonViralName nameToBeFilled){
183
		if (nameToBeFilled instanceof ZoologicalName){
184
			return anyZooFullName;
185
		}else if (nameToBeFilled instanceof BotanicalName) {
186
			return anyBotanicFullName;
187
		}else if (nameToBeFilled instanceof NonViralName) {
188
			return anyBotanicFullName;  //TODO ?
189
		}else{
190
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
191
			return null;
192
		}
193
	}
194
	
195
	/**
196
	 * Returns the regEx to be used for the simple-name depending on the code
197
	 * @param nameToBeFilled
198
	 * @return
199
	 */
200
	private String getLocalSimpleName(NonViralName nameToBeFilled){
201
		if (nameToBeFilled instanceof ZoologicalName){
202
			return anyZooName;
203
		}else if (nameToBeFilled instanceof NonViralName){
204
			return anyZooName;  //TODO ?
205
		}else if (nameToBeFilled instanceof BotanicalName) {
206
			return anyBotanicName;
207
		}else{
208
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
209
			return null;
210
		}
211
	}
212
	
213
	private Matcher getMatcher(String regEx, String matchString){
214
		Pattern pattern = Pattern.compile(regEx);
215
		Matcher matcher = pattern.matcher(matchString);
216
		return matcher;
217
	}
218
	
219
	/* (non-Javadoc)
220
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
221
	 */
222
	public void parseReferencedName(NonViralName nameToBeFilled, String fullReferenceString, Rank rank, boolean makeEmpty) {
223
		//standardize
224
		fullReferenceString = standardize(nameToBeFilled, fullReferenceString, makeEmpty);
225
		if (fullReferenceString == null){
226
			return;
227
		}
228
		// happens already in standardize(...)
229
//		makeProblemEmpty(nameToBeFilled);
230
		
231
		//make nomenclatural status and replace it by empty string 
232
	    fullReferenceString = parseNomStatus(fullReferenceString, nameToBeFilled);
233
	    nameToBeFilled.setProblemEnds(fullReferenceString.length());
234
		
235
	    //get full name reg
236
		String localFullName = getLocalFullName(nameToBeFilled);
237
		//get full name reg
238
		String localSimpleName = getLocalSimpleName(nameToBeFilled);
239
		
240
		//separate name and reference part
241
		String nameAndRefSeparator = "(^" + localFullName + ")("+ referenceSeperator + ")";
242
		Matcher nameAndRefSeparatorMatcher = getMatcher (nameAndRefSeparator, fullReferenceString);
243
		
244
		Matcher onlyNameMatcher = getMatcher (localFullName, fullReferenceString);
245
		Matcher onlySimpleNameMatcher = getMatcher (localSimpleName, fullReferenceString);
246
		
247
		if (nameAndRefSeparatorMatcher.find()){
248
			makeNameWithReference(nameToBeFilled, fullReferenceString, nameAndRefSeparatorMatcher, rank, makeEmpty);
249
		}else if (onlyNameMatcher.matches()){
250
			makeEmpty = false;
251
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
252
		}else if (onlySimpleNameMatcher.matches()){
253
			makeEmpty = false;
254
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);	//simpleName not yet implemented
255
		}else{
256
			makeNoFullRefMatch(nameToBeFilled, fullReferenceString, rank);
257
		}
258
		//problem handling. Start and end solved in subroutines
259
		if (! nameToBeFilled.hasProblem()){
260
			makeProblemEmpty(nameToBeFilled);
261
		}
262
	}
263
	
264
	private void makeProblemEmpty(IParsable parsable){
265
		boolean hasCheckRank = parsable.hasProblem(ParserProblem.CheckRank);
266
		parsable.setParsingProblem(0);
267
		if (hasCheckRank){
268
			parsable.addParsingProblem(ParserProblem.CheckRank);
269
		}
270
		parsable.setProblemStarts(-1);
271
		parsable.setProblemEnds(-1);
272
	}
273
	
274
	private void makeNoFullRefMatch(NonViralName nameToBeFilled, String fullReferenceString, Rank rank){
275
	    //try to parse first part as name, but keep in mind full string is not parsable
276
		int start = 0;
277
		
278
		String localFullName = getLocalFullName(nameToBeFilled);
279
		Matcher fullNameMatcher = getMatcher (pStart + localFullName, fullReferenceString);
280
		if (fullNameMatcher.find()){
281
			String fullNameString = fullNameMatcher.group(0);
282
			nameToBeFilled.setProtectedNameCache(false);
283
			parseFullName(nameToBeFilled, fullNameString, rank, false);
284
			String sure = nameToBeFilled.getNameCache();
285
			start = sure.length();
286
		}
287
		
288
//		String localSimpleName = getLocalSimpleName(nameToBeFilled);
289
//		Matcher simpleNameMatcher = getMatcher (start + localSimpleName, fullReferenceString);
290
//		if (simpleNameMatcher.find()){
291
//			String simpleNameString = simpleNameMatcher.group(0);
292
//			parseFullName(nameToBeFilled, simpleNameString, rank, false);
293
//			start = simpleNameString.length();
294
//		}
295
		
296
		//don't parse if name can't be separated
297
		nameToBeFilled.addParsingProblem(ParserProblem.NameReferenceSeparation);
298
		nameToBeFilled.setTitleCache(fullReferenceString,true);
299
		nameToBeFilled.setFullTitleCache(fullReferenceString,true);
300
		// FIXME Quick fix, otherwise search would not deliver results for unparsable names
301
		nameToBeFilled.setNameCache(fullReferenceString,true);
302
		// END
303
		nameToBeFilled.setProblemStarts(start);
304
		nameToBeFilled.setProblemEnds(fullReferenceString.length());
305
		logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");    
306
	}
307
	
308
	private void makeNameWithReference(NonViralName nameToBeFilled, 
309
			String fullReferenceString, 
310
			Matcher nameAndRefSeparatorMatcher,
311
			Rank rank,
312
			boolean makeEmpty){
313
		
314
		String nameAndSeparator = nameAndRefSeparatorMatcher.group(0); 
315
	    String name = nameAndRefSeparatorMatcher.group(1); 
316
	    String referenceString = fullReferenceString.substring(nameAndRefSeparatorMatcher.end());
317
	    
318
	    // is reference an in ref?
319
	    String separator = nameAndSeparator.substring(name.length());
320
		boolean isInReference = separator.matches(inReferenceSeparator);
321
	    
322
	    //parse subparts
323
	    
324
		int oldProblemEnds = nameToBeFilled.getProblemEnds();
325
		parseFullName(nameToBeFilled, name, rank, makeEmpty);
326
	    nameToBeFilled.setProblemEnds(oldProblemEnds);
327
		
328
		//zoological new combinations should not have a nom. reference to be parsed
329
	    if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
330
			ZoologicalName zooName = CdmBase.deproxy(nameToBeFilled, ZoologicalName.class);
331
			//is name new combination?
332
			if (zooName.getBasionymAuthorTeam() != null || zooName.getOriginalPublicationYear() != null){
333
				ParserProblem parserProblem = ParserProblem.NewCombinationHasPublication;
334
				zooName.addParsingProblem(parserProblem);
335
				nameToBeFilled.setProblemStarts((nameToBeFilled.getProblemStarts()> -1) ? nameToBeFilled.getProblemStarts(): name.length());
336
				nameToBeFilled.setProblemEnds(Math.max(fullReferenceString.length(), nameToBeFilled.getProblemEnds()));
337
			}
338
		}
339
		
340
	    parseReference(nameToBeFilled, referenceString, isInReference); 
341
	    INomenclaturalReference ref = (INomenclaturalReference)nameToBeFilled.getNomenclaturalReference();
342

    
343
	    //problem start
344
	    int start = nameToBeFilled.getProblemStarts();
345
	    int nameLength = name.length();
346
	    int nameAndSeparatorLength = nameAndSeparator.length();
347
	    int fullRefLength = nameToBeFilled.getFullTitleCache().length();
348
	    
349
	    if (nameToBeFilled.isProtectedTitleCache() || nameToBeFilled.getParsingProblems().contains(ParserProblem.CheckRank)){
350
	    	start = Math.max(0, start);
351
		}else{
352
			if (ref != null && ref.getParsingProblem()!=0){
353
				start = Math.max(nameAndSeparatorLength, start);
354
		    	//TODO search within ref
355
			}	
356
		}
357
	    
358
	    //end
359
	    int end = nameToBeFilled.getProblemEnds();
360
	    
361
	    if (ref != null && ref.getParsingProblem()!=0){
362
	    	end = Math.min(nameAndSeparatorLength + ref.getProblemEnds(), end);
363
	    }else{
364
	    	if (nameToBeFilled.isProtectedTitleCache() ){
365
	    		end = Math.min(end, nameAndSeparatorLength);
366
	    		//TODO search within name
367
			}
368
	    }
369
	    nameToBeFilled.setProblemStarts(start);
370
	    nameToBeFilled.setProblemEnds(end);
371

    
372
	    //delegate has problem to name
373
	    if (ref != null && ref.getParsingProblem()!=0){
374
	    	nameToBeFilled.addParsingProblems(ref.getParsingProblem());
375
	    }
376
	    
377
	    Reference nomRef;
378
		if ( (nomRef = (Reference)nameToBeFilled.getNomenclaturalReference()) != null ){
379
			nomRef.setAuthorTeam((TeamOrPersonBase)nameToBeFilled.getCombinationAuthorTeam());
380
		}
381
	}
382
	
383
	//TODO make it an Array of status
384
	/**
385
	 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the {@link TaxonNameBase}.
386
	 * The nomenclatural status part is deleted from the reference String.
387
	 * @return  String the new (shortened) reference String 
388
	 */ 
389
	public String parseNomStatus(String fullString, NonViralName nameToBeFilled) {
390
		String statusString;
391
		Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")"); 
392
		Matcher hasStatusMatcher = hasStatusPattern.matcher(fullString);
393
		
394
		if (hasStatusMatcher.find()) {
395
			String statusPhrase = hasStatusMatcher.group(0);
396
			
397
			Pattern statusPattern = Pattern.compile(pNomStatus);
398
			Matcher statusMatcher = statusPattern.matcher(statusPhrase);
399
			statusMatcher.find();
400
			statusString = statusMatcher.group(0);
401
			try {
402
				NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString);
403
				NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
404
				nameToBeFilled.addStatus(nomStatus);
405
			    
406
				fullString = fullString.replace(statusPhrase, "");
407
			} catch (UnknownCdmTypeException e) {
408
				//Do nothing
409
			}
410
		}
411
		return fullString;
412
	}
413
	
414
	
415
	private void parseReference(NonViralName nameToBeFilled, String strReference, boolean isInReference){
416
		
417
		INomenclaturalReference ref;
418
		String originalStrReference = strReference;
419
		
420
		//End (just delete end (e.g. '.', may be ambigous for yearPhrase, but no real information gets lost
421
		Matcher endMatcher = getMatcher(referenceEnd + end, strReference);
422
		if (endMatcher.find()){
423
			String endPart = endMatcher.group(0);
424
			strReference = strReference.substring(0, strReference.length() - endPart.length());
425
		}
426
		
427
//		String pDetailYear = ".*" + detailSeparator + detail + fWs + yearSeperator + fWs + yearPhrase + fWs + end;
428
//		Matcher detailYearMatcher = getMatcher(pDetailYear, strReference);
429
		
430
		String strReferenceWithYear = strReference;
431
		//year
432
		String yearPart = null;
433
		String pYearPhrase = yearSeperator + fWs + yearPhrase + fWs + end;
434
		Matcher yearPhraseMatcher = getMatcher(pYearPhrase, strReference);
435
		if (yearPhraseMatcher.find()){
436
			yearPart = yearPhraseMatcher.group(0);
437
			strReference = strReference.substring(0, strReference.length() - yearPart.length());
438
			yearPart = yearPart.replaceFirst(pStart + yearSeperator, "").trim();
439
		}else{
440
			if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
441
				ZoologicalName zooName = CdmBase.deproxy(nameToBeFilled, ZoologicalName.class);
442
				yearPart = String.valueOf(zooName.getPublicationYear());
443
				//continue
444
			}else{
445
				ref = makeDetailYearUnparsable(nameToBeFilled,strReference);
446
				ref.setDatePublished(TimePeriod.parseString(yearPart));
447
				return;
448
			}
449
		}
450
		
451
			
452
		//detail
453
		String pDetailPhrase = detailSeparator + fWs + detail + fWs + end;
454
		Matcher detailPhraseMatcher = getMatcher(pDetailPhrase, strReference);
455
		if (detailPhraseMatcher.find()){
456
			String detailPart = detailPhraseMatcher.group(0);
457
			strReference = strReference.substring(0, strReference.length() - detailPart.length());
458
			detailPart = detailPart.replaceFirst(pStart + detailSeparator, "").trim();
459
			nameToBeFilled.setNomenclaturalMicroReference(detailPart);
460
		}else{
461
			makeDetailYearUnparsable(nameToBeFilled, strReferenceWithYear);
462
			return;
463
		}
464
		//parse title and author
465
		ref = parseReferenceTitle(strReference, yearPart, isInReference);
466
		if (ref.hasProblem()){
467
			ref.setTitleCache( (isInReference?"in ":"") +  originalStrReference,true);
468
		}
469
		nameToBeFilled.setNomenclaturalReference((Reference)ref);
470
		int end = Math.min(strReference.length(), ref.getProblemEnds());
471
		ref.setProblemEnds(end);
472
	}
473

    
474
	/**
475
	 * @param nameToBeFilled
476
	 * @param strReference
477
	 * @return 
478
	 */
479
	private INomenclaturalReference makeDetailYearUnparsable(NonViralName nameToBeFilled, String strReference) {
480
		INomenclaturalReference ref;
481
		//ref = Generic.NewInstance();
482
		
483
		ref = ReferenceFactory.newGeneric();
484
		ref.setTitleCache(strReference,true);
485
		ref.setProblemEnds(strReference.length());
486
		ref.addParsingProblem(ParserProblem.CheckDetailOrYear);
487
		nameToBeFilled.addParsingProblem(ParserProblem.CheckDetailOrYear);
488
		nameToBeFilled.setNomenclaturalReference((Reference)ref);
489
		return ref;
490
	}
491
		
492
	/**
493
	 * Parses the referenceTitlePart, including the author volume and edition.
494
	 * @param reference
495
	 * @param year
496
	 * @return
497
	 */
498
	private INomenclaturalReference parseReferenceTitle(String strReference, String year, boolean isInReference){
499
		IBook result = null;
500

    
501
		Matcher refSineDetailMatcher = referenceSineDetailPattern.matcher(strReference);
502
		if (! refSineDetailMatcher.matches()){
503
			//TODO ?
504
		}
505
		
506
		Matcher articleMatcher = getMatcher(pArticleReference, strReference);
507
		Matcher bookMatcher = getMatcher(pBookReference, strReference);
508
		
509
		Matcher softArticleMatcher = getMatcher(pSoftArticleReference, strReference);
510
		Matcher bookSectionMatcher = getMatcher(pBookSectionReference, strReference);
511
		
512
		
513
		if(isInReference == false){
514
			if (bookMatcher.matches() ){
515
				result = parseBook(strReference);
516
			}else{
517
				logger.warn("Non-InRef must be book but does not match book");
518
				result = ReferenceFactory.newBook();
519
				makeUnparsableRefTitle(result, strReference);
520
			}
521
		}else{  //inRef
522
			if (articleMatcher.matches()){
523
				//article without separators like ","
524
				result = parseArticle(strReference);
525
			}else if (softArticleMatcher.matches()){
526
				result = parseArticle(strReference);
527
			}else if (bookSectionMatcher.matches()){
528
				result = parseBookSection(strReference);
529
			}else{
530
				result =  ReferenceFactory.newGeneric();
531
				makeUnparsableRefTitle(result, "in " + strReference);
532
			}
533
		}
534
		//make year
535
		if (makeYear(result, year) == false){
536
			//TODO
537
			logger.warn("Year could not be parsed");
538
		}
539
		result.setProblemStarts(0);
540
		result.setProblemEnds(strReference.length());
541
		return result;
542
	}
543
	
544
	private void makeUnparsableRefTitle(INomenclaturalReference result, String reference){
545
		result.setTitleCache(reference,true);
546
		result.addParsingProblem(ParserProblem.UnparsableReferenceTitle);
547
	}
548
	
549
	/**
550
	 * Parses a single date string. If the string is not parsable a StringNotParsableException is thrown
551
	 * @param singleDateString
552
	 * @return
553
	 * @throws StringNotParsableException
554
	 */
555
	private static Partial parseSingleDate(String singleDateString) 
556
			throws StringNotParsableException{
557
		Partial dt = new Partial();
558
		if (CdmUtils.isNumeric(singleDateString)){
559
			try {
560
				Integer year = Integer.valueOf(singleDateString.trim());
561
				if (year > 1750 && year < 2050){
562
					dt = dt.with(DateTimeFieldType.year(), year);
563
				}else{
564
					dt = null;
565
				}
566
			} catch (NumberFormatException e) {
567
				logger.debug("Not a Integer format in getCalendar()");
568
				throw new StringNotParsableException(singleDateString + "is not parsable as a single Date");
569
			}
570
		}
571
		return dt;
572
	}
573

    
574
	
575
	/**
576
	 * Parses the publication date part. 
577
	 * @param nomRef
578
	 * @param year
579
	 * @return If the string is not parsable <code>false</code>
580
	 * is returned. <code>True</code> otherwise
581
	 */
582
	private boolean makeYear(INomenclaturalReference nomRef, String year){
583
		boolean result = true;
584
		if (year == null){
585
			return false;
586
		}
587
		if ("".equals(year.trim())){
588
			return true;
589
		}
590
		TimePeriod datePublished = TimePeriod.parseString(year);
591
		
592
		if (nomRef.getType().equals(ReferenceType.BookSection)){
593
			handleBookSectionYear((IBookSection)nomRef, datePublished);
594
		}else if (nomRef instanceof Reference){
595
			((Reference)nomRef).setDatePublished(datePublished);	
596
		}else{
597
			throw new ClassCastException("nom Ref is not of type Reference but " + (nomRef == null? "(null)" : nomRef.getClass()));
598
		}
599
		return result;	
600
	}
601
	
602
	private String makeVolume(IVolumeReference nomRef, String strReference){
603
		//volume
604
		String volPart = null;
605
		String pVolPhrase = volumeSeparator +  volume + end;
606
		Matcher volPhraseMatcher = getMatcher(pVolPhrase, strReference);
607
		if (volPhraseMatcher.find()){
608
			volPart = volPhraseMatcher.group(0);
609
			strReference = strReference.substring(0, strReference.length() - volPart.length());
610
			volPart = volPart.replaceFirst(pStart + volumeSeparator, "").trim();
611
			nomRef.setVolume(volPart);
612
		}
613
		return strReference;
614
	}
615
	
616
	private String makeEdition(IBook book, String strReference){
617
		//volume
618
		String editionPart = null;
619
		Matcher editionPhraseMatcher = getMatcher(pEditionPart, strReference);
620
		
621
		Matcher editionVolumeMatcher = getMatcher(pEditionVolPart, strReference);
622
		boolean isEditionAndVol = editionVolumeMatcher.find();
623
		
624
		if (editionPhraseMatcher.find()){
625
			editionPart = editionPhraseMatcher.group(0);
626
			int pos = strReference.indexOf(editionPart);
627
			int posEnd = pos + editionPart.length();
628
			if (isEditionAndVol){
629
				posEnd++;  //delete also comma
630
			}
631
			strReference = strReference.substring(0, pos) + strReference.substring(posEnd);
632
			editionPart = editionPart.replaceFirst(pStart + editionSeparator, "").trim();
633
			book.setEdition(editionPart);
634
		}
635
		return strReference;
636
	}
637
	
638
	private IBook parseBook(String reference){
639
		IBook result = ReferenceFactory.newBook();
640
		reference = makeEdition(result, reference);
641
		reference = makeVolume(result, reference);
642
		result.setTitle(reference);
643
		return result;
644
	}
645
	
646
	
647
	private Reference parseArticle(String reference){
648
		//if (articlePatter)
649
		//(type, author, title, volume, editor, series;
650
		Reference result = ReferenceFactory.newArticle();
651
		reference = makeVolume(result, reference);
652
		Reference inJournal = ReferenceFactory.newJournal();
653
		inJournal.setTitle(reference);
654
		result.setInReference(inJournal);
655
		return result;
656
	}
657
	
658
	private Reference parseBookSection(String reference){
659
		Reference result = ReferenceFactory.newBookSection();
660
		String[] parts = reference.split(referenceAuthorSeparator, 2);
661
		if (parts.length != 2){
662
			logger.warn("Unexpected number of parts");
663
			result.setTitleCache(reference,true);
664
		}else{
665
			String authorString = parts[0];
666
			String bookString = parts[1];
667
			
668
			TeamOrPersonBase<?> authorTeam = author(authorString);
669
			IBook inBook = parseBook(bookString);
670
			inBook.setAuthorTeam(authorTeam);
671
			result.setInBook(inBook);
672
		}
673
		return result;
674
	}
675
	
676
	/**
677
	 * If the publication date of a book section and its inBook differ, this is usually 
678
	 * caused by the fact that a book has been published during a period, because originally 
679
	 * it consisted of several parts that only later were put together to one book.
680
	 * If so, the book section's publication date may be a point in time (year or month of year)
681
	 * whereas the books publication date may be a period of several years.
682
	 * Therefore a valid nomenclatural reference string should use the book sections 
683
	 * publication date rather than the book's publication date.<BR>
684
	 * This method in general adds the publication date to the book section.
685
	 * An exception exists if the publication date is a period. Then the parser
686
	 * assumes that the nomenclatural reference string does not follow the above rule but
687
	 * the books publication date is set.
688
	 * @param bookSection
689
	 * @param datePublished
690
	 */
691
	private void handleBookSectionYear(IBookSection bookSection, TimePeriod datePublished){
692
		if (datePublished == null || datePublished.getStart() == null || bookSection == null){
693
			return;
694
		}
695
		if (datePublished.isPeriod() && bookSection.getInBook() != null){
696
			bookSection.getInBook().setDatePublished(datePublished);
697
		}else{
698
			bookSection.setDatePublished(datePublished);	
699
		}
700
	}
701
	
702
	
703
	/* (non-Javadoc)
704
	 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullName(java.lang.String)
705
	 */
706
	public NonViralName parseFullName(String fullNameString){
707
		return parseFullName(fullNameString, null, null);
708
	}
709
	
710
	
711
	/* (non-Javadoc)
712
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
713
	 */
714
	public NonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
715
		
716
		if (fullNameString == null){
717
			return null;
718
		}else{
719
			NonViralName result = getNonViralNameInstance(fullNameString, nomCode, rank);
720
			parseFullName(result, fullNameString, rank, false);
721
			return result;
722
		}
723
	}
724
		
725
	
726
	public void parseFullName(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty) {
727
		//TODO prol. etc.
728
		boolean hasCheckRankProblem = false; //was rank guessed in a previous parsing process?
729
		if (nameToBeFilled == null){
730
			logger.warn("name is null!");
731
		}else{
732
			hasCheckRankProblem = nameToBeFilled.hasProblem(ParserProblem.CheckRank);
733
			nameToBeFilled.removeParsingProblem(ParserProblem.CheckRank);
734
		}
735
		String authorString = null;
736
		
737
		if (fullNameString == null){
738
			return;
739
		}
740
		
741
		if (makeEmpty){
742
			makeEmpty(nameToBeFilled);
743
		}
744
		fullNameString.replaceAll(oWs , " ");
745
		//TODO 
746
		// OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces		   
747
		fullNameString = fullNameString.trim();
748
		
749
		fullNameString = removeHybridBlanks(fullNameString);
750
		String[] epi = pattern.split(fullNameString);
751
		try {
752
	    	//cultivars //TODO 2 implement cultivars
753
//		    if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
754
//		    	result = parseCultivar(fullName);
755
//		    }
756
		    //hybrids //TODO 2 implement hybrids
757
		    //else 
758
//		    if (hybridPattern.matcher(fullNameString).find() ){
759
//		    	parseHybrid(nameToBeFilled, fullNameString, rank, makeEmpty);
760
//		    } else 
761
		      if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
762
		    	//supraGeneric
763
				if (rank != null && ! hasCheckRankProblem  && (rank.isSupraGeneric()|| rank.isGenus())){
764
					nameToBeFilled.setRank(rank);
765
					nameToBeFilled.setGenusOrUninomial(epi[0]);
766
				} 
767
				 //genus or guess rank
768
				 else {
769
					rank = guessUninomialRank(nameToBeFilled, epi[0]); 
770
					nameToBeFilled.setRank(rank);
771
					nameToBeFilled.setGenusOrUninomial(epi[0]);
772
					nameToBeFilled.addParsingProblem(ParserProblem.CheckRank);
773
					nameToBeFilled.setProblemStarts(0);
774
					nameToBeFilled.setProblemEnds(epi[0].length());
775
				}
776
				authorString = fullNameString.substring(epi[0].length());
777
			}
778
			 //infra genus
779
			 else if (infraGenusPattern.matcher(fullNameString).matches()){
780
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[1]));
781
				nameToBeFilled.setGenusOrUninomial(epi[0]);
782
				nameToBeFilled.setInfraGenericEpithet(epi[2]);
783
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
784
			}
785
			 //aggr. or group
786
			 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
787
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[2]));
788
				nameToBeFilled.setGenusOrUninomial(epi[0]);
789
				nameToBeFilled.setSpecificEpithet(epi[1]);
790
			}
791
			 //species
792
			 else if (speciesPattern.matcher(fullNameString).matches()){
793
				nameToBeFilled.setRank(Rank.SPECIES());
794
				nameToBeFilled.setGenusOrUninomial(epi[0]);
795
				nameToBeFilled.setSpecificEpithet(epi[1]);
796
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
797
			}
798
			 //autonym
799
			 else if (autonymPattern.matcher(fullNameString).matches()){
800
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[epi.length - 2]));
801
				nameToBeFilled.setGenusOrUninomial(epi[0]);
802
				nameToBeFilled.setSpecificEpithet(epi[1]);
803
				nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
804
				int lenSpecies = 2 + epi[0].length()+epi[1].length();
805
				int lenInfraSpecies =  2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
806
				authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
807
			}
808
			 //infraSpecies
809
			 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
810
				String infraSpecRankEpi = epi[2];
811
				String infraSpecEpi = epi[3];
812
				if ("tax.".equals(infraSpecRankEpi)){
813
					infraSpecRankEpi += " " +  epi[3];
814
					infraSpecEpi = epi[4];
815
				}
816
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(infraSpecRankEpi));
817
				nameToBeFilled.setGenusOrUninomial(epi[0]);
818
				nameToBeFilled.setSpecificEpithet(epi[1]);
819
				nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
820
				authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankEpi.length() + 1 + infraSpecEpi.length());
821
			}//old infraSpecies
822
			 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
823
				boolean implemented = false;
824
				if (implemented){
825
					nameToBeFilled.setRank(Rank.getRankByNameOrAbbreviation(epi[2]));
826
					nameToBeFilled.setGenusOrUninomial(epi[0]);
827
					nameToBeFilled.setSpecificEpithet(epi[1]);
828
					//TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
829
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
830
				}else{
831
					nameToBeFilled.addParsingProblem(ParserProblem.OldInfraSpeciesNotSupported);
832
					nameToBeFilled.setTitleCache(fullNameString,true);
833
					// FIXME Quick fix, otherwise search would not deilver results for unparsable names
834
					nameToBeFilled.setNameCache(fullNameString,true);
835
					// END
836
					logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
837
				}
838
			}
839
			//none
840
			else{ 
841
				nameToBeFilled.addParsingProblem(ParserProblem.UnparsableNamePart);
842
				nameToBeFilled.setTitleCache(fullNameString,true);
843
				// FIXME Quick fix, otherwise search would not deilver results for unparsable names
844
				nameToBeFilled.setNameCache(fullNameString,true);
845
				// END
846
				logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
847
		    }
848
		    //hybrid bits	
849
		    handleHybridBits(nameToBeFilled);
850
			//authors
851
		    if (nameToBeFilled != null && StringUtils.isNotBlank(authorString) ){ 
852
				handleAuthors(nameToBeFilled, fullNameString, authorString);
853
			}	
854
			//return
855
			if (nameToBeFilled != null){
856
		    	//return(BotanicalName)result;
857
				return;
858
			}
859
		} catch (UnknownCdmTypeException e) {
860
			nameToBeFilled.addParsingProblem(ParserProblem.RankNotSupported);
861
			nameToBeFilled.setTitleCache(fullNameString,true);
862
			// FIXME Quick fix, otherwise search would not deilver results for unparsable names
863
			nameToBeFilled.setNameCache(fullNameString,true);
864
			// END
865
			logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " +  fullNameString);
866
			//return result;
867
			return;
868
		}
869
	}
870

    
871
	private void handleHybridBits(NonViralName nameToBeFilled) {
872
		//uninomial
873
		String uninomial = CdmUtils.Nz(nameToBeFilled.getGenusOrUninomial());
874
		boolean isUninomialHybrid = uninomial.startsWith(hybridSign);
875
		if (isUninomialHybrid){
876
			nameToBeFilled.setMonomHybrid(true);
877
			nameToBeFilled.setGenusOrUninomial(uninomial.replace(hybridSign, ""));
878
		}
879
		//infrageneric
880
		String infrageneric = CdmUtils.Nz(nameToBeFilled.getInfraGenericEpithet());
881
		boolean isInfraGenericHybrid = infrageneric.startsWith(hybridSign);
882
		if (isInfraGenericHybrid){
883
			nameToBeFilled.setBinomHybrid(true);
884
			nameToBeFilled.setInfraGenericEpithet(infrageneric.replace(hybridSign, ""));
885
		}
886
		//species Epi
887
		String speciesEpi = CdmUtils.Nz(nameToBeFilled.getSpecificEpithet());
888
		boolean isSpeciesHybrid = speciesEpi.startsWith(hybridSign);
889
		if (isSpeciesHybrid){
890
			if (StringUtils.isBlank(infrageneric)){
891
				nameToBeFilled.setBinomHybrid(true);
892
			}else{
893
				nameToBeFilled.setTrinomHybrid(true);
894
			}
895
			nameToBeFilled.setSpecificEpithet(speciesEpi.replace(hybridSign, ""));
896
		}
897
		//infra species
898
		String infraSpeciesEpi = CdmUtils.Nz(nameToBeFilled.getInfraSpecificEpithet());
899
		boolean isInfraSpeciesHybrid = infraSpeciesEpi.startsWith(hybridSign);
900
		if (isInfraSpeciesHybrid){
901
			nameToBeFilled.setTrinomHybrid(true);
902
			nameToBeFilled.setInfraSpecificEpithet(infraSpeciesEpi.replace(hybridSign, ""));
903
		}
904
		
905
	}
906

    
907
	private String removeHybridBlanks(String fullNameString) {
908
		fullNameString = fullNameString.replaceAll(hybridFull, " "+hybridSign).trim();
909
		return fullNameString;
910
	}
911

    
912
	/**
913
	 * Author parser for external use
914
	 * @param nonViralName
915
	 * @param authorString
916
	 * @throws StringNotParsableException
917
	 */
918
	public void parseAuthors(NonViralName nonViralName, String authorString) throws StringNotParsableException{
919
		TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
920
		Integer[] years = new Integer[4];
921
		Class<? extends NonViralName> clazz = nonViralName.getClass();
922
		fullAuthors(authorString, authors, years, clazz);
923
		nonViralName.setCombinationAuthorTeam(authors[0]);
924
		nonViralName.setExCombinationAuthorTeam(authors[1]);
925
		nonViralName.setBasionymAuthorTeam(authors[2]);
926
		nonViralName.setExBasionymAuthorTeam(authors[3]);
927
		if (nonViralName instanceof ZoologicalName){
928
			ZoologicalName zooName = CdmBase.deproxy(nonViralName, ZoologicalName.class);
929
			zooName.setPublicationYear(years[0]);
930
			zooName.setOriginalPublicationYear(years[2]);
931
		}
932
	}
933
	
934
	/**
935
	 * @param nameToBeFilled
936
	 * @param fullNameString
937
	 * @param authorString
938
	 */
939
	public void handleAuthors(NonViralName nameToBeFilled, String fullNameString, String authorString) {
940
		TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
941
		Integer[] years = new Integer[4];
942
		try {
943
			Class<? extends NonViralName> clazz = nameToBeFilled.getClass();
944
			fullAuthors(authorString, authors, years, clazz);
945
		} catch (StringNotParsableException e) {
946
			nameToBeFilled.addParsingProblem(ParserProblem.UnparsableAuthorPart);
947
			nameToBeFilled.setTitleCache(fullNameString,true);
948
			// FIXME Quick fix, otherwise search would not deliver results for unparsable names
949
			nameToBeFilled.setNameCache(fullNameString,true);
950
			// END
951
			logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");;
952
		}
953
		nameToBeFilled.setCombinationAuthorTeam(authors[0]);
954
		nameToBeFilled.setExCombinationAuthorTeam(authors[1]);
955
		nameToBeFilled.setBasionymAuthorTeam(authors[2]);
956
		nameToBeFilled.setExBasionymAuthorTeam(authors[3]);
957
		if (nameToBeFilled instanceof ZoologicalName){
958
			ZoologicalName zooName = (ZoologicalName)nameToBeFilled;
959
			zooName.setPublicationYear(years[0]);
960
			zooName.setOriginalPublicationYear(years[2]);
961
		}
962
	}
963

    
964
	
965
	
966
	/**
967
	 * Guesses the rank of uninomial depending on the typical endings for ranks
968
	 * @param nameToBeFilled
969
	 * @param string
970
	 */
971
	private Rank guessUninomialRank(NonViralName nameToBeFilled, String uninomial) {
972
		Rank result = Rank.GENUS();
973
		if (nameToBeFilled.isInstanceOf(BotanicalName.class)){
974
			if (false){
975
				//
976
			}else if (uninomial.endsWith("phyta") || uninomial.endsWith("mycota") ){  //plants, fungi
977
				result = Rank.SECTION_BOTANY();
978
			}else if (uninomial.endsWith("bionta")){
979
				result = Rank.SUBKINGDOM();  //TODO
980
			}else if (uninomial.endsWith("phytina")|| uninomial.endsWith("mycotina")  ){  //plants, fungi
981
				result = Rank.SUBSECTION_BOTANY();
982
			}else if (uninomial.endsWith("opsida") || uninomial.endsWith("phyceae") || uninomial.endsWith("mycetes")){  //plants, algae, fungi
983
				result = Rank.CLASS();
984
			}else if (uninomial.endsWith("idae") || uninomial.endsWith("phycidae") || uninomial.endsWith("mycetidae")){ //plants, algae, fungi
985
				result = Rank.SUBCLASS();
986
			}else if (uninomial.endsWith("ales")){
987
				result = Rank.ORDER();
988
			}else if (uninomial.endsWith("ineae")){
989
				result = Rank.SUBORDER();
990
			}else if (uninomial.endsWith("aceae")){
991
					result = Rank.FAMILY();
992
			}else if (uninomial.endsWith("oideae")){
993
				result = Rank.SUBFAMILY();
994
			}else if (uninomial.endsWith("eae")){
995
				result = Rank.TRIBE();
996
			}else if (uninomial.endsWith("inae")){
997
				result = Rank.SUBTRIBE();
998
			}else if (uninomial.endsWith("ota")){
999
				result = Rank.KINGDOM();  //TODO
1000
			}
1001
		}else if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
1002
			if (false){
1003
				//
1004
			}else if (uninomial.endsWith("oideae")){
1005
				result = Rank.SUPERFAMILY();
1006
			}else if (uninomial.endsWith("idae")){
1007
					result = Rank.FAMILY();
1008
			}else if (uninomial.endsWith("inae")){
1009
				result = Rank.SUBFAMILY();
1010
			}else if (uninomial.endsWith("inae")){
1011
				result = Rank.SUBFAMILY();
1012
			}else if (uninomial.endsWith("ini")){
1013
				result = Rank.TRIBE();
1014
			}else if (uninomial.endsWith("ina")){
1015
				result = Rank.SUBTRIBE();
1016
			}
1017
		}else{
1018
			//
1019
		}
1020
		return result;
1021
	}
1022

    
1023
	/**
1024
	 * Parses the fullAuthorString
1025
	 * @param fullAuthorString
1026
	 * @return array of Teams containing the Team[0], 
1027
	 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
1028
	 */
1029
	protected void fullAuthors (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years, Class<? extends NonViralName> clazz)
1030
			throws StringNotParsableException{
1031
		if (fullAuthorString == null || clazz == null){
1032
			return;
1033
		}
1034
		fullAuthorString = fullAuthorString.trim();
1035
		
1036
		//Botanic
1037
		if ( BotanicalName.class.isAssignableFrom(clazz) ){
1038
			if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
1039
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1040
			}
1041
		}
1042
		//Zoo
1043
		else if ( ZoologicalName.class.isAssignableFrom(clazz) ){
1044
			if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
1045
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1046
			}
1047
		}else {
1048
			//TODO
1049
			logger.warn ("Full author String parsable only for defined BotanicalNames or ZoologicalNames but this is " + clazz.getSimpleName());
1050
			throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
1051
		}
1052
		fullAuthorsChecked(fullAuthorString, authors, years);
1053
	}
1054
	
1055
	/*
1056
	 * like fullTeams but without trim and match check
1057
	 */
1058
	protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years){
1059
		int authorTeamStart = 0;
1060
		Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
1061
		
1062
		if (basionymMatcher.find(0)){
1063
			
1064
			String basString = basionymMatcher.group();
1065
			basString = basString.replaceFirst(basStart, "");
1066
			basString = basString.replaceAll(basEnd, "").trim();
1067
			authorTeamStart = basionymMatcher.end(1) + 1;
1068
			
1069
			TeamOrPersonBase<?>[] basAuthors = new TeamOrPersonBase[2];
1070
			Integer[] basYears = new Integer[2];
1071
			authorsAndEx(basString, basAuthors, basYears);
1072
			authors[2]= basAuthors[0];
1073
			years[2] = basYears[0];
1074
			authors[3]= basAuthors[1];
1075
			years[3] = basYears[1];
1076
		}
1077
		if (fullAuthorString.length() >= authorTeamStart){
1078
			TeamOrPersonBase<?>[] combinationAuthors = new TeamOrPersonBase[2];;
1079
			Integer[] combinationYears = new Integer[2];
1080
			authorsAndEx(fullAuthorString.substring(authorTeamStart), combinationAuthors, combinationYears);
1081
			authors[0]= combinationAuthors[0] ;
1082
			years[0] = combinationYears[0];
1083
			authors[1]= combinationAuthors[1];
1084
			years[1] = combinationYears[1];
1085
		}
1086
	}
1087
	
1088
	
1089
	/**
1090
	 * Parses the author and ex-author String
1091
	 * @param authorTeamString String representing the author and the ex-author team
1092
	 * @return array of Teams containing the Team[0] and the ExTeam[1]
1093
	 */
1094
	protected void authorsAndEx (String authorTeamString, TeamOrPersonBase<?>[] authors, Integer[] years){
1095
		//TODO noch allgemeiner am anfang durch Replace etc. 
1096
		authorTeamString = authorTeamString.trim();
1097
		authorTeamString = authorTeamString.replaceFirst(oWs + "ex" + oWs, " ex. " ); 
1098
		//int authorEnd = authorTeamString.length();
1099
		int authorBegin = 0;
1100
		
1101
		Matcher exAuthorMatcher = exAuthorPattern.matcher(authorTeamString);
1102
		if (exAuthorMatcher.find(0)){
1103
			authorBegin = exAuthorMatcher.end(0);
1104
			int exAuthorEnd = exAuthorMatcher.start(0);
1105
			String exString = authorTeamString.substring(0, exAuthorEnd).trim();
1106
			authors [1] = author(exString);
1107
		}
1108
		zooOrBotanicAuthor(authorTeamString.substring(authorBegin), authors, years );
1109
	}
1110
	
1111
	/**
1112
	 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
1113
	 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies 
1114
	 * to the year in case of an zoological name. 
1115
	 * @param authorString
1116
	 * @param team
1117
	 * @param year
1118
	 */
1119
	protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase<?>[] team, Integer[] year){
1120
		if (authorString == null){ 
1121
			return;
1122
		}else if ((authorString = authorString.trim()).length() == 0){
1123
			return;
1124
		}
1125
		Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
1126
		if (zooAuthorAddidtionMatcher.find()){
1127
			int index = zooAuthorAddidtionMatcher.start(0); 
1128
			String strYear = authorString.substring(index);
1129
			strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
1130
			year[0] = Integer.valueOf(strYear);
1131
			authorString = authorString.substring(0, index).trim();
1132
		}
1133
		team[0] = author(authorString);
1134
	}
1135
	
1136
	
1137
	/**
1138
	 * Parses an authorTeam String and returns the Team 
1139
	 * !!! TODO (atomization not yet implemented)
1140
	 * @param authorTeamString String representing the author team
1141
	 * @return an Team 
1142
	 */
1143
	protected TeamOrPersonBase<?> author (String authorString){
1144
		if (authorString == null){ 
1145
			return null;
1146
		}else if ((authorString = authorString.trim()).length() == 0){
1147
			return null;
1148
		}else if (! teamSplitterPattern.matcher(authorString).find() && ! authorIsAlwaysTeam){
1149
			//1 Person
1150
			Person result = Person.NewInstance();
1151
			result.setNomenclaturalTitle(authorString);
1152
			return result;
1153
		}else{
1154
			return parsedTeam(authorString);
1155
		} 
1156
		
1157
	}
1158
	
1159
	/**
1160
	 * Parses an authorString (reprsenting a team into the single authors and add
1161
	 * them to the return Team.
1162
	 * @param authorString
1163
	 * @return Team
1164
	 */
1165
	protected Team parsedTeam(String authorString){
1166
		Team result = Team.NewInstance();
1167
		String[] authors = authorString.split(teamSplitter);
1168
		for (String author : authors){
1169
			Person person = Person.NewInstance();
1170
			person.setNomenclaturalTitle(author);
1171
			result.addTeamMember(person); 
1172
		}
1173
		return result;
1174
	}
1175
	
1176

    
1177
	//Parsing of the given full name that has been identified as hybrid already somewhere else.
1178
	private void parseHybrid(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty){
1179
	    logger.warn("parseHybrid --> function not yet implemented");
1180
	    
1181
//	    String nonHybridName  = fullNameString;
1182
//	    boolean isMonomHybrid = isMonomHybrid(fullNameString);
1183
//	    if (isMonomHybrid){
1184
//	    	nonHybridName.replaceAll(hybrid, "");
1185
//	    }
1186
//	    
1187
//	    String[] split = nonHybridName.split("\\s");
1188
//	    parseFullName(nameToBeFilled, nonHybridName, rank, makeEmpty);
1189
//	    
1190
//	    nonHybridName = nonHybridName.replaceAll(hybrid, " ");
1191
//
1192
//	    boolean isBinomHybrid = isBinomHybrid(split);
1193
////	    boolean isTrinomHybrid = isTrinomHybrid(split);
1194
//	    
1195
//	    nonHybridName = nonHybridName.replaceAll(hybrid, " ");
1196
//	     
1197
//	    parseFullName(nameToBeFilled, nonHybridName, rank, makeEmpty);
1198
//	    nameToBeFilled.getTitleCache();
1199
//	    nameToBeFilled.setMonomHybrid(isMonomHybrid);
1200
//	    nameToBeFilled.setBinomHybrid(isBinomHybrid);
1201
//	    nameToBeFilled.setBinomHybrid(isTrinomHybrid);
1202
	    
1203
	    nameToBeFilled.setTitleCache(fullNameString,true);
1204
	    return;
1205
    }
1206
	
1207
//	private boolean isBinomHybrid(String[] split) {
1208
//		if (){
1209
//			
1210
//		}
1211
//		return false;
1212
//	}
1213

    
1214
	private boolean isMonomHybrid(String fullNameString) {
1215
		Matcher matcher = hybridPattern.matcher(fullNameString);
1216
		boolean find = matcher.find();
1217
		int start = matcher.start();
1218
		if (find == true && start == 0){
1219
			return true;
1220
		}else{
1221
			return false;
1222
		}
1223
	}
1224

    
1225
//	// Parsing of the given full name that has been identified as a cultivar already somwhere else.
1226
//	// The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
1227
	public BotanicalName parseCultivar(String fullName)	throws StringNotParsableException{
1228
		CultivarPlantName result = null;
1229
		    String[] words = oWsPattern.split(fullName);
1230
			
1231
		    /* ---------------------------------------------------------------------------------
1232
		     * cultivar
1233
		     * ---------------------------------------------------------------------------------*/
1234
			if (fullName.indexOf(" '") != 0){
1235
				//TODO location of 'xx' is probably not arbitrary
1236
				Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
1237
				if (cultivarMatcher.find()){
1238
					String namePart = fullName.replaceFirst(cultivar, "");
1239
					
1240
					String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
1241
					//OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
1242
					
1243
					result = (CultivarPlantName)parseFullName(namePart);
1244
					result.setCultivarName(cultivarPart);
1245
				}	
1246
			}else if (fullName.indexOf(" cv.") != 0){
1247
				// cv. is old form (not official) 
1248
				throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
1249
			}
1250
				
1251
		    /* ---------------------------------------------------------------------------------
1252
		     * cultivar group
1253
		     * ---------------------------------------------------------------------------------
1254
		     */ 
1255
			// TODO in work 
1256
			//Ann. this is not the official way of noting cultivar groups
1257
		    String group = oWs + "Group" + oWs + capitalEpiWord + end;
1258
			Pattern groupRE = Pattern.compile(group);
1259
			Matcher groupMatcher = groupRE.matcher(fullName);
1260
			if (groupMatcher.find()){
1261
		    	if (! words[words.length - 2].equals("group")){
1262
		            throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
1263
		        }else{
1264
		        	
1265
		        	String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
1266
		        	//OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
1267
		        	
1268
		        	String cultivarPart = words[words.length -1];
1269
		        	result = (CultivarPlantName)parseFullName(namePart);
1270
		        	if (result != null){
1271
		        		result.setCultivarName(cultivarPart);
1272
			        	
1273
		        		//OLD: result.setCultivarGroupName(cultivarPart);
1274
		        	}
1275
		        }
1276

    
1277
		    }
1278
//		    // ---------------------------------------------------------------------------------
1279
//		    if ( result = "" ){
1280
//		        return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
1281
//		    }else{
1282
//		        return result;
1283
	//	    }
1284
			return result; //TODO
1285
	}
1286

    
1287
	
1288
	private void makeEmpty(NonViralName nameToBeFilled){
1289
		nameToBeFilled.setRank(null);
1290
		nameToBeFilled.setTitleCache(null, false);
1291
		nameToBeFilled.setFullTitleCache(null, false);
1292
		nameToBeFilled.setNameCache(null, false);
1293
				
1294
		nameToBeFilled.setAppendedPhrase(null);
1295
		nameToBeFilled.setBasionymAuthorTeam(null);
1296
		nameToBeFilled.setCombinationAuthorTeam(null);
1297
		nameToBeFilled.setExBasionymAuthorTeam(null);
1298
		nameToBeFilled.setExCombinationAuthorTeam(null);
1299
		nameToBeFilled.setAuthorshipCache(null, false);
1300
		
1301
		
1302
		//delete problems except check rank
1303
		makeProblemEmpty(nameToBeFilled);
1304
				
1305
		// TODO ?
1306
		//nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
1307

    
1308
		
1309
		nameToBeFilled.setGenusOrUninomial(null);
1310
		nameToBeFilled.setInfraGenericEpithet(null);
1311
		nameToBeFilled.setSpecificEpithet(null);
1312
		nameToBeFilled.setInfraSpecificEpithet(null);
1313
		
1314
		nameToBeFilled.setNomenclaturalMicroReference(null);
1315
		nameToBeFilled.setNomenclaturalReference(null);
1316
		
1317
		nameToBeFilled.setHybridFormula(false);
1318
		nameToBeFilled.setMonomHybrid(false);
1319
		nameToBeFilled.setBinomHybrid(false);
1320
		nameToBeFilled.setTrinomHybrid(false);
1321
		
1322
		if (nameToBeFilled.isInstanceOf(BotanicalName.class)){
1323
			BotanicalName botanicalName = (BotanicalName)nameToBeFilled;
1324
			botanicalName.setAnamorphic(false);
1325
		}
1326
		
1327
		if (nameToBeFilled.isInstanceOf(ZoologicalName.class)){
1328
			ZoologicalName zoologicalName = (ZoologicalName)nameToBeFilled;
1329
			zoologicalName.setBreed(null);
1330
			zoologicalName.setOriginalPublicationYear(null);
1331
			
1332
		}
1333
	}
1334
	
1335
	
1336
    
1337
}
(2-2/5)