Project

General

Profile

Download (56.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * 
3
 */
4
package eu.etaxonomy.cdm.strategy.parser;
5

    
6
import java.util.regex.Matcher;
7
import java.util.regex.Pattern;
8

    
9
import org.apache.log4j.Logger;
10
import org.joda.time.DateTimeFieldType;
11
import org.joda.time.Partial;
12

    
13
import eu.etaxonomy.cdm.common.CdmUtils;
14
import eu.etaxonomy.cdm.model.agent.Person;
15
import eu.etaxonomy.cdm.model.agent.Team;
16
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
17
import eu.etaxonomy.cdm.model.common.IParsable;
18
import eu.etaxonomy.cdm.model.common.TimePeriod;
19
import eu.etaxonomy.cdm.model.name.BacterialName;
20
import eu.etaxonomy.cdm.model.name.BotanicalName;
21
import eu.etaxonomy.cdm.model.name.CultivarPlantName;
22
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
23
import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
24
import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
25
import eu.etaxonomy.cdm.model.name.NonViralName;
26
import eu.etaxonomy.cdm.model.name.Rank;
27
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
28
import eu.etaxonomy.cdm.model.name.ZoologicalName;
29
import eu.etaxonomy.cdm.model.reference.Article;
30
import eu.etaxonomy.cdm.model.reference.BibtexReference;
31
import eu.etaxonomy.cdm.model.reference.Book;
32
import eu.etaxonomy.cdm.model.reference.BookSection;
33
import eu.etaxonomy.cdm.model.reference.Generic;
34
import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
35
import eu.etaxonomy.cdm.model.reference.IVolumeReference;
36
import eu.etaxonomy.cdm.model.reference.Journal;
37
import eu.etaxonomy.cdm.model.reference.ReferenceBase;
38
import eu.etaxonomy.cdm.model.reference.StrictReferenceBase;
39
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
40
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
41

    
42

    
43
/**
44
 * @author a.mueller
45
 *
46
 */
47
public class NonViralNameParserImpl implements INonViralNameParser<NonViralName> {
48
	private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
49
	
50
	// good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
51
	
52
	final static boolean MAKE_EMPTY = true;
53
	final static boolean MAKE_NOT_EMPTY = false;
54
	
55
	
56
	public static NonViralNameParserImpl NewInstance(){
57
		return new NonViralNameParserImpl();
58
	}
59
	
60
	/* (non-Javadoc)
61
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
62
	 */
63
	public NonViralName parseSimpleName(String simpleName, Rank rank){
64
		//TODO
65
		logger.warn("parseSimpleName() not yet implemented. Uses parseFullName() instead");
66
		return parseFullName(simpleName, null, rank);
67
	}
68

    
69

    
70
	/* (non-Javadoc)
71
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
72
	 */
73
	public NonViralName parseSimpleName(String simpleName){
74
		return parseSimpleName(simpleName, null);
75
	}
76
	
77
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
78
		return getNonViralNameInstance(fullString, code, null);
79
	}
80
	
81
	public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
82
		NonViralName result = null;
83
		if(code ==null) {
84
			boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
85
			boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
86
			boolean isBacteriologicalName = false;
87
			boolean isCultivatedPlantName = false;
88
			if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
89
				if (isBotanicalName){
90
					result = BotanicalName.NewInstance(rank);
91
				}else{
92
					result = CultivarPlantName.NewInstance(rank);
93
				}
94
			}else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
95
				result = ZoologicalName.NewInstance(rank);
96
			}else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
97
				result = BacterialName.NewInstance(rank);
98
			}else {
99
				result =  NonViralName.NewInstance(rank);
100
			}
101
		} else {
102
			switch (code) {
103
			case ICBN:
104
				result = BotanicalName.NewInstance(rank);
105
				break;
106
			case ICZN:
107
				result = ZoologicalName.NewInstance(rank);
108
				break;
109
			case ICNCP:
110
				logger.warn("ICNCP parsing not yet implemented");
111
				result = CultivarPlantName.NewInstance(rank);
112
				break;
113
			case ICNB:
114
				logger.warn("ICNB not yet implemented");
115
				result = BacterialName.NewInstance(rank);
116
				break;
117
			case ICVCN:
118
				logger.error("Viral name is not a NonViralName !!");
119
				break;
120
			default:
121
				// FIXME Unreachable code
122
				logger.error("Unknown Nomenclatural Code !!");
123
			}
124
		}
125
		return result;
126
	}
127
	
128

    
129
	/* (non-Javadoc)
130
	 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullReference(java.lang.String)
131
	 */
132
	public NonViralName parseReferencedName(String fullReferenceString) {
133
		return parseReferencedName(fullReferenceString, null, null);
134
	}
135
	
136
	/* (non-Javadoc)
137
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
138
	 */
139
	public NonViralName parseReferencedName(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
140
		if (fullReferenceString == null){
141
			return null;
142
		}else{
143
			NonViralName result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
144
			parseReferencedName(result, fullReferenceString, rank, MAKE_EMPTY);
145
			return result;
146
		}
147
	}
148
	
149
	private String standardize(NonViralName nameToBeFilled, String fullReferenceString, boolean makeEmpty){
150
		//Check null and standardize
151
		if (fullReferenceString == null){
152
			//return null;
153
			return null;
154
		}
155
		if (makeEmpty){
156
			makeEmpty(nameToBeFilled);
157
		}
158
		fullReferenceString = fullReferenceString.replaceAll(oWs , " ");
159
		fullReferenceString = fullReferenceString.trim();
160
		if ("".equals(fullReferenceString)){
161
			fullReferenceString = null;
162
		}
163
		return fullReferenceString;
164
	}
165

    
166
	/**
167
	 * Returns the regEx to be used for the full-name depending on the code
168
	 * @param nameToBeFilled
169
	 * @return
170
	 */
171
	private String getLocalFullName(NonViralName nameToBeFilled){
172
		if (nameToBeFilled instanceof ZoologicalName){
173
			return anyZooFullName;
174
		}else if (nameToBeFilled instanceof NonViralName) {
175
			return anyBotanicFullName;  //TODO ?
176
		}else if (nameToBeFilled instanceof BotanicalName) {
177
			return anyBotanicFullName;
178
		}else{
179
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
180
			return null;
181
		}
182
	}
183
	
184
	/**
185
	 * Returns the regEx to be used for the fsimple-name depending on the code
186
	 * @param nameToBeFilled
187
	 * @return
188
	 */
189
	private String getLocalSimpleName(NonViralName nameToBeFilled){
190
		if (nameToBeFilled instanceof ZoologicalName){
191
			return anyZooName;
192
		}else if (nameToBeFilled instanceof NonViralName){
193
			return anyZooName;  //TODO ?
194
		}else if (nameToBeFilled instanceof BotanicalName) {
195
			return anyBotanicName;
196
		}else{
197
			logger.warn("nameToBeFilled class not supported ("+nameToBeFilled.getClass()+")");
198
			return null;
199
		}
200
	}
201
	
202
	private Matcher getMatcher(String regEx, String matchString){
203
		Pattern pattern = Pattern.compile(regEx);
204
		Matcher matcher = pattern.matcher(matchString);
205
		return matcher;
206
	}
207
	
208
	/* (non-Javadoc)
209
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
210
	 */
211
	public void parseReferencedName(NonViralName nameToBeFilled, String fullReferenceString, Rank rank, boolean makeEmpty) {
212
		//standardize
213
		fullReferenceString = standardize(nameToBeFilled, fullReferenceString, makeEmpty);
214
		if (fullReferenceString == null){
215
			return;
216
		}
217
		makeProblemEmpty(nameToBeFilled);
218
		
219
		//make nomenclatural status and replace it by empty string 
220
	    fullReferenceString = parseNomStatus(fullReferenceString, nameToBeFilled);
221
	    nameToBeFilled.setProblemEnds(fullReferenceString.length());
222
		
223
	    //get full name reg
224
		String localFullName = getLocalFullName(nameToBeFilled);
225
		//get full name reg
226
		String localSimpleName = getLocalSimpleName(nameToBeFilled);
227
		
228
		//separate name and reference part
229
		String nameAndRefSeparator = "(^" + localFullName + ")("+ referenceSeperator + ")";
230
		Matcher nameAndRefSeparatorMatcher = getMatcher (nameAndRefSeparator, fullReferenceString);
231
		
232
		Matcher onlyNameMatcher = getMatcher (localFullName, fullReferenceString);
233
		Matcher onlySimpleNameMatcher = getMatcher (localSimpleName, fullReferenceString);
234
		
235
		if (nameAndRefSeparatorMatcher.find()){  
236
			makeNameWithReference(nameToBeFilled, fullReferenceString, nameAndRefSeparatorMatcher, rank, makeEmpty);
237
		}else if (onlyNameMatcher.matches()){
238
			makeEmpty = false;
239
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);
240
		}else if (onlySimpleNameMatcher.matches()){
241
			makeEmpty = false;
242
			parseFullName(nameToBeFilled, fullReferenceString, rank, makeEmpty);	//simpleName not yet implemented
243
		}else{
244
			makeNoFullRefMatch(nameToBeFilled, fullReferenceString, rank);
245
		}
246
		//problem handling. Start and end solved in subroutines
247
		if (! nameToBeFilled.hasProblem()){
248
			makeProblemEmpty(nameToBeFilled);
249
		}
250
	}
251
	
252
	private void makeProblemEmpty(IParsable parsable){
253
		parsable.setHasProblem(false);
254
		parsable.setProblemStarts(-1);
255
		parsable.setProblemEnds(-1);
256
	}
257
	
258
	private void makeNoFullRefMatch(NonViralName nameToBeFilled, String fullReferenceString, Rank rank){
259
	    //try to parse first part as name, but keep in mind full string is not parsable
260
		int start = 0;
261
		
262
		String localFullName = getLocalFullName(nameToBeFilled);
263
		Matcher fullNameMatcher = getMatcher (pStart + localFullName, fullReferenceString);
264
		if (fullNameMatcher.find()){
265
			String fullNameString = fullNameMatcher.group(0);
266
			nameToBeFilled.setProtectedNameCache(false);  //TODO why is is true?
267
			parseFullName(nameToBeFilled, fullNameString, rank, false);
268
			String sure = nameToBeFilled.getNameCache();
269
			start = sure.length();
270
		}
271
		
272
//		String localSimpleName = getLocalSimpleName(nameToBeFilled);
273
//		Matcher simpleNameMatcher = getMatcher (start + localSimpleName, fullReferenceString);
274
//		if (simpleNameMatcher.find()){
275
//			String simpleNameString = simpleNameMatcher.group(0);
276
//			parseFullName(nameToBeFilled, simpleNameString, rank, false);
277
//			start = simpleNameString.length();
278
//		}
279
		
280
		//don't parse if name can't be separated
281
		nameToBeFilled.setHasProblem(true);
282
		nameToBeFilled.setTitleCache(fullReferenceString);
283
		nameToBeFilled.setFullTitleCache(fullReferenceString);
284
		// FIXME Quick fix, otherwise search would not deilver results for unparsable names
285
		nameToBeFilled.setNameCache(fullReferenceString);
286
		// END
287
		nameToBeFilled.setProblemStarts(start);
288
		nameToBeFilled.setProblemEnds(fullReferenceString.length());
289
		logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");    
290
	}
291
	
292
	private void makeNameWithReference(NonViralName nameToBeFilled, 
293
			String fullReferenceString, 
294
			Matcher nameAndRefSeparatorMatcher,
295
			Rank rank,
296
			boolean makeEmpty){
297
		
298
		String nameAndSeparator = nameAndRefSeparatorMatcher.group(0); 
299
	    String name = nameAndRefSeparatorMatcher.group(1); 
300
	    String referenceString = fullReferenceString.substring(nameAndRefSeparatorMatcher.end());
301
	    
302
	    // is reference an in ref?
303
	    String separator = nameAndSeparator.substring(name.length());
304
		boolean isInReference = separator.matches(inReferenceSeparator);
305
	    
306
	    //parse subparts
307
	    
308
		int oldProblemEnds = nameToBeFilled.getProblemEnds();
309
		parseFullName(nameToBeFilled, name, rank, makeEmpty);
310
	    nameToBeFilled.setProblemEnds(oldProblemEnds);
311
		parseReference(nameToBeFilled, referenceString, isInReference); 
312
	    INomenclaturalReference ref = (INomenclaturalReference)nameToBeFilled.getNomenclaturalReference();
313

    
314
	    //problem start
315
	    int start = nameToBeFilled.getProblemStarts();
316
	    int nameLength = name.length();
317
	    int nameAndSeparatorLength = nameAndSeparator.length();
318
	    int fullRefLength = nameToBeFilled.getFullTitleCache().length();
319
	    
320
	    if (nameToBeFilled.isProtectedTitleCache() || nameToBeFilled.getRank() == null ){
321
	    	start = Math.max(0, start);
322
		}else{
323
			if (ref != null && ref.getHasProblem()){
324
				start = Math.max(nameAndSeparatorLength, start);
325
		    	//TODO search within ref
326
			}	
327
		}
328
	    
329
	    //end
330
	    int end = nameToBeFilled.getProblemEnds();
331
	    
332
	    if (ref != null && ref.getHasProblem()){
333
	    	end = Math.min(nameAndSeparatorLength + ref.getProblemEnds(), end);
334
	    }else{
335
	    	if (nameToBeFilled.isProtectedTitleCache() ){
336
	    		end = Math.min(end, nameAndSeparatorLength);
337
	    		//TODO search within name
338
			}
339
	    }
340
	    nameToBeFilled.setProblemStarts(start);
341
	    nameToBeFilled.setProblemEnds(end);
342

    
343
	    //delegate has problem to name
344
	    if (ref != null && ref.getHasProblem()){
345
	    	nameToBeFilled.setHasProblem(true);
346
	    }
347
	    
348
	    ReferenceBase<?> nomRef;
349
		if ( (nomRef = nameToBeFilled.getNomenclaturalReference()) != null ){
350
			nomRef.setAuthorTeam((TeamOrPersonBase)nameToBeFilled.getCombinationAuthorTeam());
351
		}
352
	}
353
	
354
	//TODO make it an Array of status
355
	/**
356
	 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
357
	 * The nomenclatural status part ist deleted from the reference String.
358
	 * @return  String the new (shortend) reference String 
359
	 */ 
360
	private String parseNomStatus(String fullString, NonViralName nameToBeFilled) {
361
		String statusString;
362
		Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")"); 
363
		Matcher hasStatusMatcher = hasStatusPattern.matcher(fullString);
364
		
365
		if (hasStatusMatcher.find()) {
366
			String statusPhrase = hasStatusMatcher.group(0);
367
			
368
			Pattern statusPattern = Pattern.compile(pNomStatus);
369
			Matcher statusMatcher = statusPattern.matcher(statusPhrase);
370
			statusMatcher.find();
371
			statusString = statusMatcher.group(0);
372
			try {
373
				NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString);
374
				NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
375
				nameToBeFilled.addStatus(nomStatus);
376
			    
377
				fullString = fullString.replace(statusPhrase, "");
378
			} catch (UnknownCdmTypeException e) {
379
				//Do nothing
380
			}
381
		}
382
		return fullString;
383
	}
384
	
385
	
386
	private void parseReference(NonViralName nameToBeFilled, String strReference, boolean isInReference){
387
		INomenclaturalReference ref;
388
		String originalStrReference = strReference;
389
		
390
		//End (just delete end (e.g. '.', may be ambigous for yearPhrase, but no real information gets lost
391
		Matcher endMatcher = getMatcher(referenceEnd + end, strReference);
392
		if (endMatcher.find()){
393
			String endPart = endMatcher.group(0);
394
			strReference = strReference.substring(0, strReference.length() - endPart.length());
395
		}
396
		
397
		String pDetailYear = ".*" + detailSeparator + detail + fWs + yearSeperator + fWs + yearPhrase + fWs + end;
398
		Matcher detailYearMatcher = getMatcher(pDetailYear, strReference);
399
		
400
		//if (referencePattern.matcher(reference).matches() ){
401
		if (detailYearMatcher.matches() ){
402
			
403
			//year
404
			String yearPart = null;
405
			String pYearPhrase = yearSeperator + fWs + yearPhrase + fWs + end;
406
			Matcher yearPhraseMatcher = getMatcher(pYearPhrase, strReference);
407
			if (yearPhraseMatcher.find()){
408
				yearPart = yearPhraseMatcher.group(0);
409
				strReference = strReference.substring(0, strReference.length() - yearPart.length());
410
				yearPart = yearPart.replaceFirst(pStart + yearSeperator, "").trim();
411
			}
412
			
413
			//detail
414
			String pDetailPhrase = detailSeparator + fWs + detail + fWs + end;
415
			Matcher detailPhraseMatcher = getMatcher(pDetailPhrase, strReference);
416
			if (detailPhraseMatcher.find()){
417
				String detailPart = detailPhraseMatcher.group(0);
418
				strReference = strReference.substring(0, strReference.length() - detailPart.length());
419
				detailPart = detailPart.replaceFirst(pStart + detailSeparator, "").trim();
420
				nameToBeFilled.setNomenclaturalMicroReference(detailPart);
421
			}
422
			//parse title and author
423
			ref = parseReferenceTitle(strReference, yearPart, isInReference);
424
			if (ref.hasProblem()){
425
				ref.setTitleCache( (isInReference?"in ":"") +  originalStrReference);
426
			}
427
			nameToBeFilled.setNomenclaturalReference((ReferenceBase)ref);
428
			int end = Math.min(strReference.length(), ref.getProblemEnds());
429
			ref.setProblemEnds(end);
430
	    }else{  //detail and year not parsable
431
	    	ref = Generic.NewInstance();
432
	    	ref.setTitleCache(strReference);
433
	    	ref.setProblemEnds(strReference.length());
434
	    	ref.setHasProblem(true);
435
	    	nameToBeFilled.setHasProblem(true);
436
	    	nameToBeFilled.setNomenclaturalReference((ReferenceBase)ref);
437
	    }
438
	}
439
		
440
	/**
441
	 * Parses the referenceTitlePart, including the author volume and edition.
442
	 * @param reference
443
	 * @param year
444
	 * @return
445
	 */
446
	private INomenclaturalReference parseReferenceTitle(String strReference, String year, boolean isInReference){
447
		INomenclaturalReference result = null;
448
		
449
		Matcher refSineDetailMatcher = referenceSineDetailPattern.matcher(strReference);
450
		if (! refSineDetailMatcher.matches()){
451
			//TODO ?
452
		}
453
		
454
		Matcher articleMatcher = getMatcher(pArticleReference, strReference);
455
		
456
		Matcher softArticleMatcher = getMatcher(pSoftArticleReference, strReference);
457
		Matcher bookMatcher = getMatcher(pBookReference, strReference);
458
		Matcher bookSectionMatcher = getMatcher(pBookSectionReference, strReference);
459
		
460
		
461
		if(isInReference == false){
462
			if (bookMatcher.matches() ){
463
				result = parseBook(strReference);
464
			}else{
465
				logger.warn("Non-InRef must be book but does not match book");
466
				result = Book.NewInstance();
467
				makeUnparsableRefTitle(result, strReference);
468
			}
469
		}else{  //inRef
470
			if (articleMatcher.matches()){
471
				//article without separators like ","
472
				result = parseArticle(strReference);
473
			}else if (softArticleMatcher.matches()){
474
				result = parseArticle(strReference);
475
			}else if (bookSectionMatcher.matches()){
476
				result = parseBookSection(strReference);
477
			}else{
478
				result =  Generic.NewInstance();
479
				makeUnparsableRefTitle(result, "in " + strReference);
480
			}
481
		}
482
		//make year
483
		if (makeYear(result, year) == false){
484
			//TODO
485
			logger.warn("Year could not be parsed");
486
		}
487
		result.setProblemStarts(0);
488
		result.setProblemEnds(strReference.length());
489
		return result;
490
	}
491
	
492
	private void makeUnparsableRefTitle(INomenclaturalReference result, String reference){
493
		result.setTitleCache(reference);
494
		result.setHasProblem(true);
495
	}
496
	
497
	/**
498
	 * Parses a single date string. If the string is not parsable a StringNotParsableException is thrown
499
	 * @param singleDateString
500
	 * @return
501
	 * @throws StringNotParsableException
502
	 */
503
	private static Partial parseSingleDate(String singleDateString) 
504
			throws StringNotParsableException{
505
		Partial dt = new Partial();
506
		if (CdmUtils.isNumeric(singleDateString)){
507
			try {
508
				Integer year = Integer.valueOf(singleDateString.trim());
509
				if (year > 1750 && year < 2050){
510
					dt = dt.with(DateTimeFieldType.year(), year);
511
				}else{
512
					dt = null;
513
				}
514
			} catch (NumberFormatException e) {
515
				logger.debug("Not a Integer format in getCalendar()");
516
				throw new StringNotParsableException(singleDateString + "is not parsable as a single Date");
517
			}
518
		}
519
		return dt;
520
	}
521

    
522
	
523
	/**
524
	 * Parses the publication date part. 
525
	 * @param nomRef
526
	 * @param year
527
	 * @return If the string is not parsable <code>false</code>
528
	 * is returned. <code>True</code> otherwise
529
	 */
530
	private boolean makeYear(INomenclaturalReference nomRef, String year){
531
		boolean result = true;
532
		if (year == null){
533
			return false;
534
		}
535
		if ("".equals(year.trim())){
536
			return true;
537
		}
538
		String[] years = year.split("-");
539
		Partial startDate = null;
540
		Partial endDate = null;
541
		try{
542
			if (years.length < 1){
543
				throw new StringNotParsableException();
544
			}else {
545
				startDate = parseSingleDate(years[0]);
546
				if (years.length > 1){
547
					endDate = parseSingleDate(years[1]);
548
					if (years.length > 2){
549
						throw new StringNotParsableException();
550
					}
551
				}
552
			}
553
		}catch(StringNotParsableException npe){
554
			result = false;
555
		}
556
		TimePeriod datePublished = TimePeriod.NewInstance(startDate, endDate);
557
		
558
		if (nomRef instanceof BookSection){
559
			handleBookSectionYear((BookSection)nomRef, datePublished);
560
		}else if (nomRef instanceof StrictReferenceBase){
561
			((StrictReferenceBase)nomRef).setDatePublished(datePublished);	
562
		}else if (nomRef instanceof BibtexReference){
563
				((BibtexReference)nomRef).setDatePublished(datePublished);
564
				((BibtexReference)nomRef).setYear(year);
565
		}else{
566
			throw new ClassCastException("nom Ref is not of type StrictReferenceBase but " + (nomRef == null? "(null)" : nomRef.getClass()));
567
		}
568
		return result;	
569
	}
570
	
571
	private String makeVolume(IVolumeReference nomRef, String strReference){
572
		//volume
573
		String volPart = null;
574
		String pVolPhrase = volumeSeparator +  volume + end;
575
		Matcher volPhraseMatcher = getMatcher(pVolPhrase, strReference);
576
		if (volPhraseMatcher.find()){
577
			volPart = volPhraseMatcher.group(0);
578
			strReference = strReference.substring(0, strReference.length() - volPart.length());
579
			volPart = volPart.replaceFirst(pStart + volumeSeparator, "").trim();
580
			nomRef.setVolume(volPart);
581
		}
582
		return strReference;
583
	}
584
	
585
	private String makeEdition(Book book, String strReference){
586
		//volume
587
		String editionPart = null;
588
		Matcher editionPhraseMatcher = getMatcher(pEditionPart, strReference);
589
		
590
		Matcher editionVolumeMatcher = getMatcher(pEditionVolPart, strReference);
591
		boolean isEditionAndVol = editionVolumeMatcher.find();
592
		
593
		if (editionPhraseMatcher.find()){
594
			editionPart = editionPhraseMatcher.group(0);
595
			int pos = strReference.indexOf(editionPart);
596
			int posEnd = pos + editionPart.length();
597
			if (isEditionAndVol){
598
				posEnd++;  //delete also comma
599
			}
600
			strReference = strReference.substring(0, pos) + strReference.substring(posEnd);
601
			editionPart = editionPart.replaceFirst(pStart + editionSeparator, "").trim();
602
			book.setEdition(editionPart);
603
		}
604
		return strReference;
605
	}
606
	
607
	private Book parseBook(String reference){
608
		Book result = Book.NewInstance();
609
		reference = makeEdition(result, reference);
610
		reference = makeVolume(result, reference);
611
		result.setTitle(reference);
612
		return result;
613
	}
614
	
615
	
616
	private Article parseArticle(String reference){
617
		//if (articlePatter)
618
		//(type, author, title, volume, editor, series;
619
		Article result = Article.NewInstance();
620
		reference = makeVolume(result, reference);
621
		Journal inJournal = Journal.NewInstance();
622
		inJournal.setTitle(reference);
623
		result.setInJournal(inJournal);
624
		return result;
625
	}
626
	
627
	private BookSection parseBookSection(String reference){
628
		BookSection result = BookSection.NewInstance();
629
		String[] parts = reference.split(referenceAuthorSeparator, 2);
630
		if (parts.length != 2){
631
			logger.warn("Unexpected number of parts");
632
			result.setTitleCache(reference);
633
		}else{
634
			String authorString = parts[0];
635
			String bookString = parts[1];
636
			
637
			TeamOrPersonBase<?> authorTeam = author(authorString);
638
			Book inBook = parseBook(bookString);
639
			inBook.setAuthorTeam(authorTeam);
640
			result.setInBook(inBook);
641
		}
642
		return result;
643
	}
644
	
645
	/**
646
	 * If the publication date of a book section and it's inBook do differ this is usually 
647
	 * caused by the fact that a book has been published during a period, because originally 
648
	 * it consisted of several parts that only later where put together to one book.
649
	 * If so, the book section's publication date may be a point in time (year or month of year)
650
	 * whereas the books publication date may be a period of several years.
651
	 * Therefore a valid nomenclatural reference string should use the book sections 
652
	 * publication date rather then the book's publication date.<BR>
653
	 * This method in general adds the publication date to the book section.
654
	 * An exception exists if the publication date is a period. Then the parser
655
	 * assumes that the nomenclatural reference string does not follow the above rule but
656
	 * the books publication date is set.
657
	 * @param bookSection
658
	 * @param datePublished
659
	 */
660
	private void handleBookSectionYear(BookSection bookSection, TimePeriod datePublished){
661
		if (datePublished == null || datePublished.getStart() == null || bookSection == null){
662
			return;
663
		}
664
		if (datePublished.isPeriod() && bookSection.getInBook() != null){
665
			bookSection.getInBook().setDatePublished(datePublished);
666
		}else{
667
			bookSection.setDatePublished(datePublished);	
668
		}
669
	}
670
	
671
	
672
	/* (non-Javadoc)
673
	 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullName(java.lang.String)
674
	 */
675
	public NonViralName parseFullName(String fullNameString){
676
		return parseFullName(fullNameString, null, null);
677
	}
678
	
679
	
680
	/* (non-Javadoc)
681
	 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
682
	 */
683
	public NonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
684
		if (fullNameString == null){
685
			return null;
686
		}else{
687
			NonViralName result = getNonViralNameInstance(fullNameString, nomCode, rank);
688
			parseFullName(result, fullNameString, rank, false);
689
			return result;
690
		}
691
	}
692
		
693
	
694
	public void parseFullName(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty) {
695
		//TODO prol. etc.
696
		
697
		if (nameToBeFilled == null){
698
			logger.warn("name is null!");
699
		}
700
		String authorString = null;
701
		
702
		if (fullNameString == null){
703
			return;
704
		}
705
		if (makeEmpty){
706
			makeEmpty(nameToBeFilled);
707
		}
708
		fullNameString.replaceAll(oWs , " ");
709
		//TODO 
710
		// OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces		   
711
		fullNameString = fullNameString.trim();
712
		
713
		String[] epi = pattern.split(fullNameString);
714
		try {
715
	    	//cultivars //TODO 2 implement cultivars
716
//		    if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
717
//		    	result = parseCultivar(fullName);
718
//		    }
719
		    //hybrids //TODO 2 implement hybrids
720
		    //else 
721
		    if (hybridPattern.matcher(fullNameString).matches() ){
722
		    	nameToBeFilled = parseHybrid(fullNameString);
723
		    }
724
		    else if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
725
		    	//supraGeneric
726
				if (rank != null && (rank.isSupraGeneric()|| rank.isGenus())){
727
					nameToBeFilled.setRank(rank);
728
					nameToBeFilled.setGenusOrUninomial(epi[0]);
729
				} 
730
				//genus
731
				else {
732
					rank = null;
733
					nameToBeFilled.setRank(rank);
734
					nameToBeFilled.setGenusOrUninomial(epi[0]);
735
					nameToBeFilled.setHasProblem(true);
736
					nameToBeFilled.setProblemStarts(0);
737
					nameToBeFilled.setProblemEnds(epi[0].length());
738
				}
739
				authorString = fullNameString.substring(epi[0].length());
740
			}
741
			//infra genus
742
			else if (infraGenusPattern.matcher(fullNameString).matches()){
743
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[1]));
744
				nameToBeFilled.setGenusOrUninomial(epi[0]);
745
				nameToBeFilled.setInfraGenericEpithet(epi[2]);
746
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
747
			}
748
			//aggr. or group
749
			else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
750
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[2]));
751
				nameToBeFilled.setGenusOrUninomial(epi[0]);
752
				nameToBeFilled.setSpecificEpithet(epi[1]);
753
			}
754
			//species
755
			else if (speciesPattern.matcher(fullNameString).matches()){
756
				nameToBeFilled.setRank(Rank.SPECIES());
757
				nameToBeFilled.setGenusOrUninomial(epi[0]);
758
				nameToBeFilled.setSpecificEpithet(epi[1]);
759
				authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
760
			}
761
			//autonym
762
			else if (autonymPattern.matcher(fullNameString).matches()){
763
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[epi.length - 2]));
764
				nameToBeFilled.setGenusOrUninomial(epi[0]);
765
				nameToBeFilled.setSpecificEpithet(epi[1]);
766
				nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
767
				int lenSpecies = 2 + epi[0].length()+epi[1].length();
768
				int lenInfraSpecies =  2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
769
				authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
770
			}
771
			//infraSpecies
772
			else if (infraSpeciesPattern.matcher(fullNameString).matches()){
773
				String infraSpecRankEpi = epi[2];
774
				String infraSpecEpi = epi[3];
775
				if ("tax.".equals(infraSpecRankEpi)){
776
					infraSpecRankEpi += " " +  epi[3];
777
					infraSpecEpi = epi[4];
778
				}
779
				nameToBeFilled.setRank(Rank.getRankByAbbreviation(infraSpecRankEpi));
780
				nameToBeFilled.setGenusOrUninomial(epi[0]);
781
				nameToBeFilled.setSpecificEpithet(epi[1]);
782
				nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
783
				authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankEpi.length() + 1 + infraSpecEpi.length());
784
			}//old infraSpecies
785
			else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
786
				boolean implemented = false;
787
				if (implemented){
788
					nameToBeFilled.setRank(Rank.getRankByNameOrAbbreviation(epi[2]));
789
					nameToBeFilled.setGenusOrUninomial(epi[0]);
790
					nameToBeFilled.setSpecificEpithet(epi[1]);
791
					//TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
792
					authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
793
				}else{
794
					nameToBeFilled.setHasProblem(true);
795
					nameToBeFilled.setTitleCache(fullNameString);
796
					// FIXME Quick fix, otherwise search would not deilver results for unparsable names
797
					nameToBeFilled.setNameCache(fullNameString);
798
					// END
799
					logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
800
				}
801
			}
802
			//none
803
			else{ 
804
				nameToBeFilled.setHasProblem(true);
805
				nameToBeFilled.setTitleCache(fullNameString);
806
				// FIXME Quick fix, otherwise search would not deilver results for unparsable names
807
				nameToBeFilled.setNameCache(fullNameString);
808
				// END
809
				logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
810
		    }
811
			//authors
812
		    if (nameToBeFilled != null && authorString != null && authorString.trim().length() > 0 ){ 
813
				TeamOrPersonBase<?>[] authors = new TeamOrPersonBase[4];
814
				Integer[] years = new Integer[4];
815
				try {
816
					Class<? extends NonViralName> clazz = nameToBeFilled.getClass();
817
					fullAuthors(authorString, authors, years, clazz);
818
				} catch (StringNotParsableException e) {
819
					nameToBeFilled.setHasProblem(true);
820
					nameToBeFilled.setTitleCache(fullNameString);
821
					// FIXME Quick fix, otherwise search would not deilver results for unparsable names
822
					nameToBeFilled.setNameCache(fullNameString);
823
					// END
824
					logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");;
825
				}
826
				nameToBeFilled.setCombinationAuthorTeam(authors[0]);
827
				nameToBeFilled.setExCombinationAuthorTeam(authors[1]);
828
				nameToBeFilled.setBasionymAuthorTeam(authors[2]);
829
				nameToBeFilled.setExBasionymAuthorTeam(authors[3]);
830
				if (nameToBeFilled instanceof ZoologicalName){
831
					ZoologicalName zooName = (ZoologicalName)nameToBeFilled;
832
					zooName.setPublicationYear(years[0]);
833
					zooName.setOriginalPublicationYear(years[2]);
834
				}
835
			}	
836
			//return
837
			if (nameToBeFilled != null){
838
		    	//return(BotanicalName)result;
839
				return;
840
			}
841
		} catch (UnknownCdmTypeException e) {
842
			nameToBeFilled.setHasProblem(true);
843
			nameToBeFilled.setTitleCache(fullNameString);
844
			// FIXME Quick fix, otherwise search would not deilver results for unparsable names
845
			nameToBeFilled.setNameCache(fullNameString);
846
			// END
847
			logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " +  fullNameString);
848
			//return result;
849
			return;
850
		}
851
	}
852

    
853
	
854
	
855
	/**
856
	 * Parses the fullAuthorString
857
	 * @param fullAuthorString
858
	 * @return array of Teams containing the Team[0], 
859
	 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
860
	 */
861
	protected void fullAuthors (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years, Class<? extends NonViralName> clazz)
862
			throws StringNotParsableException{
863
		fullAuthorString = fullAuthorString.trim();
864
		if (fullAuthorString == null || clazz == null){
865
			return;
866
		}
867
		//Botanic
868
		if ( BotanicalName.class.isAssignableFrom(clazz) ){
869
			if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
870
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
871
			}
872
		}
873
		//Zoo
874
		else if ( ZoologicalName.class.isAssignableFrom(clazz) ){
875
			if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
876
				throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
877
			}
878
		}else {
879
			//TODO
880
			logger.warn ("not yet implemented");
881
			throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
882
		}
883
		fullAuthorsChecked(fullAuthorString, authors, years);
884
	}
885
	
886
	/*
887
	 * like fullTeams but without trim and match check
888
	 */
889
	protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase<?>[] authors, Integer[] years){
890
		int authorTeamStart = 0;
891
		Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
892
		
893
		if (basionymMatcher.find(0)){
894
			
895
			String basString = basionymMatcher.group();
896
			basString = basString.replaceFirst(basStart, "");
897
			basString = basString.replaceAll(basEnd, "").trim();
898
			authorTeamStart = basionymMatcher.end(1) + 1;
899
			
900
			TeamOrPersonBase<?>[] basAuthors = new TeamOrPersonBase[2];
901
			Integer[] basYears = new Integer[2];
902
			authorsAndEx(basString, basAuthors, basYears);
903
			authors[2]= basAuthors[0];
904
			years[2] = basYears[0];
905
			authors[3]= basAuthors[1];
906
			years[3] = basYears[1];
907
		}
908
		if (fullAuthorString.length() >= authorTeamStart){
909
			TeamOrPersonBase<?>[] combinationAuthors = new TeamOrPersonBase[2];;
910
			Integer[] combinationYears = new Integer[2];
911
			authorsAndEx(fullAuthorString.substring(authorTeamStart), combinationAuthors, combinationYears);
912
			authors[0]= combinationAuthors[0] ;
913
			years[0] = combinationYears[0];
914
			authors[1]= combinationAuthors[1];
915
			years[1] = combinationYears[1];
916
		}
917
	}
918
	
919
	
920
	/**
921
	 * Parses the author and ex-author String
922
	 * @param authorTeamString String representing the author and the ex-author team
923
	 * @return array of Teams containing the Team[0] and the ExTeam[1]
924
	 */
925
	protected void authorsAndEx (String authorTeamString, TeamOrPersonBase<?>[] authors, Integer[] years){
926
		//TODO noch allgemeiner am anfang durch Replace etc. 
927
		authorTeamString = authorTeamString.trim();
928
		authorTeamString = authorTeamString.replaceFirst(oWs + "ex" + oWs, " ex. " ); 
929
		int authorEnd = authorTeamString.length();
930
		
931
		Matcher exAuthorMatcher = exAuthorPattern.matcher(authorTeamString);
932
		if (exAuthorMatcher.find(0)){
933
			int exAuthorBegin = exAuthorMatcher.end(0);
934
			String exString = authorTeamString.substring(exAuthorBegin).trim();
935
			authorEnd = exAuthorMatcher.start(0);
936
			authors [1] = author(exString);
937
		}
938
		zooOrBotanicAuthor(authorTeamString.substring(0, authorEnd), authors, years );
939
	}
940
	
941
	/**
942
	 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
943
	 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies 
944
	 * to the year in case of an zoological name. 
945
	 * @param authorString
946
	 * @param team
947
	 * @param year
948
	 */
949
	protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase<?>[] team, Integer[] year){
950
		if (authorString == null){ 
951
			return;
952
		}else if ((authorString = authorString.trim()).length() == 0){
953
			return;
954
		}
955
		Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
956
		if (zooAuthorAddidtionMatcher.find()){
957
			int index = zooAuthorAddidtionMatcher.start(0); 
958
			String strYear = authorString.substring(index);
959
			strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
960
			year[0] = Integer.valueOf(strYear);
961
			authorString = authorString.substring(0, index).trim();
962
		}
963
		team[0] = author(authorString);
964
	}
965
	
966
	
967
	/**
968
	 * Parses an authorTeam String and returns the Team 
969
	 * !!! TODO (atomization not yet implemented)
970
	 * @param authorTeamString String representing the author team
971
	 * @return an Team 
972
	 */
973
	protected TeamOrPersonBase<?> author (String authorString){
974
		if (authorString == null){ 
975
			return null;
976
		}else if ((authorString = authorString.trim()).length() == 0){
977
			return null;
978
		}else if (! teamSplitterPattern.matcher(authorString).find()){
979
			//1 Person
980
			Person result = Person.NewInstance();
981
			result.setNomenclaturalTitle(authorString);
982
			return result;
983
		}else{
984
			return parsedTeam(authorString);
985
		} 
986
		
987
	}
988
	
989
	/**
990
	 * Parses an authorString (reprsenting a team into the single authors and add
991
	 * them to the return Team.
992
	 * @param authorString
993
	 * @return Team
994
	 */
995
	protected Team parsedTeam(String authorString){
996
		Team result = Team.NewInstance();
997
		String[] authors = authorString.split(teamSplitter);
998
		for (String author : authors){
999
			Person person = Person.NewInstance();
1000
			person.setNomenclaturalTitle(author);
1001
			result.addTeamMember(person); 
1002
		}
1003
		return result;
1004
	}
1005
	
1006

    
1007
	//Parsing of the given full name that has been identified as hybrid already somewhere else.
1008
	private BotanicalName parseHybrid(String fullName){
1009
	    logger.warn("parseHybrid --> function not yet implemented");
1010
	    BotanicalName result = BotanicalName.NewInstance(null);
1011
	    result.setTitleCache(fullName);
1012
	    return result;
1013
    }
1014
	
1015
//	// Parsing of the given full name that has been identified as a cultivar already somwhere else.
1016
//	// The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
1017
	public BotanicalName parseCultivar(String fullName)	throws StringNotParsableException{
1018
		CultivarPlantName result = null;
1019
		    String[] words = oWsPattern.split(fullName);
1020
			
1021
		    /* ---------------------------------------------------------------------------------
1022
		     * cultivar
1023
		     * ---------------------------------------------------------------------------------*/
1024
			if (fullName.indexOf(" '") != 0){
1025
				//TODO location of 'xx' is probably not arbitrary
1026
				Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
1027
				if (cultivarMatcher.find()){
1028
					String namePart = fullName.replaceFirst(cultivar, "");
1029
					
1030
					String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
1031
					//OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
1032
					
1033
					result = (CultivarPlantName)parseFullName(namePart);
1034
					result.setCultivarName(cultivarPart);
1035
				}	
1036
			}else if (fullName.indexOf(" cv.") != 0){
1037
				// cv. is old form (not official) 
1038
				throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
1039
			}
1040
				
1041
		    /* ---------------------------------------------------------------------------------
1042
		     * cultivar group
1043
		     * ---------------------------------------------------------------------------------
1044
		     */ 
1045
			// TODO in work 
1046
			//Ann. this is not the official way of noting cultivar groups
1047
		    String group = oWs + "Group" + oWs + capitalEpiWord + end;
1048
			Pattern groupRE = Pattern.compile(group);
1049
			Matcher groupMatcher = groupRE.matcher(fullName);
1050
			if (groupMatcher.find()){
1051
		    	if (! words[words.length - 2].equals("group")){
1052
		            throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
1053
		        }else{
1054
		        	
1055
		        	String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
1056
		        	//OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
1057
		        	
1058
		        	String cultivarPart = words[words.length -1];
1059
		        	result = (CultivarPlantName)parseFullName(namePart);
1060
		        	if (result != null){
1061
		        		result.setCultivarName(cultivarPart);
1062
			        	
1063
		        		//OLD: result.setCultivarGroupName(cultivarPart);
1064
		        	}
1065
		        }
1066

    
1067
		    }
1068
//		    // ---------------------------------------------------------------------------------
1069
//		    if ( result = "" ){
1070
//		        return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
1071
//		    }else{
1072
//		        return result;
1073
	//	    }
1074
			return result; //TODO
1075
	}
1076

    
1077
	
1078
	private void makeEmpty(NonViralName nameToBeFilled){
1079
		nameToBeFilled.setRank(null);
1080
		nameToBeFilled.setTitleCache(null, false);
1081
		nameToBeFilled.setFullTitleCache(null, false);
1082
		nameToBeFilled.setNameCache(null, false);
1083
				
1084
		nameToBeFilled.setAppendedPhrase(null);
1085
		//TODO ??
1086
		//nameToBeFilled.setBasionym(basionym);
1087
		nameToBeFilled.setBasionymAuthorTeam(null);
1088
		nameToBeFilled.setCombinationAuthorTeam(null);
1089
		nameToBeFilled.setExBasionymAuthorTeam(null);
1090
		nameToBeFilled.setExCombinationAuthorTeam(null);
1091
		nameToBeFilled.setAuthorshipCache(null, false);
1092
		
1093
		
1094
		nameToBeFilled.setHasProblem(false);
1095
		// TODO ?
1096
		//nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
1097

    
1098
		
1099
		nameToBeFilled.setGenusOrUninomial(null);
1100
		nameToBeFilled.setInfraGenericEpithet(null);
1101
		nameToBeFilled.setSpecificEpithet(null);
1102
		nameToBeFilled.setInfraSpecificEpithet(null);
1103
		
1104
		nameToBeFilled.setNomenclaturalMicroReference(null);
1105
		nameToBeFilled.setNomenclaturalReference(null);
1106
		
1107
		if (nameToBeFilled instanceof BotanicalName){
1108
			BotanicalName botanicalName = (BotanicalName)nameToBeFilled;
1109
			botanicalName.setAnamorphic(false);
1110
			botanicalName.setHybridFormula(false);
1111
			botanicalName.setMonomHybrid(false);
1112
			botanicalName.setBinomHybrid(false);
1113
			botanicalName.setTrinomHybrid(false);
1114
		}
1115
		
1116
		if (nameToBeFilled instanceof ZoologicalName){
1117
			ZoologicalName zoologicalName = (ZoologicalName)nameToBeFilled;
1118
			zoologicalName.setBreed(null);
1119
			zoologicalName.setOriginalPublicationYear(null);
1120
		}
1121
		
1122
		//TODO adapt to @Version of versionable entity, throws still optimistic locking error
1123
		//nameToBeFilled.setUpdated(Calendar.getInstance());
1124
		// TODO nameToBeFilled.setUpdatedBy(updatedBy);		
1125
	}
1126
	
1127
	
1128
    
1129
    //splitter
1130
    static String epiSplitter = "(\\s+|\\(|\\))"; //( ' '+| '(' | ')' )
1131
    static Pattern pattern = Pattern.compile(epiSplitter); 
1132
    
1133
    //some useful non-terminals
1134
    static String pStart = "^";
1135
    static String end = "$";
1136
    static String anyEnd = ".*" + end;
1137
    static String oWs = "\\s+"; //obligatory whitespaces
1138
    static String fWs = "\\s*"; //facultative whitespcace
1139
    
1140
    static String capitalWord = "\\p{javaUpperCase}\\p{javaLowerCase}*";
1141
    static String nonCapitalWord = "\\p{javaLowerCase}+";
1142
    static String word = "(" + capitalWord + "|" + nonCapitalWord + ")"; //word (capital or non-capital) with no '.' at the end
1143
    
1144
    
1145
    static String capitalDotWord = capitalWord + "\\.?"; //capitalWord with facultativ '.' at the end
1146
    static String nonCapitalDotWord = nonCapitalWord + "\\.?"; //nonCapitalWord with facultativ '.' at the end
1147
    static String dotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.?"; //word (capital or non-capital) with facultativ '.' at the end
1148
    static String obligateDotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.+"; //word (capital or non-capital) with obligate '.' at the end
1149
    
1150
    //Words used in an epethiton for a TaxonName
1151
    static String nonCapitalEpiWord = "[a-z\u00EF\\-]+";
1152
    static String capitalEpiWord = "[A-Z]"+ nonCapitalEpiWord;
1153
     
1154
    
1155
   //years
1156
    static String month = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
1157
    static String singleYear = "\\b" + "(?:17|18|19|20)" + "\\d{2}" + "\\b";                      // word boundary followed by either 17,18,19, or 20 (not captured) followed by 2 digits 	      
1158
    static String yearPhrase = singleYear + "("+ fWs + "-" + fWs + singleYear + ")?" ;
1159
    								//+ "(" + month + ")?)" ;                 // optional month
1160
    
1161
    //seperator
1162
    static String yearSeperator = "\\." + oWs;
1163
    static String detailSeparator = ":" + oWs;
1164
    static String referenceSeparator1 = "," + oWs ;
1165
    static String inReferenceSeparator = oWs + "in" + oWs;
1166
    static String referenceSeperator = "(" + referenceSeparator1 +"|" + inReferenceSeparator + ")" ;
1167
    static String referenceAuthorSeparator = ","+ oWs;
1168
    static String volumeSeparator = oWs ; // changed from "," + fWs
1169
    static String referenceEnd = "\\.";
1170
     
1171
    
1172
    //status
1173
    static String status = "";
1174
    
1175
    //marker
1176
    static String InfraGenusMarker = "(subgen.|subg.|sect.|subsect.|ser.|subser.|t.infgen.)";
1177
    static String aggrOrGroupMarker = "(aggr.|agg.|group)";
1178
    static String infraSpeciesMarker = "(subsp.|convar.|var.|subvar.|f.|subf.|f.spec.|tax." + fWs + "infrasp.)";
1179
    static String oldInfraSpeciesMarker = "(prol.|proles|race|taxon|sublusus)";
1180
    
1181
    
1182
    //AuthorString
1183
    static String authorPart = "(" + "(D'|L'|'t\\s)?" + capitalDotWord + "('" + nonCapitalDotWord + ")?" + "|da|de(n|l|\\sla)?)" ;
1184
    static String author = "(" + authorPart + "(" + fWs + "|-)" + ")+" + "(f.|fil.|secundus)?";
1185
    static String teamSplitter = fWs + "(&)" + fWs;
1186
    static String authorTeam = fWs + "(" + author + teamSplitter + ")*" + author + "(" + teamSplitter + "al.)?" + fWs;
1187
    static String exString = "(ex.?)";
1188
    static String authorAndExTeam = authorTeam + "(" + oWs + exString + oWs + authorTeam + ")?";
1189
    static String basStart = "\\(";
1190
    static String basEnd = "\\)";
1191
    static String botanicBasionymAuthor = basStart + "(" + authorAndExTeam + ")" + basEnd;  // '(' and ')' is for evaluation with RE.paren(x)
1192
    static String fullBotanicAuthorString = fWs + "((" + botanicBasionymAuthor +")?" + fWs + authorAndExTeam + "|" + botanicBasionymAuthor +")"+ fWs;
1193
    static String facultFullBotanicAuthorString = "(" +  fullBotanicAuthorString + ")?" ; 
1194
        
1195
    //Zoo. Author
1196
    //TODO does zoo author have ex-Author?
1197
    static String zooAuthorYearSeperator = ",";
1198
    static String zooAuthorAddidtion = fWs + zooAuthorYearSeperator + fWs + singleYear;
1199
    static String zooAuthorTeam = authorTeam + zooAuthorAddidtion;
1200
    static String zooBasionymAuthor = basStart + "(" + zooAuthorTeam + ")" + basEnd;
1201
    static String fullZooAuthorString = fWs + "((" + zooBasionymAuthor +")?" + fWs + zooAuthorTeam + "|" + zooBasionymAuthor +")"+ fWs;
1202
    static String facultFullZooAuthorString = "(" +  fullZooAuthorString + ")?" ; 
1203
 
1204
    static String facultFullAuthorString2 = "(" + facultFullBotanicAuthorString + "|" + facultFullZooAuthorString + ")";
1205
    
1206
    static String basionymAuthor = "(" + botanicBasionymAuthor + "|" + zooBasionymAuthor+ ")";
1207
    static String fullAuthorString = "(" + fullBotanicAuthorString + "|" + fullZooAuthorString+ ")";
1208
    
1209
    //details
1210
    //TODO still very simple
1211
    
1212
    
1213
    static String nr2 = "\\d{1,2}";
1214
    static String nr4 = "\\d{1,4}";
1215
    static String nr5 = "\\d{1,5}";
1216
    
1217
   
1218
    static String pPage = nr5 + "[a-z]?";
1219
    static String pStrNo = "n\u00B0" + fWs + "(" + nr4 + ")";
1220
    
1221
    static String pBracketNr = "\\[" + nr4 + "\\]";
1222
    static String pFolBracket = "\\[fol\\." + fWs + "\\d{1,2}(-\\d{1,2})?\\]";
1223
    
1224
    static String pStrTab = "tab\\." + fWs + nr4 + "(" + fWs + "(B|\u00DF|\\(\\d{1,3}\\)))?";
1225
    static String pFig = "fig." + fWs + nr4 + "[a-z]?";
1226
    static String pFigs = pFig + "(-" + nr4 + ")?";
1227
    //static String pTabFig = pStrTab + "(," + fWs + pFigs + ")?";
1228
    static String pTabFig = "(" + pStrTab + "|" + pFigs + ")";
1229
    
1230
    //e.g.: p455; p.455; pp455-456; pp.455-456; pp.455,456; 455, 456; pages 456-457; pages 456,567
1231
    static String pSinglePages = "(p\\.?)?" + fWs + pPage + "(," + pTabFig +")?";
1232
    static String pMultiPages = "(pp\\.?|pages)?" + fWs + pPage + fWs + "(-|,)" +fWs + pPage ;
1233
    //static String pPages = pPage + "(," + fWs + "(" + pPage + "|" + pTabFig + ")" + ")?";
1234
    static String pPages = "(" + pSinglePages +"|" + pMultiPages +")";
1235
    
1236
    
1237
    static String pCouv = "couv\\." + fWs + "\\d{1,3}";
1238
    
1239
    static String pTabSpecial = "tab\\." + fWs + "(ad" + fWs + "\\d{1,3}|alphab)";
1240
    static String pPageSpecial = nr4 + fWs + "(in obs|, Expl\\. Tab)";
1241
    static String pSpecialGardDict = capitalWord + oWs + "n\u00B0" + oWs + "\\d{1,2}";
1242
    //TODO
1243
    // static String pSpecialDetail = "(in err|in tab|sine pag|add\\. & emend|Emend|""\\d{3}"" \\[\\d{3}\\])";
1244
 // static String pSpecialDetail = "(in err|in tab|sine pag|add\\. & emend|Emend|""\\d{3}"" \\[\\d{3}\\])";
1245
    static String pSpecialDetail = "(in err|in tab|sine pag|add\\.)";
1246
    
1247
    
1248
//    Const romI = "[Ii]{0,3}"
1249
//    	Const romX = "[Xx]{0,3}"
1250
//    	Const romC = "[Cc]{0,3}"
1251
//    	Const romM = "[Mm]{0,3}"   
1252
//    ' roman numbers
1253
//    ' !! includes empty string: ""
1254
//    romOne = "([Vv]?" & romI & or_ & "(IV|iv)" & or_ & "(IX|ix)" & ")"
1255
//    romTen = "([Ll]?" & romX & or_ & "(XL|xl)" & or_ & "(XC|xc)" & ")"
1256
//    romHun = "([Dd]?" & romC & or_ & "(CD|cd)" & or_ & "(CM|cm)" & ")"
1257
//    romNr = "(?=[MDCLXVImdclxvi])(((" & romM & ")?" & romHun & ")?" & romTen & ")?" & romOne
1258
    static String pRomNr = "ljfweffaflas"; //TODO rom number have to be tested first
1259
    
1260
    static String pDetailAlternatives = "(" + pPages + "|" + pPageSpecial + "|" + pStrNo + "|" + pBracketNr +
1261
    			"|" + pTabFig + "|" + pTabSpecial + "|" + pFolBracket + "|" + pCouv + "|" + pRomNr + "|" + 
1262
    			pSpecialGardDict + "|" + pSpecialDetail + ")";
1263

    
1264
    static String detail = pDetailAlternatives;
1265
    
1266
    //reference
1267
    static String volume = nr4 + "(\\("+ nr4  + "\\))?"; 	      
1268
    static String anySepChar = "(," + fWs + ")";
1269
    
1270
    static int authorSeparatorMaxPosition = 4;
1271
    static String pTitleWordSeparator = "(\\."+ fWs+"|" + oWs + ")";
1272
    static String referenceTitleFirstPart = "(" + word + pTitleWordSeparator + ")";
1273
    static String referenceTitle = referenceTitleFirstPart + "*" + dotWord;
1274
    static String referenceTitleWithSepCharacters = "(" + referenceTitle  + anySepChar + "?)" + "{1,}";
1275
    static String referenceTitleWithoutAuthor = "(" + referenceTitleFirstPart + ")" + "{"+ (authorSeparatorMaxPosition -1) +",}" + dotWord + 
1276
    			anySepChar + referenceTitleWithSepCharacters;   //separators exist and first separator appears at position authorSeparatorMaxPosition or later
1277
   
1278
    static String editionSeparator = oWs + "ed\\.?" + oWs;
1279
    static String pEdition = nr2;
1280
    
1281
    static String pVolPart = volumeSeparator +  volume;
1282
    static String pEditionPart = editionSeparator +  pEdition;
1283
    static String pEditionVolPart = editionSeparator +  pEdition + fWs + "," + volumeSeparator +  volume;
1284
    static String pEditionVolAlternative = "(" + pEditionPart + "|" + pVolPart + "|" + pEditionVolPart + ")?";
1285
    
1286
    static String pVolRefTitle = referenceTitle + "(" + pVolPart + ")?";
1287
    static String softEditionVolRefTitle = referenceTitleWithSepCharacters + pEditionVolAlternative;
1288
    static String softVolNoAuthorRefTitle = referenceTitleWithoutAuthor + "(" + volumeSeparator +  volume + ")?";
1289
    
1290
    static String pBookReference = softEditionVolRefTitle;
1291
    static String pBookSectionReference = authorTeam + referenceAuthorSeparator + softEditionVolRefTitle;
1292
    static String pArticleReference = pVolRefTitle  ; 
1293
    static String pSoftArticleReference = softVolNoAuthorRefTitle  ; 
1294
    
1295
    
1296
    static String pReferenceSineDetail = "(" + pArticleReference + "|" + pBookSectionReference + "|" + pBookReference + ")"; 
1297
    
1298
    static String pReference = pReferenceSineDetail + detailSeparator + detail + 
1299
					yearSeperator + yearPhrase + "(" + referenceEnd + ")?"; 
1300

    
1301
    //static String strictBook = referenc 
1302
    
1303
    
1304
    
1305
    static Pattern referencePattern = Pattern.compile(pReference);
1306
    static Pattern referenceSineDetailPattern = Pattern.compile(pReferenceSineDetail);
1307
    
1308
    static String pNomStatusNom = "nom\\." + fWs + "(superfl\\.|nud\\.|illeg\\.|inval\\.|cons\\.|alternativ\\.|subnud.|"+
1309
    					"rej\\.|rej\\."+ fWs + "prop\\.|provis\\.)";
1310
    static String pNomStatusOrthVar = "orth\\." + fWs + "var\\.";
1311
    static String pNomStatus = "(" + pNomStatusNom + "|" + pNomStatusOrthVar +  ")";
1312
    static String pNomStatusPhrase1 = "," + fWs + pNomStatus;
1313
    static String pNomStatusPhrase2 = "\\[" + fWs + pNomStatus + "\\]";
1314
    
1315
    static String pNomStatusPhrase = "(?:" + pNomStatusPhrase1 + "|" + pNomStatusPhrase2 + ")";
1316

    
1317
// Soraya
1318
//opus utique oppr.
1319
//pro syn.
1320
//provisional synonym
1321
//fossil name
1322

    
1323
    
1324
    
1325
    //cultivars and hybrids
1326
    static String cultivar = oWs + "'..+'"; //Achtung mit Hochkomma in AuthorNamen
1327
    static String cultivarMarker = oWs + "(cv.|')";
1328
    static String hybrid = oWs + "((x|X)" + oWs + "|notho)";//= ( x )|( X )|( notho)
1329
    
1330
    //  Name String
1331
    static String genusOrSupraGenus = capitalEpiWord;
1332
    static String infraGenus = capitalEpiWord + oWs + InfraGenusMarker + oWs + capitalEpiWord;
1333
    static String aggrOrGroup = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + aggrOrGroupMarker;
1334
    static String species = capitalEpiWord + oWs +  nonCapitalEpiWord;
1335
    static String infraSpecies = capitalEpiWord + oWs +  nonCapitalEpiWord + oWs + infraSpeciesMarker + oWs + nonCapitalEpiWord;
1336
    static String oldInfraSpecies = capitalEpiWord + oWs +  nonCapitalEpiWord + oWs + oldInfraSpeciesMarker + oWs + nonCapitalEpiWord;
1337
    static String autonym = capitalEpiWord + oWs + "(" + nonCapitalEpiWord +")" + oWs + fullBotanicAuthorString +  oWs + infraSpeciesMarker + oWs + "\\1";  //2-nd word and last word are the same 
1338

    
1339
    static String anyBotanicName = "(" + genusOrSupraGenus + "|" + infraGenus + "|" + aggrOrGroup + "|" + species + "|" + 
1340
					infraSpecies + "|" + infraSpecies + "|" + oldInfraSpecies + "|" + autonym   + ")+";
1341
    static String anyZooName = "(" + genusOrSupraGenus + "|" + infraGenus + "|" + aggrOrGroup + "|" + species + "|" + 
1342
					infraSpecies + "|" + infraSpecies + "|" + oldInfraSpecies + ")+";
1343
    static String anyBotanicFullName = anyBotanicName + oWs + fullBotanicAuthorString ;
1344
    static String anyZooFullName = anyZooName + oWs + fullZooAuthorString ;
1345
    static String anyFullName = "(" + anyBotanicFullName + "|" + anyZooFullName + ")";
1346
    
1347
    //Pattern
1348
    static Pattern oWsPattern = Pattern.compile(oWs);
1349
    static Pattern teamSplitterPattern = Pattern.compile(teamSplitter);
1350
    static Pattern cultivarPattern = Pattern.compile(cultivar);
1351
    static Pattern cultivarMarkerPattern = Pattern.compile(cultivarMarker);
1352
    static Pattern hybridPattern = Pattern.compile(hybrid); 
1353
    
1354
    static Pattern genusOrSupraGenusPattern = Pattern.compile(pStart + genusOrSupraGenus + facultFullAuthorString2 + end);
1355
    static Pattern infraGenusPattern = Pattern.compile(pStart + infraGenus + facultFullAuthorString2 + end);
1356
    static Pattern aggrOrGroupPattern = Pattern.compile(pStart + aggrOrGroup + fWs + end); //aggr. or group has no author string
1357
    static Pattern speciesPattern = Pattern.compile(pStart + species + facultFullAuthorString2 + end);
1358
    static Pattern infraSpeciesPattern = Pattern.compile(pStart + infraSpecies + facultFullAuthorString2 + end);
1359
    static Pattern oldInfraSpeciesPattern = Pattern.compile(pStart + oldInfraSpecies + facultFullAuthorString2 + end);
1360
    static Pattern autonymPattern = Pattern.compile(pStart + autonym + fWs + end);
1361
	
1362
    static Pattern botanicBasionymPattern = Pattern.compile(botanicBasionymAuthor);
1363
    static Pattern zooBasionymPattern = Pattern.compile(zooBasionymAuthor);
1364
    static Pattern basionymPattern = Pattern.compile(basionymAuthor);
1365
    
1366
    static Pattern zooAuthorPattern = Pattern.compile(zooAuthorTeam);
1367
    static Pattern zooAuthorAddidtionPattern = Pattern.compile(zooAuthorAddidtion);
1368
    
1369
    static Pattern exAuthorPattern = Pattern.compile(oWs + exString);
1370
    
1371
    static Pattern fullBotanicAuthorStringPattern = Pattern.compile(fullBotanicAuthorString);
1372
    static Pattern fullZooAuthorStringPattern = Pattern.compile(fullZooAuthorString);
1373
    static Pattern fullAuthorStringPattern = Pattern.compile(fullAuthorString);
1374
    
1375
    static Pattern anyBotanicFullNamePattern = Pattern.compile(anyBotanicFullName);
1376
    static Pattern anyZooFullNamePattern = Pattern.compile(anyZooFullName);
1377
    
1378
    
1379
}
(2-2/2)