183b1e66f59684f786dd51c16458afd3339a3a38
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / strategy / parser / NonViralNameParserImpl.java
1 /**
2 *
3 */
4 package eu.etaxonomy.cdm.strategy.parser;
5
6 import java.util.regex.Matcher;
7 import java.util.regex.Pattern;
8
9 import org.apache.log4j.Logger;
10
11 import eu.etaxonomy.cdm.model.agent.Person;
12 import eu.etaxonomy.cdm.model.agent.Team;
13 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
14 import eu.etaxonomy.cdm.model.name.BacterialName;
15 import eu.etaxonomy.cdm.model.name.BotanicalName;
16 import eu.etaxonomy.cdm.model.name.CultivarPlantName;
17 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
18 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
19 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
20 import eu.etaxonomy.cdm.model.name.NonViralName;
21 import eu.etaxonomy.cdm.model.name.Rank;
22 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
23 import eu.etaxonomy.cdm.model.name.ZoologicalName;
24 import eu.etaxonomy.cdm.model.reference.Article;
25 import eu.etaxonomy.cdm.model.reference.Book;
26 import eu.etaxonomy.cdm.model.reference.BookSection;
27 import eu.etaxonomy.cdm.model.reference.Generic;
28 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
29 import eu.etaxonomy.cdm.model.reference.StrictReferenceBase;
30 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
31 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
32
33
34 /**
35 * @author a.mueller
36 *
37 */
38 public class NonViralNameParserImpl implements INonViralNameParser<NonViralName> {
39 private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
40
41 // good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
42
43 final static boolean MAKE_EMPTY = true;
44 final static boolean MAKE_NOT_EMPTY = false;
45
46
47 public static NonViralNameParserImpl NewInstance(){
48 return new NonViralNameParserImpl();
49 }
50
51 /* (non-Javadoc)
52 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
53 */
54 public NonViralName parseSimpleName(String simpleName, Rank rank){
55 //TODO
56 logger.warn("parseSimpleName() not yet implemented. Uses parseFullName() instead");
57 return parseFullName(simpleName, null, rank);
58 }
59
60
61 /* (non-Javadoc)
62 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
63 */
64 public NonViralName parseSimpleName(String simpleName){
65 return parseSimpleName(simpleName, null);
66 }
67
68 public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
69 return getNonViralNameInstance(fullString, code, null);
70 }
71
72 public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
73 NonViralName result = null;
74 if (code == null){
75 boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
76 boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
77 boolean isBacteriologicalName = false;
78 boolean isCultivatedPlantName = false;
79 if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
80 if (isBotanicalName){
81 result = BotanicalName.NewInstance(rank);
82 }else{
83 result = CultivarPlantName.NewInstance(rank);
84 }
85 }else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
86 result = ZoologicalName.NewInstance(rank);
87 }else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
88 result = BacterialName.NewInstance(rank);
89 }else {
90 result = NonViralName.NewInstance(rank);
91 }
92 }else if (code.equals(NomenclaturalCode.ICBN())){
93 result = BotanicalName.NewInstance(rank);
94 }else if (code.equals(NomenclaturalCode.ICZN())){
95 result = ZoologicalName.NewInstance(rank);
96 }else if (code.equals(NomenclaturalCode.ICNCP())){
97 logger.warn("ICNCP parsing not yet implemented");
98 result = CultivarPlantName.NewInstance(rank);
99 }else if (code.equals(NomenclaturalCode.BACTERIOLOGICAL())){
100 logger.warn("ICNCP not yet implemented");
101 result = BacterialName.NewInstance(rank);
102 }else if (code.equals(NomenclaturalCode.VIRAL())){
103 logger.error("Viral name is not a NonViralName !!");
104 }else{
105 logger.error("Unknown Nomenclatural Code !!");
106 }
107 return result;
108 }
109
110
111 /* (non-Javadoc)
112 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullReference(java.lang.String)
113 */
114 public NonViralName parseFullReference(String fullReferenceString) {
115 return parseFullReference(fullReferenceString, null, null);
116 }
117
118 /* (non-Javadoc)
119 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
120 */
121 public NonViralName parseFullReference(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
122 if (fullReferenceString == null){
123 return null;
124 }else{
125 NonViralName result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
126 parseFullReference(result, fullReferenceString, rank, MAKE_EMPTY);
127 return result;
128 }
129 }
130
131 /* (non-Javadoc)
132 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
133 */
134 public void parseFullReference(NonViralName nameToBeFilled, String fullReferenceString, Rank rank, boolean makeEmpty) {
135 if (fullReferenceString == null){
136 //return null;
137 return;
138 }
139 if (makeEmpty){
140 makeEmpty(nameToBeFilled);
141 }
142 fullReferenceString.replaceAll(oWs , " ");
143 fullReferenceString = fullReferenceString.trim();
144
145 String localFullName;
146 if (nameToBeFilled instanceof ZoologicalName){
147 localFullName = anyZooFullName;
148 }else{
149 localFullName = anyBotanicFullName;
150 }
151 //seperate name and reference part
152 String nameAndRefSeperator = "(^" + localFullName + ")("+ referenceSeperator + ")";
153 Pattern nameAndRefSeperatorPattern = Pattern.compile(nameAndRefSeperator);
154 Matcher nameAndRefSeperatorMatcher = nameAndRefSeperatorPattern.matcher(fullReferenceString);
155
156 if (nameAndRefSeperatorMatcher.find() ){
157 String nameAndSeperator = nameAndRefSeperatorMatcher.group(0);
158 String name = nameAndRefSeperatorMatcher.group(1);
159 String referenceString = fullReferenceString.substring(nameAndRefSeperatorMatcher.end());
160
161 // inRef?
162 String seperator = nameAndSeperator.substring(name.length());
163 boolean isInReference = false;
164 if (seperator.matches(inReferenceSeperator)){
165 isInReference = true;
166 }
167
168 //status
169 referenceString = parseNomStatus(referenceString, nameToBeFilled);
170
171 //parse subparts
172 parseFullName(nameToBeFilled, name, rank, makeEmpty);
173 parseReference(nameToBeFilled, referenceString, isInReference);
174 ReferenceBase ref = nameToBeFilled.getNomenclaturalReference();
175 if (ref != null && ref.getHasProblem()){
176 nameToBeFilled.setHasProblem(true);
177 }
178 }else{
179 //don't parse if name can't be seperated
180 nameToBeFilled.setHasProblem(true);
181 nameToBeFilled.setTitleCache(fullReferenceString);
182 logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");
183 }
184 }
185
186 //TODO make it an Array of status
187 /**
188 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
189 * The nomenclatural status part ist deleted from the reference String.
190 * @return String the new (shortend) reference String
191 */
192 private String parseNomStatus(String reference, NonViralName nameToBeFilled) {
193 String statusString;
194 Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")");
195 Matcher hasStatusMatcher = hasStatusPattern.matcher(reference);
196
197 if (hasStatusMatcher.find()) {
198 String statusPhrase = hasStatusMatcher.group(0);
199
200 Pattern statusPattern = Pattern.compile(pNomStatus);
201 Matcher statusMatcher = statusPattern.matcher(statusPhrase);
202 statusMatcher.find();
203 statusString = statusMatcher.group(0);
204 try {
205 NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString);
206 NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
207 nameToBeFilled.addStatus(nomStatus);
208
209 reference = reference.replace(statusPhrase, "");
210 } catch (UnknownCdmTypeException e) {
211 //Do nothing
212 }
213 }
214 return reference;
215 }
216
217
218 private void parseReference(NonViralName nameToBeFilled, String reference, boolean isInReference){
219
220 if (referencePattern.matcher(reference).matches() ){
221 //End (just delete, may be ambigous for yearPhrase, but no real information gets lost
222 Pattern endPattern = Pattern.compile( referenceEnd + end);
223 Matcher endMatcher = endPattern.matcher(reference);
224 if (endMatcher.find()){
225 String endPart = endMatcher.group(0);
226 reference = reference.substring(0, reference.length() - endPart.length());
227 }
228
229 //year
230 String yearPart = null;
231 String pYearPhrase = yearSeperator + yearPhrase + end;
232 Pattern yearPhrasePattern = Pattern.compile(pYearPhrase);
233 Matcher yearPhraseMatcher = yearPhrasePattern.matcher(reference);
234 if (yearPhraseMatcher.find()){
235 yearPart = yearPhraseMatcher.group(0);
236 reference = reference.substring(0, reference.length() - yearPart.length());
237 yearPart = yearPart.replaceFirst(start + yearSeperator, "").trim();
238 }
239
240 //detail
241 String pDetailPhrase = detailSeperator + detail + end;
242 Pattern detailPhrasePattern = Pattern.compile(pDetailPhrase);
243 Matcher detailPhraseMatcher = detailPhrasePattern.matcher(reference);
244 if (detailPhraseMatcher.find()){
245 String detailPart = detailPhraseMatcher.group(0);
246 reference = reference.substring(0, reference.length() - detailPart.length());
247 detailPart = detailPart.replaceFirst(start + detailSeperator, "").trim();
248 nameToBeFilled.setNomenclaturalMicroReference(detailPart);
249 }
250 //Title (and author)
251 parseReferenceTitle(reference, yearPart);
252 }else{
253 Generic ref = Generic.NewInstance();
254 ref.setTitleCache(reference);
255 ref.setHasProblem(true);
256 nameToBeFilled.setNomenclaturalReference(ref);
257 }
258
259 }
260
261 /**
262 * Parses the referenceTitlePart, including the author volume and edition.
263 * @param reference
264 * @param year
265 * @return
266 */
267 private ReferenceBase parseReferenceTitle(String reference, String year){
268 ReferenceBase result = null;
269 Pattern bookPattern = Pattern.compile(bookReference);
270 Pattern articlePattern = Pattern.compile(articleReference);
271 Pattern bookSectionPattern = Pattern.compile(bookSectionReference);
272
273
274 Matcher articleMatcher = articlePattern.matcher(reference);
275 Matcher bookMatcher = bookPattern.matcher(reference);
276 Matcher bookSectionMatcher = bookSectionPattern.matcher(reference);
277
278
279 if (articleMatcher.matches()){
280 //if (articlePatter)
281 //(type, author, title, volume, editor, series;
282 Article article = new Article();
283 article.setTitleCache(reference);
284 result = article;
285 }else if(bookMatcher.matches()){
286 Book book = new Book();
287 book .setTitleCache(reference);
288 result = book;
289 }else if (bookSectionMatcher.matches()){
290 BookSection bookSection = new BookSection();
291 bookSection.setTitleCache(reference);
292 result = bookSection;
293 }else{
294 logger.warn("unknown reference type not yet implemented");
295 //ReferenceBase refBase =
296 }
297 return result;
298 }
299
300
301 /* (non-Javadoc)
302 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericFullName(java.lang.String)
303 */
304 public NonViralName parseFullName(String fullNameString){
305 return parseFullName(fullNameString, null, null);
306 }
307
308
309 /* (non-Javadoc)
310 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
311 */
312 public NonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
313 if (fullNameString == null){
314 return null;
315 }else{
316 NonViralName result = getNonViralNameInstance(fullNameString, nomCode, rank);
317 parseFullName(result, fullNameString, rank, false);
318 return result;
319 }
320 }
321
322
323 public void parseFullName(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty) {
324 //TODO prol. etc.
325
326 String authorString = null;
327
328 if (fullNameString == null){
329 return;
330 }
331 if (makeEmpty){
332 makeEmpty(nameToBeFilled);
333 }
334 fullNameString.replaceAll(oWs , " ");
335 //TODO
336 // OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces
337 fullNameString = fullNameString.trim();
338
339 String[] epi = pattern.split(fullNameString);
340 try {
341 //cultivars //TODO 2 implement cultivars
342 // if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
343 // result = parseCultivar(fullName);
344 // }
345 //hybrids //TODO 2 implement hybrids
346 //else
347 if (hybridPattern.matcher(fullNameString).matches() ){
348 nameToBeFilled = parseHybrid(fullNameString);
349 }
350 else if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
351 //supraGeneric
352 if (rank != null && rank.isSupraGeneric()){
353 nameToBeFilled.setRank(rank);
354 nameToBeFilled.setGenusOrUninomial(epi[0]);
355 }
356 //genus
357 else {
358 nameToBeFilled.setRank(Rank.GENUS());
359 nameToBeFilled.setGenusOrUninomial(epi[0]);
360 }
361 authorString = fullNameString.substring(epi[0].length());
362 }
363 //infra genus
364 else if (infraGenusPattern.matcher(fullNameString).matches()){
365 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[1]));
366 nameToBeFilled.setGenusOrUninomial(epi[0]);
367 nameToBeFilled.setInfraGenericEpithet(epi[2]);
368 authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
369 }
370 //aggr. or group
371 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
372 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[2]));
373 nameToBeFilled.setGenusOrUninomial(epi[0]);
374 nameToBeFilled.setSpecificEpithet(epi[1]);
375 }
376 //species
377 else if (speciesPattern.matcher(fullNameString).matches()){
378 nameToBeFilled.setRank(Rank.SPECIES());
379 nameToBeFilled.setGenusOrUninomial(epi[0]);
380 nameToBeFilled.setSpecificEpithet(epi[1]);
381 authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
382 }
383 //autonym
384 else if (autonymPattern.matcher(fullNameString).matches()){
385 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[epi.length - 2]));
386 nameToBeFilled.setGenusOrUninomial(epi[0]);
387 nameToBeFilled.setSpecificEpithet(epi[1]);
388 nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
389 int lenSpecies = 2 + epi[0].length()+epi[1].length();
390 int lenInfraSpecies = 2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
391 authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
392 }
393 //infraSpecies
394 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
395 String infraSpecRankEpi = epi[2];
396 String infraSpecEpi = epi[3];
397 if ("tax.".equals(infraSpecRankEpi)){
398 infraSpecRankEpi += " " + epi[3];
399 infraSpecEpi = epi[4];
400 }
401 nameToBeFilled.setRank(Rank.getRankByAbbreviation(infraSpecRankEpi));
402 nameToBeFilled.setGenusOrUninomial(epi[0]);
403 nameToBeFilled.setSpecificEpithet(epi[1]);
404 nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
405 authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankEpi.length() + 1 + infraSpecEpi.length());
406 }//old infraSpecies
407 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
408 boolean implemented = false;
409 if (implemented){
410 nameToBeFilled.setRank(Rank.getRankByNameOrAbbreviation(epi[2]));
411 nameToBeFilled.setGenusOrUninomial(epi[0]);
412 nameToBeFilled.setSpecificEpithet(epi[1]);
413 //TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
414 authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
415 }else{
416 nameToBeFilled.setHasProblem(true);
417 nameToBeFilled.setTitleCache(fullNameString);
418 logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
419 }
420 }
421 //none
422 else{
423 nameToBeFilled.setHasProblem(true);
424 nameToBeFilled.setTitleCache(fullNameString);
425 logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
426 }
427 //authors
428 if (nameToBeFilled != null && authorString != null && authorString.trim().length() > 0 ){
429 TeamOrPersonBase[] authors = new TeamOrPersonBase[4];
430 Integer[] years = new Integer[4];
431 try {
432 fullAuthors(authorString, authors, years, nameToBeFilled.getClass());
433 } catch (StringNotParsableException e) {
434 nameToBeFilled.setHasProblem(true);
435 nameToBeFilled.setTitleCache(fullNameString);
436 logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");;
437 }
438 nameToBeFilled.setCombinationAuthorTeam(authors[0]);
439 nameToBeFilled.setExCombinationAuthorTeam(authors[1]);
440 nameToBeFilled.setBasionymAuthorTeam(authors[2]);
441 nameToBeFilled.setExBasionymAuthorTeam(authors[3]);
442 if (nameToBeFilled instanceof ZoologicalName){
443 ZoologicalName zooName = (ZoologicalName)nameToBeFilled;
444 zooName.setPublicationYear(years[0]);
445 zooName.setOriginalPublicationYear(years[2]);
446 }
447 }
448 //return
449 if (nameToBeFilled != null){
450 //return(BotanicalName)result;
451 return;
452 }else{
453 nameToBeFilled.setHasProblem(true);
454 nameToBeFilled.setTitleCache(fullNameString);
455 logger.info("Name string " + fullNameString + " could not be parsed!");
456 //return result;
457 return;
458 }
459 } catch (UnknownCdmTypeException e) {
460 nameToBeFilled.setHasProblem(true);
461 nameToBeFilled.setTitleCache(fullNameString);
462 logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " + fullNameString);
463 //return result;
464 return;
465 }
466 }
467
468
469
470 /**
471 * Parses the fullAuthorString
472 * @param fullAuthorString
473 * @return array of Teams containing the Team[0],
474 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
475 */
476 protected void fullAuthors (String fullAuthorString, TeamOrPersonBase[] authors, Integer[] years, Class clazz)
477 throws StringNotParsableException{
478 fullAuthorString = fullAuthorString.trim();
479 if (fullAuthorString == null || clazz == null){
480 return;
481 }
482 //Botanic
483 if ( BotanicalName.class.isAssignableFrom(clazz) ){
484 if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
485 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
486 }
487 }
488 //Zoo
489 else if ( ZoologicalName.class.isAssignableFrom(clazz) ){
490 if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
491 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
492 }
493 }else {
494 //TODO
495 logger.warn ("not yet implemented");
496 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
497 }
498 fullAuthorsChecked(fullAuthorString, authors, years);
499 }
500
501 /*
502 * like fullTeams but without trim and match check
503 */
504 protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase[] authors, Integer[] years){
505 int authorTeamStart = 0;
506 Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
507
508 if (basionymMatcher.find(0)){
509
510 String basString = basionymMatcher.group();
511 basString = basString.replaceFirst(basStart, "");
512 basString = basString.replaceAll(basEnd, "").trim();
513 authorTeamStart = basionymMatcher.end(1) + 1;
514
515 TeamOrPersonBase[] basAuthors = new TeamOrPersonBase[2];
516 Integer[] basYears = new Integer[2];
517 authorsAndEx(basString, basAuthors, basYears);
518 authors[2]= basAuthors[0];
519 years[2] = basYears[0];
520 authors[3]= basAuthors[1];
521 years[3] = basYears[1];
522 }
523 TeamOrPersonBase[] combinationAuthors = new TeamOrPersonBase[2];;
524 Integer[] combinationYears = new Integer[2];
525 authorsAndEx(fullAuthorString.substring(authorTeamStart), combinationAuthors, combinationYears);
526 authors[0]= combinationAuthors[0] ;
527 years[0] = combinationYears[0];
528 authors[1]= combinationAuthors[1];
529 years[1] = combinationYears[1];
530 }
531
532
533 /**
534 * Parses the author and ex-author String
535 * @param authorTeamString String representing the author and the ex-author team
536 * @return array of Teams containing the Team[0] and the ExTeam[1]
537 */
538 protected void authorsAndEx (String authorTeamString, TeamOrPersonBase[] authors, Integer[] years){
539 //TODO noch allgemeiner am anfang durch Replace etc.
540 authorTeamString = authorTeamString.trim();
541 authorTeamString = authorTeamString.replaceFirst(oWs + "ex" + oWs, " ex. " );
542 int authorEnd = authorTeamString.length();
543
544 Matcher exAuthorMatcher = exAuthorPattern.matcher(authorTeamString);
545 if (exAuthorMatcher.find(0)){
546 int exAuthorBegin = exAuthorMatcher.end(0);
547 String exString = authorTeamString.substring(exAuthorBegin).trim();
548 authorEnd = exAuthorMatcher.start(0);
549 authors [1] = author(exString);
550 }
551 zooOrBotanicAuthor(authorTeamString.substring(0, authorEnd), authors, years );
552 }
553
554 /**
555 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
556 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies
557 * to the year in case of an zoological name.
558 * @param authorString
559 * @param team
560 * @param year
561 */
562 protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase[] team, Integer[] year){
563 if (authorString == null){
564 return;
565 }else if ((authorString = authorString.trim()).length() == 0){
566 return;
567 }
568 Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
569 if (zooAuthorAddidtionMatcher.find()){
570 int index = zooAuthorAddidtionMatcher.start(0);
571 String strYear = authorString.substring(index);
572 strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
573 year[0] = Integer.valueOf(strYear);
574 authorString = authorString.substring(0, index).trim();
575 }
576 team[0] = author(authorString);
577 }
578
579
580 /**
581 * Parses an authorTeam String and returns the Team
582 * !!! TODO (atomization not yet implemented)
583 * @param authorTeamString String representing the author team
584 * @return an Team
585 */
586 protected TeamOrPersonBase author (String authorString){
587 if (authorString == null){
588 return null;
589 }else if ((authorString = authorString.trim()).length() == 0){
590 return null;
591 }else if (! teamSplitterPattern.matcher(authorString).find()){
592 //1 Person
593 Person result = Person.NewInstance();
594 result.setNomenclaturalTitle(authorString);
595 return result;
596 }else{
597 return parsedTeam(authorString);
598 }
599
600 }
601
602 /**
603 * Parses an authorString (reprsenting a team into the single authors and add
604 * them to the return Team.
605 * @param authorString
606 * @return Team
607 */
608 protected Team parsedTeam(String authorString){
609 Team result = Team.NewInstance();
610 String[] authors = authorString.split(teamSplitter);
611 for (String author : authors){
612 Person person = Person.NewInstance();
613 person.setNomenclaturalTitle(author);
614 result.addTeamMember(person);
615 }
616 return result;
617 }
618
619
620 //Parsing of the given full name that has been identified as hybrid already somewhere else.
621 private BotanicalName parseHybrid(String fullName){
622 logger.warn("parseHybrid --> function not yet implemented");
623 BotanicalName result = BotanicalName.NewInstance(null);
624 result.setTitleCache(fullName);
625 return result;
626 }
627
628 // // Parsing of the given full name that has been identified as a cultivar already somwhere else.
629 // // The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
630 public BotanicalName parseCultivar(String fullName) throws StringNotParsableException{
631 CultivarPlantName result = null;
632 String[] words = oWsPattern.split(fullName);
633
634 /* ---------------------------------------------------------------------------------
635 * cultivar
636 * ---------------------------------------------------------------------------------*/
637 if (fullName.indexOf(" '") != 0){
638 //TODO location of 'xx' is probably not arbitrary
639 Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
640 if (cultivarMatcher.find()){
641 String namePart = fullName.replaceFirst(cultivar, "");
642
643 String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
644 //OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
645
646 result = (CultivarPlantName)parseFullName(namePart);
647 result.setCultivarName(cultivarPart);
648 }
649 }else if (fullName.indexOf(" cv.") != 0){
650 // cv. is old form (not official)
651 throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
652 }
653
654 /* ---------------------------------------------------------------------------------
655 * cultivar group
656 * ---------------------------------------------------------------------------------
657 */
658 // TODO in work
659 //Ann. this is not the official way of noting cultivar groups
660 String group = oWs + "Group" + oWs + capitalEpiWord + end;
661 Pattern groupRE = Pattern.compile(group);
662 Matcher groupMatcher = groupRE.matcher(fullName);
663 if (groupMatcher.find()){
664 if (! words[words.length - 2].equals("group")){
665 throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
666 }else{
667
668 String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
669 //OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
670
671 String cultivarPart = words[words.length -1];
672 result = (CultivarPlantName)parseFullName(namePart);
673 if (result != null){
674 result.setCultivarName(cultivarPart);
675
676 //OLD: result.setCultivarGroupName(cultivarPart);
677 }
678 }
679
680 }
681 // // ---------------------------------------------------------------------------------
682 // if ( result = "" ){
683 // return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
684 // }else{
685 // return result;
686 // }
687 return result; //TODO
688 }
689
690
691 private void makeEmpty(NonViralName nameToBeFilled){
692 nameToBeFilled.setRank(null);
693 nameToBeFilled.setTitleCache(null, false);
694 nameToBeFilled.setNameCache(null);
695
696 nameToBeFilled.setAppendedPhrase(null);
697 //TODO ??
698 //nameToBeFilled.setBasionym(basionym);
699 nameToBeFilled.setBasionymAuthorTeam(null);
700 nameToBeFilled.setCombinationAuthorTeam(null);
701 nameToBeFilled.setExBasionymAuthorTeam(null);
702 nameToBeFilled.setExCombinationAuthorTeam(null);
703 nameToBeFilled.setAuthorshipCache(null);
704
705
706 nameToBeFilled.setHasProblem(false);
707 // TODO ?
708 //nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
709
710
711 nameToBeFilled.setGenusOrUninomial(null);
712 nameToBeFilled.setInfraGenericEpithet(null);
713 nameToBeFilled.setSpecificEpithet(null);
714 nameToBeFilled.setInfraSpecificEpithet(null);
715
716 nameToBeFilled.setNomenclaturalMicroReference(null);
717 nameToBeFilled.setNomenclaturalReference(null);
718
719 if (nameToBeFilled instanceof BotanicalName){
720 BotanicalName botanicalName = (BotanicalName)nameToBeFilled;
721 botanicalName.setAnamorphic(false);
722 botanicalName.setHybridFormula(false);
723 botanicalName.setMonomHybrid(false);
724 botanicalName.setBinomHybrid(false);
725 botanicalName.setTrinomHybrid(false);
726 }
727
728 if (nameToBeFilled instanceof ZoologicalName){
729 ZoologicalName zoologicalName = (ZoologicalName)nameToBeFilled;
730 zoologicalName.setBreed(null);
731 zoologicalName.setOriginalPublicationYear(null);
732 }
733
734 //TODO adapt to @Version of versionable entity, throws still optimistic locking error
735 //nameToBeFilled.setUpdated(Calendar.getInstance());
736 // TODO nameToBeFilled.setUpdatedBy(updatedBy);
737 }
738
739
740
741 //splitter
742 static String epiSplitter = "(\\s+|\\(|\\))"; //( ' '+| '(' | ')' )
743 static Pattern pattern = Pattern.compile(epiSplitter);
744
745 //some useful non-terminals
746 static String start = "^";
747 static String end = "$";
748 static String anyEnd = ".*" + end;
749 static String oWs = "\\s+"; //obligatory whitespaces
750 static String fWs = "\\s*"; //facultative whitespcace
751
752 static String capitalWord = "\\p{javaUpperCase}\\p{javaLowerCase}*";
753 static String nonCapitalWord = "\\p{javaLowerCase}+";
754
755 static String capitalDotWord = capitalWord + "\\.?"; //capitalWord with facultativ '.' at the end
756 static String nonCapitalDotWord = nonCapitalWord + "\\.?"; //nonCapitalWord with facultativ '.' at the end
757 static String dotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.?"; //word (capital or non-capital) with facultativ '.' at the end
758 //Words used in an epethiton for a TaxonName
759 static String nonCapitalEpiWord = "[a-zï\\-]+"; //TODO solve checkin Problem with Unicode character "[a-z�\\-]+";
760 static String capitalEpiWord = "[A-Z]"+ nonCapitalEpiWord;
761
762
763 //years
764 static String month = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
765 static String singleYear = "\\b" + "(?:17|18|19|20)" + "\\d{2}" + "\\b"; // word boundary followed by either 17,18,19, or 20 (not captured) followed by 2 digits
766 static String yearPhrase = "(" + singleYear + "(-" + singleYear + ")?" +
767 "(" + month + ")?)" ; // optional month
768
769 //seperator
770 static String yearSeperator = "." + oWs;
771 static String detailSeperator = ":" + oWs;
772 static String referenceSeperator1 = "," + oWs ;
773 static String inReferenceSeperator = oWs + "in" + oWs;
774 static String referenceSeperator = "(" + referenceSeperator1 +"|" + inReferenceSeperator + ")" ;
775 static String referenceAuthorSeperator = ","+ oWs;
776 static String volumeSeperator = "," + fWs ;
777 static String referenceEnd = ".";
778
779
780 //status
781 static String status = "";
782
783 //marker
784 static String InfraGenusMarker = "(subgen.|subg.|sect.|subsect.|ser.|subser.|t.infgen.)";
785 static String aggrOrGroupMarker = "(aggr.|agg.|group)";
786 static String infraSpeciesMarker = "(subsp.|convar.|var.|subvar.|f.|subf.|f.spec.|tax." + fWs + "infrasp.)";
787 static String oldInfraSpeciesMarker = "(prol.|proles|race|taxon|sublusus)";
788
789
790 //AuthorString (botanical form)
791 static String authorPart = "(" + "(D'|L'|'t\\s)?" + capitalDotWord + "('" + nonCapitalDotWord + ")?" + "|da|de(n|l|\\sla)?)" ; // one word of an author name: optional prefix D', L' or 't, a capitalDotWord and an optional apostrophe suffix; or one of the particles da, de, den, del, de la
792 static String author = "(" + authorPart + "(" + fWs + "|-)" + ")+" + "(f.|fil.|secundus)?"; // one or more author parts, each followed by fWs or a hyphen, then an optional suffix; NOTE(review): the dots in "f." and "fil." are unescaped and match any character
793 static String teamSplitter = fWs + "(&)" + fWs; // ampersand between the members of a team
794 static String authorTeam = fWs + "(" + author + teamSplitter + ")*" + author + "(" + teamSplitter + "al.)?" + fWs; // one or more authors joined by teamSplitter, optionally ending in "& al."; NOTE(review): the dot in "al." is unescaped
795 static String exString = "(ex.?)"; // "ex" plus at most one further character; NOTE(review): ".?" is an unescaped dot, presumably meant as an optional full stop ("ex" or "ex.") - confirm before changing, the match length may matter to callers
796 static String authorAndExTeam = authorTeam + "(" + oWs + exString + oWs + authorTeam + ")?"; // an author team, optionally followed by exString and a second author team
797 static String basStart = "\\("; // literal opening parenthesis
798 static String basEnd = "\\)"; // literal closing parenthesis
799 static String botanicBasionymAuthor = basStart + "(" + authorAndExTeam + ")" + basEnd; // '(' and ')' is for evaluation with RE.paren(x)
800 static String fullBotanicAuthorString = fWs + "(" + botanicBasionymAuthor +")?" + fWs + authorAndExTeam + fWs; // optional parenthesised basionym author followed by authorAndExTeam
801 static String facultFullBotanicAuthorString = "(" + fullBotanicAuthorString + ")?" ; // the whole botanical author string made optional
802
803 //Zoo. Author (author team followed by a year)
804 //TODO does zoo author have ex-Author?
805 static String zooAuthorYearSeperator = ","; // comma between author team and year
806 static String zooAuthorAddidtion = fWs + zooAuthorYearSeperator + fWs + singleYear; // the ", yyyy" part that follows the author team
807 static String zooAuthorTeam = authorTeam + zooAuthorAddidtion; // author team followed by the (mandatory) year addition
808 static String zooBasionymAuthor = basStart + "(" + zooAuthorTeam + ")" + basEnd; // zooAuthorTeam enclosed in parentheses
809 static String fullZooAuthorString = fWs + "(" + zooBasionymAuthor +")?" + fWs + zooAuthorTeam + fWs; // optional parenthesised basionym author followed by zooAuthorTeam
810 static String facultFullZooAuthorString = "(" + fullZooAuthorString + ")?" ; // the whole zoological author string made optional
811
812 static String facultFullAuthorString2 = "(" + facultFullBotanicAuthorString + "|" + facultFullZooAuthorString + ")"; // optional author string in botanical or zoological form; the botanical alternative is listed first
813
814 static String basionymAuthor = "(" + botanicBasionymAuthor + "|" + zooBasionymAuthor+ ")"; // basionym author in either form
815 static String fullAuthorString = "(" + fullBotanicAuthorString + "|" + fullZooAuthorString+ ")"; // full author string in either form
816
817 //details (the part of a reference that follows detailSeperator)
818 //TODO still very simple
819 static String pageNumber = "\\d{1,5}"; // one to five digits
820 static String detail = "(" + pageNumber + ")"; // the page number as a capture group
821
822 //reference
823 static String volume = "\\d{4}" + "(?:\\(\\d{4}\\))?"; // four digits, optionally followed by four more digits in parentheses; the "?" now covers the whole parenthesised part (before, only the closing parenthesis was optional) and the group is non-capturing so the group numbering of the patterns built from it does not change
824
825 static String referenceTitle = "(" + dotWord + fWs + ")" + "{2,}"; // at least two dotWords, each followed by fWs
826 static String bookReference = referenceTitle + volumeSeperator + volume; // title, volume separator, volume
827 static String bookSectionReference = authorTeam + referenceAuthorSeperator; // author team plus separator only; not used by reference
828 static String articleReference = inReferenceSeperator + bookReference ; // a book reference introduced by "in"
829 static String reference = "(" + articleReference + "|" + bookReference +")" + // article or book reference, ...
830 detailSeperator + detail + yearSeperator + yearPhrase + // ... followed by detail and year phrase, each with its separator, ...
831 referenceEnd; // ... and the reference end
832
833 static Pattern referencePattern = Pattern.compile(reference); // compiled form of reference
834
835 static String pNomStatusNom = "nom\\." + fWs + "(superfl\\.|nud\\.|illeg\\.|inval\\.|cons\\.|alternativ\\.|subnud\\.|"+ // "nom." followed by one status abbreviation; every dot (now including the one of "subnud.") is escaped and matches a literal full stop only
836 "rej\\."+ fWs + "prop\\.|rej\\.|provis\\.)"; // "rej. prop." is listed before "rej." so that an unanchored search does not stop at the shorter alternative
837 static String pNomStatusOrthVar = "orth\\." + fWs + "var\\."; // "orth. var."
838 static String pNomStatus = "(" + pNomStatusNom + "|" + pNomStatusOrthVar + ")"; // any of the status expressions above, as one capture group
839 static String pNomStatusPhrase1 = "," + fWs + pNomStatus; // status appended after a comma
840 static String pNomStatusPhrase2 = "\\[" + fWs + pNomStatus + "\\]"; // status enclosed in square brackets
841
842 static String pNomStatusPhrase = "(?:" + pNomStatusPhrase1 + "|" + pNomStatusPhrase2 + ")"; // either phrase form; the outer group is non-capturing
843
844 // Soraya
845 //opus utique oppr.
846 //pro syn.
847 //provisional synonym
848 //fossil name
849
850
851
852 //cultivars and hybrids
853 static String cultivar = oWs + "'..+'"; //at least two characters between single quotes; careful: apostrophes also occur in author names
854 static String cultivarMarker = oWs + "(cv.|')"; //"cv" plus one character, or a single quote; NOTE(review): the dot in "cv." is unescaped and matches any character
855 static String hybrid = oWs + "((x|X)" + oWs + "|notho)";//= ( x )|( X )|( notho)
856
857 // Name String (name part without authors, built from capitalEpiWord / nonCapitalEpiWord defined earlier in this class)
858 static String genusOrSupraGenus = capitalEpiWord; // a single capitalEpiWord
859 static String infraGenus = capitalEpiWord + oWs + InfraGenusMarker + oWs + capitalEpiWord; // capitalEpiWord, InfraGenusMarker, capitalEpiWord
860 static String aggrOrGroup = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + aggrOrGroupMarker; // two-word name followed by aggrOrGroupMarker
861 static String species = capitalEpiWord + oWs + nonCapitalEpiWord; // two-word name
862 static String infraSpecies = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + infraSpeciesMarker + oWs + nonCapitalEpiWord; // two-word name, infraSpeciesMarker, nonCapitalEpiWord
863 static String oldInfraSpecies = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + oldInfraSpeciesMarker + oWs + nonCapitalEpiWord; // same shape as infraSpecies, but with oldInfraSpeciesMarker
864 static String autonym = capitalEpiWord + oWs + "(" + nonCapitalEpiWord +")" + oWs + fullBotanicAuthorString + oWs + infraSpeciesMarker + oWs + "\\1"; //2-nd word and last word are the same; NOTE(review): "\\1" is the first capturing group of the whole compiled pattern - that is the 2-nd word only if nothing in front of it (start, capitalEpiWord) opens a capturing group; confirm
865
866 static String anyBotanicName = "(" + genusOrSupraGenus + "|" + infraGenus + "|" + aggrOrGroup + "|" + species + "|" +
867 infraSpecies + "|" + infraSpecies + "|" + oldInfraSpecies + "|" + autonym + ")+"; // one or more repetitions of any botanical name form; NOTE(review): infraSpecies is listed twice, and inside this group the "\\1" of autonym no longer points at autonym's own group (in anyBotanicFullName group 1 is this enclosing group)
868 static String anyZooName = "(" + genusOrSupraGenus + "|" + infraGenus + "|" + aggrOrGroup + "|" + species + "|" +
869 infraSpecies + "|" + infraSpecies + "|" + oldInfraSpecies + ")+"; // as anyBotanicName but without autonym; NOTE(review): infraSpecies is listed twice
870 static String anyBotanicFullName = anyBotanicName + oWs + fullBotanicAuthorString; // botanical name followed by the botanical author string
871 static String anyZooFullName = anyZooName + oWs + fullZooAuthorString; // zoological name followed by the zoological author string
872 static String anyFullName = "(" + anyBotanicFullName + "|" + anyZooFullName + ")"; // full name in either form
873
874 //Pattern (compiled forms of the expressions above; start and end are defined earlier in this class, presumably as anchors)
875 static Pattern oWsPattern = Pattern.compile(oWs);
876 static Pattern teamSplitterPattern = Pattern.compile(teamSplitter);
877 static Pattern cultivarPattern = Pattern.compile(cultivar);
878 static Pattern cultivarMarkerPattern = Pattern.compile(cultivarMarker);
879 static Pattern hybridPattern = Pattern.compile(hybrid);
880
881 static Pattern genusOrSupraGenusPattern = Pattern.compile(start + genusOrSupraGenus + facultFullAuthorString2 + end); //name with optional author string, between start and end
882 static Pattern infraGenusPattern = Pattern.compile(start + infraGenus + facultFullAuthorString2 + end); //name with optional author string, between start and end
883 static Pattern aggrOrGroupPattern = Pattern.compile(start + aggrOrGroup + fWs + end); //aggr. or group has no author string
884 static Pattern speciesPattern = Pattern.compile(start + species + facultFullAuthorString2 + end); //name with optional author string, between start and end
885 static Pattern infraSpeciesPattern = Pattern.compile(start + infraSpecies + facultFullAuthorString2 + end); //name with optional author string, between start and end
886 static Pattern oldInfraSpeciesPattern = Pattern.compile(start + oldInfraSpecies + facultFullAuthorString2 + end); //name with optional author string, between start and end
887 static Pattern autonymPattern = Pattern.compile(start + autonym + fWs + end); //no trailing author string; autonym itself contains fullBotanicAuthorString after the 2-nd word
888
889 static Pattern botanicBasionymPattern = Pattern.compile(botanicBasionymAuthor); //not wrapped in start/end
890 static Pattern zooBasionymPattern = Pattern.compile(zooBasionymAuthor); //not wrapped in start/end
891 static Pattern basionymPattern = Pattern.compile(basionymAuthor); //not wrapped in start/end
892
893 static Pattern zooAuthorPattern = Pattern.compile(zooAuthorTeam);
894 static Pattern zooAuthorAddidtionPattern = Pattern.compile(zooAuthorAddidtion);
895
896 static Pattern exAuthorPattern = Pattern.compile(oWs + exString); //exString with leading oWs
897
898 static Pattern fullBotanicAuthorStringPattern = Pattern.compile(fullBotanicAuthorString);
899 static Pattern fullZooAuthorStringPattern = Pattern.compile(fullZooAuthorString);
900 static Pattern fullAuthorStringPattern = Pattern.compile(fullAuthorString);
901
902 static Pattern anyBotanicFullNamePattern = Pattern.compile(anyBotanicFullName);
903 static Pattern anyZooFullNamePattern = Pattern.compile(anyZooFullName);
904
905
906
907
908
909 }