4 package eu
.etaxonomy
.cdm
.strategy
.parser
;
6 import java
.util
.regex
.Matcher
;
7 import java
.util
.regex
.Pattern
;
9 import org
.apache
.log4j
.Logger
;
11 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
12 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
13 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
14 import eu
.etaxonomy
.cdm
.model
.name
.BacterialName
;
15 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
16 import eu
.etaxonomy
.cdm
.model
.name
.CultivarPlantName
;
17 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
18 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
19 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
20 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
21 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
22 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
23 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
24 import eu
.etaxonomy
.cdm
.model
.reference
.Article
;
25 import eu
.etaxonomy
.cdm
.model
.reference
.Book
;
26 import eu
.etaxonomy
.cdm
.model
.reference
.BookSection
;
27 import eu
.etaxonomy
.cdm
.model
.reference
.Generic
;
28 import eu
.etaxonomy
.cdm
.model
.reference
.INomenclaturalReference
;
29 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceBase
;
30 import eu
.etaxonomy
.cdm
.model
.reference
.StrictReferenceBase
;
31 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.StringNotParsableException
;
32 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
39 public class NonViralNameParserImpl
implements INonViralNameParser
<NonViralName
> {
40 private static final Logger logger
= Logger
.getLogger(NonViralNameParserImpl
.class);
42 // good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
44 final static boolean MAKE_EMPTY
= true;
45 final static boolean MAKE_NOT_EMPTY
= false;
48 public static NonViralNameParserImpl
NewInstance(){
49 return new NonViralNameParserImpl();
53 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
55 public NonViralName
parseSimpleName(String simpleName
, Rank rank
){
57 logger
.warn("parseSimpleName() not yet implemented. Uses parseFullName() instead");
58 return parseFullName(simpleName
, null, rank
);
63 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
65 public NonViralName
parseSimpleName(String simpleName
){
66 return parseSimpleName(simpleName
, null);
69 public NonViralName
getNonViralNameInstance(String fullString
, NomenclaturalCode code
){
70 return getNonViralNameInstance(fullString
, code
, null);
73 public NonViralName
getNonViralNameInstance(String fullString
, NomenclaturalCode code
, Rank rank
){
74 NonViralName result
= null;
76 boolean isBotanicalName
= anyBotanicFullNamePattern
.matcher(fullString
).find();
77 boolean isZoologicalName
= anyZooFullNamePattern
.matcher(fullString
).find();;
78 boolean isBacteriologicalName
= false;
79 boolean isCultivatedPlantName
= false;
80 if ( (isBotanicalName
|| isCultivatedPlantName
) && ! isZoologicalName
&& !isBacteriologicalName
){
82 result
= BotanicalName
.NewInstance(rank
);
84 result
= CultivarPlantName
.NewInstance(rank
);
86 }else if ( isZoologicalName
/*&& ! isBotanicalName*/ && !isBacteriologicalName
&& !isCultivatedPlantName
){
87 result
= ZoologicalName
.NewInstance(rank
);
88 }else if ( isZoologicalName
&& ! isBotanicalName
&& !isBacteriologicalName
&& !isCultivatedPlantName
){
89 result
= BacterialName
.NewInstance(rank
);
91 result
= NonViralName
.NewInstance(rank
);
93 }else if (code
.equals(NomenclaturalCode
.ICBN())){
94 result
= BotanicalName
.NewInstance(rank
);
95 }else if (code
.equals(NomenclaturalCode
.ICZN())){
96 result
= ZoologicalName
.NewInstance(rank
);
97 }else if (code
.equals(NomenclaturalCode
.ICNCP())){
98 logger
.warn("ICNCP parsing not yet implemented");
99 result
= CultivarPlantName
.NewInstance(rank
);
100 }else if (code
.equals(NomenclaturalCode
.BACTERIOLOGICAL())){
101 logger
.warn("ICNCP not yet implemented");
102 result
= BacterialName
.NewInstance(rank
);
103 }else if (code
.equals(NomenclaturalCode
.VIRAL())){
104 logger
.error("Viral name is not a NonViralName !!");
106 logger
.error("Unknown Nomenclatural Code !!");
113 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullReference(java.lang.String)
115 public NonViralName
parseFullReference(String fullReferenceString
) {
116 return parseFullReference(fullReferenceString
, null, null);
120 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
122 public NonViralName
parseFullReference(String fullReferenceString
, NomenclaturalCode nomCode
, Rank rank
) {
123 if (fullReferenceString
== null){
126 NonViralName result
= getNonViralNameInstance(fullReferenceString
, nomCode
, rank
);
127 parseFullReference(result
, fullReferenceString
, rank
, MAKE_EMPTY
);
133 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
135 public void parseFullReference(NonViralName nameToBeFilled
, String fullReferenceString
, Rank rank
, boolean makeEmpty
) {
136 if (fullReferenceString
== null){
141 makeEmpty(nameToBeFilled
);
143 fullReferenceString
.replaceAll(oWs
, " ");
144 fullReferenceString
= fullReferenceString
.trim();
146 String localFullName
;
147 if (nameToBeFilled
instanceof ZoologicalName
){
148 localFullName
= anyZooFullName
;
150 localFullName
= anyBotanicFullName
;
152 //seperate name and reference part
153 String nameAndRefSeperator
= "(^" + localFullName
+ ")("+ referenceSeperator
+ ")";
154 Pattern nameAndRefSeperatorPattern
= Pattern
.compile(nameAndRefSeperator
);
155 Matcher nameAndRefSeperatorMatcher
= nameAndRefSeperatorPattern
.matcher(fullReferenceString
);
157 if (nameAndRefSeperatorMatcher
.find() ){
158 String nameAndSeperator
= nameAndRefSeperatorMatcher
.group(0);
159 String name
= nameAndRefSeperatorMatcher
.group(1);
160 String referenceString
= fullReferenceString
.substring(nameAndRefSeperatorMatcher
.end());
163 String seperator
= nameAndSeperator
.substring(name
.length());
164 boolean isInReference
= false;
165 if (seperator
.matches(inReferenceSeperator
)){
166 isInReference
= true;
170 referenceString
= parseNomStatus(referenceString
, nameToBeFilled
);
173 parseFullName(nameToBeFilled
, name
, rank
, makeEmpty
);
174 parseReference(nameToBeFilled
, referenceString
, isInReference
);
175 INomenclaturalReference ref
= nameToBeFilled
.getNomenclaturalReference();
176 if (ref
!= null && ref
.getHasProblem()){
177 nameToBeFilled
.setHasProblem(true);
180 //don't parse if name can't be seperated
181 nameToBeFilled
.setHasProblem(true);
182 nameToBeFilled
.setTitleCache(fullReferenceString
);
183 logger
.info("no applicable parsing rule could be found for \"" + fullReferenceString
+ "\"");
187 //TODO make it an Array of status
189 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
190 * The nomenclatural status part ist deleted from the reference String.
191 * @return String the new (shortend) reference String
193 private String
parseNomStatus(String reference
, NonViralName nameToBeFilled
) {
195 Pattern hasStatusPattern
= Pattern
.compile("(" + pNomStatusPhrase
+ ")");
196 Matcher hasStatusMatcher
= hasStatusPattern
.matcher(reference
);
198 if (hasStatusMatcher
.find()) {
199 String statusPhrase
= hasStatusMatcher
.group(0);
201 Pattern statusPattern
= Pattern
.compile(pNomStatus
);
202 Matcher statusMatcher
= statusPattern
.matcher(statusPhrase
);
203 statusMatcher
.find();
204 statusString
= statusMatcher
.group(0);
206 NomenclaturalStatusType nomStatusType
= NomenclaturalStatusType
.getNomenclaturalStatusTypeByAbbreviation(statusString
);
207 NomenclaturalStatus nomStatus
= NomenclaturalStatus
.NewInstance(nomStatusType
);
208 nameToBeFilled
.addStatus(nomStatus
);
210 reference
= reference
.replace(statusPhrase
, "");
211 } catch (UnknownCdmTypeException e
) {
219 private void parseReference(NonViralName nameToBeFilled
, String reference
, boolean isInReference
){
221 if (referencePattern
.matcher(reference
).matches() ){
222 //End (just delete, may be ambigous for yearPhrase, but no real information gets lost
223 Pattern endPattern
= Pattern
.compile( referenceEnd
+ end
);
224 Matcher endMatcher
= endPattern
.matcher(reference
);
225 if (endMatcher
.find()){
226 String endPart
= endMatcher
.group(0);
227 reference
= reference
.substring(0, reference
.length() - endPart
.length());
231 String yearPart
= null;
232 String pYearPhrase
= yearSeperator
+ yearPhrase
+ end
;
233 Pattern yearPhrasePattern
= Pattern
.compile(pYearPhrase
);
234 Matcher yearPhraseMatcher
= yearPhrasePattern
.matcher(reference
);
235 if (yearPhraseMatcher
.find()){
236 yearPart
= yearPhraseMatcher
.group(0);
237 reference
= reference
.substring(0, reference
.length() - yearPart
.length());
238 yearPart
= yearPart
.replaceFirst(start
+ yearSeperator
, "").trim();
242 String pDetailPhrase
= detailSeperator
+ detail
+ end
;
243 Pattern detailPhrasePattern
= Pattern
.compile(pDetailPhrase
);
244 Matcher detailPhraseMatcher
= detailPhrasePattern
.matcher(reference
);
245 if (detailPhraseMatcher
.find()){
246 String detailPart
= detailPhraseMatcher
.group(0);
247 reference
= reference
.substring(0, reference
.length() - detailPart
.length());
248 detailPart
= detailPart
.replaceFirst(start
+ detailSeperator
, "").trim();
249 nameToBeFilled
.setNomenclaturalMicroReference(detailPart
);
252 parseReferenceTitle(reference
, yearPart
);
254 Generic ref
= Generic
.NewInstance();
255 ref
.setTitleCache(reference
);
256 ref
.setHasProblem(true);
257 nameToBeFilled
.setNomenclaturalReference(ref
);
263 * Parses the referenceTitlePart, including the author volume and edition.
268 private ReferenceBase
parseReferenceTitle(String reference
, String year
){
269 ReferenceBase result
= null;
270 Pattern bookPattern
= Pattern
.compile(bookReference
);
271 Pattern articlePattern
= Pattern
.compile(articleReference
);
272 Pattern bookSectionPattern
= Pattern
.compile(bookSectionReference
);
275 Matcher articleMatcher
= articlePattern
.matcher(reference
);
276 Matcher bookMatcher
= bookPattern
.matcher(reference
);
277 Matcher bookSectionMatcher
= bookSectionPattern
.matcher(reference
);
280 if (articleMatcher
.matches()){
282 //(type, author, title, volume, editor, series;
283 Article article
= new Article();
284 article
.setTitleCache(reference
);
286 }else if(bookMatcher
.matches()){
287 Book book
= new Book();
288 book
.setTitleCache(reference
);
290 }else if (bookSectionMatcher
.matches()){
291 BookSection bookSection
= new BookSection();
292 bookSection
.setTitleCache(reference
);
293 result
= bookSection
;
295 logger
.warn("unknown reference type not yet implemented");
296 //ReferenceBase refBase =
303 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericFullName(java.lang.String)
305 public NonViralName
parseFullName(String fullNameString
){
306 return parseFullName(fullNameString
, null, null);
311 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
313 public NonViralName
parseFullName(String fullNameString
, NomenclaturalCode nomCode
, Rank rank
) {
314 if (fullNameString
== null){
317 NonViralName result
= getNonViralNameInstance(fullNameString
, nomCode
, rank
);
318 parseFullName(result
, fullNameString
, rank
, false);
324 public void parseFullName(NonViralName nameToBeFilled
, String fullNameString
, Rank rank
, boolean makeEmpty
) {
327 String authorString
= null;
329 if (fullNameString
== null){
333 makeEmpty(nameToBeFilled
);
335 fullNameString
.replaceAll(oWs
, " ");
337 // OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces
338 fullNameString
= fullNameString
.trim();
340 String
[] epi
= pattern
.split(fullNameString
);
342 //cultivars //TODO 2 implement cultivars
343 // if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
344 // result = parseCultivar(fullName);
346 //hybrids //TODO 2 implement hybrids
348 if (hybridPattern
.matcher(fullNameString
).matches() ){
349 nameToBeFilled
= parseHybrid(fullNameString
);
351 else if (genusOrSupraGenusPattern
.matcher(fullNameString
).matches()){
353 if (rank
!= null && rank
.isSupraGeneric()){
354 nameToBeFilled
.setRank(rank
);
355 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
359 nameToBeFilled
.setRank(Rank
.GENUS());
360 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
362 authorString
= fullNameString
.substring(epi
[0].length());
365 else if (infraGenusPattern
.matcher(fullNameString
).matches()){
366 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[1]));
367 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
368 nameToBeFilled
.setInfraGenericEpithet(epi
[2]);
369 authorString
= fullNameString
.substring(epi
[0].length() + 1 + epi
[1].length()+ 1 + epi
[2].length());
372 else if (aggrOrGroupPattern
.matcher(fullNameString
).matches()){
373 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[2]));
374 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
375 nameToBeFilled
.setSpecificEpithet(epi
[1]);
378 else if (speciesPattern
.matcher(fullNameString
).matches()){
379 nameToBeFilled
.setRank(Rank
.SPECIES());
380 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
381 nameToBeFilled
.setSpecificEpithet(epi
[1]);
382 authorString
= fullNameString
.substring(epi
[0].length() + 1 + epi
[1].length());
385 else if (autonymPattern
.matcher(fullNameString
).matches()){
386 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[epi
.length
- 2]));
387 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
388 nameToBeFilled
.setSpecificEpithet(epi
[1]);
389 nameToBeFilled
.setInfraSpecificEpithet(epi
[epi
.length
- 1]);
390 int lenSpecies
= 2 + epi
[0].length()+epi
[1].length();
391 int lenInfraSpecies
= 2 + epi
[epi
.length
- 2].length() + epi
[epi
.length
- 1].length();
392 authorString
= fullNameString
.substring(lenSpecies
, fullNameString
.length() - lenInfraSpecies
);
395 else if (infraSpeciesPattern
.matcher(fullNameString
).matches()){
396 String infraSpecRankEpi
= epi
[2];
397 String infraSpecEpi
= epi
[3];
398 if ("tax.".equals(infraSpecRankEpi
)){
399 infraSpecRankEpi
+= " " + epi
[3];
400 infraSpecEpi
= epi
[4];
402 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(infraSpecRankEpi
));
403 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
404 nameToBeFilled
.setSpecificEpithet(epi
[1]);
405 nameToBeFilled
.setInfraSpecificEpithet(infraSpecEpi
);
406 authorString
= fullNameString
.substring(epi
[0].length()+ 1 + epi
[1].length() +1 + infraSpecRankEpi
.length() + 1 + infraSpecEpi
.length());
408 else if (oldInfraSpeciesPattern
.matcher(fullNameString
).matches()){
409 boolean implemented
= false;
411 nameToBeFilled
.setRank(Rank
.getRankByNameOrAbbreviation(epi
[2]));
412 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
413 nameToBeFilled
.setSpecificEpithet(epi
[1]);
414 //TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
415 authorString
= fullNameString
.substring(epi
[0].length()+ 1 + epi
[1].length() +1 + epi
[2].length() + 1 + epi
[3].length());
417 nameToBeFilled
.setHasProblem(true);
418 nameToBeFilled
.setTitleCache(fullNameString
);
419 logger
.info("Name string " + fullNameString
+ " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
424 nameToBeFilled
.setHasProblem(true);
425 nameToBeFilled
.setTitleCache(fullNameString
);
426 logger
.info("no applicable parsing rule could be found for \"" + fullNameString
+ "\"");
429 if (nameToBeFilled
!= null && authorString
!= null && authorString
.trim().length() > 0 ){
430 TeamOrPersonBase
[] authors
= new TeamOrPersonBase
[4];
431 Integer
[] years
= new Integer
[4];
433 fullAuthors(authorString
, authors
, years
, nameToBeFilled
.getClass());
434 } catch (StringNotParsableException e
) {
435 nameToBeFilled
.setHasProblem(true);
436 nameToBeFilled
.setTitleCache(fullNameString
);
437 logger
.info("no applicable parsing rule could be found for \"" + fullNameString
+ "\"");;
439 nameToBeFilled
.setCombinationAuthorTeam(authors
[0]);
440 nameToBeFilled
.setExCombinationAuthorTeam(authors
[1]);
441 nameToBeFilled
.setBasionymAuthorTeam(authors
[2]);
442 nameToBeFilled
.setExBasionymAuthorTeam(authors
[3]);
443 if (nameToBeFilled
instanceof ZoologicalName
){
444 ZoologicalName zooName
= (ZoologicalName
)nameToBeFilled
;
445 zooName
.setPublicationYear(years
[0]);
446 zooName
.setOriginalPublicationYear(years
[2]);
450 if (nameToBeFilled
!= null){
451 //return(BotanicalName)result;
454 nameToBeFilled
.setHasProblem(true);
455 nameToBeFilled
.setTitleCache(fullNameString
);
456 logger
.info("Name string " + fullNameString
+ " could not be parsed!");
460 } catch (UnknownCdmTypeException e
) {
461 nameToBeFilled
.setHasProblem(true);
462 nameToBeFilled
.setTitleCache(fullNameString
);
463 logger
.info("unknown rank (" + (rank
== null?
"null":rank
) + ") or abbreviation in string " + fullNameString
);
472 * Parses the fullAuthorString
473 * @param fullAuthorString
474 * @return array of Teams containing the Team[0],
475 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
477 protected void fullAuthors (String fullAuthorString
, TeamOrPersonBase
[] authors
, Integer
[] years
, Class clazz
)
478 throws StringNotParsableException
{
479 fullAuthorString
= fullAuthorString
.trim();
480 if (fullAuthorString
== null || clazz
== null){
484 if ( BotanicalName
.class.isAssignableFrom(clazz
) ){
485 if (! fullBotanicAuthorStringPattern
.matcher(fullAuthorString
).matches() ){
486 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString
+") not parsable: ");
490 else if ( ZoologicalName
.class.isAssignableFrom(clazz
) ){
491 if (! fullZooAuthorStringPattern
.matcher(fullAuthorString
).matches() ){
492 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString
+") not parsable: ");
496 logger
.warn ("not yet implemented");
497 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString
+") not parsable: ");
499 fullAuthorsChecked(fullAuthorString
, authors
, years
);
503 * like fullTeams but without trim and match check
505 protected void fullAuthorsChecked (String fullAuthorString
, TeamOrPersonBase
[] authors
, Integer
[] years
){
506 int authorTeamStart
= 0;
507 Matcher basionymMatcher
= basionymPattern
.matcher(fullAuthorString
);
509 if (basionymMatcher
.find(0)){
511 String basString
= basionymMatcher
.group();
512 basString
= basString
.replaceFirst(basStart
, "");
513 basString
= basString
.replaceAll(basEnd
, "").trim();
514 authorTeamStart
= basionymMatcher
.end(1) + 1;
516 TeamOrPersonBase
[] basAuthors
= new TeamOrPersonBase
[2];
517 Integer
[] basYears
= new Integer
[2];
518 authorsAndEx(basString
, basAuthors
, basYears
);
519 authors
[2]= basAuthors
[0];
520 years
[2] = basYears
[0];
521 authors
[3]= basAuthors
[1];
522 years
[3] = basYears
[1];
524 TeamOrPersonBase
[] combinationAuthors
= new TeamOrPersonBase
[2];;
525 Integer
[] combinationYears
= new Integer
[2];
526 authorsAndEx(fullAuthorString
.substring(authorTeamStart
), combinationAuthors
, combinationYears
);
527 authors
[0]= combinationAuthors
[0] ;
528 years
[0] = combinationYears
[0];
529 authors
[1]= combinationAuthors
[1];
530 years
[1] = combinationYears
[1];
535 * Parses the author and ex-author String
536 * @param authorTeamString String representing the author and the ex-author team
537 * @return array of Teams containing the Team[0] and the ExTeam[1]
539 protected void authorsAndEx (String authorTeamString
, TeamOrPersonBase
[] authors
, Integer
[] years
){
540 //TODO noch allgemeiner am anfang durch Replace etc.
541 authorTeamString
= authorTeamString
.trim();
542 authorTeamString
= authorTeamString
.replaceFirst(oWs
+ "ex" + oWs
, " ex. " );
543 int authorEnd
= authorTeamString
.length();
545 Matcher exAuthorMatcher
= exAuthorPattern
.matcher(authorTeamString
);
546 if (exAuthorMatcher
.find(0)){
547 int exAuthorBegin
= exAuthorMatcher
.end(0);
548 String exString
= authorTeamString
.substring(exAuthorBegin
).trim();
549 authorEnd
= exAuthorMatcher
.start(0);
550 authors
[1] = author(exString
);
552 zooOrBotanicAuthor(authorTeamString
.substring(0, authorEnd
), authors
, years
);
556 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
557 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies
558 * to the year in case of an zoological name.
559 * @param authorString
563 protected void zooOrBotanicAuthor(String authorString
, TeamOrPersonBase
[] team
, Integer
[] year
){
564 if (authorString
== null){
566 }else if ((authorString
= authorString
.trim()).length() == 0){
569 Matcher zooAuthorAddidtionMatcher
= zooAuthorAddidtionPattern
.matcher(authorString
);
570 if (zooAuthorAddidtionMatcher
.find()){
571 int index
= zooAuthorAddidtionMatcher
.start(0);
572 String strYear
= authorString
.substring(index
);
573 strYear
= strYear
.replaceAll(zooAuthorYearSeperator
, "").trim();
574 year
[0] = Integer
.valueOf(strYear
);
575 authorString
= authorString
.substring(0, index
).trim();
577 team
[0] = author(authorString
);
582 * Parses an authorTeam String and returns the Team
583 * !!! TODO (atomization not yet implemented)
584 * @param authorTeamString String representing the author team
587 protected TeamOrPersonBase
author (String authorString
){
588 if (authorString
== null){
590 }else if ((authorString
= authorString
.trim()).length() == 0){
592 }else if (! teamSplitterPattern
.matcher(authorString
).find()){
594 Person result
= Person
.NewInstance();
595 result
.setNomenclaturalTitle(authorString
);
598 return parsedTeam(authorString
);
604 * Parses an authorString (reprsenting a team into the single authors and add
605 * them to the return Team.
606 * @param authorString
609 protected Team
parsedTeam(String authorString
){
610 Team result
= Team
.NewInstance();
611 String
[] authors
= authorString
.split(teamSplitter
);
612 for (String author
: authors
){
613 Person person
= Person
.NewInstance();
614 person
.setNomenclaturalTitle(author
);
615 result
.addTeamMember(person
);
621 //Parsing of the given full name that has been identified as hybrid already somewhere else.
622 private BotanicalName
parseHybrid(String fullName
){
623 logger
.warn("parseHybrid --> function not yet implemented");
624 BotanicalName result
= BotanicalName
.NewInstance(null);
625 result
.setTitleCache(fullName
);
629 // // Parsing of the given full name that has been identified as a cultivar already somwhere else.
630 // // The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
631 public BotanicalName
parseCultivar(String fullName
) throws StringNotParsableException
{
632 CultivarPlantName result
= null;
633 String
[] words
= oWsPattern
.split(fullName
);
635 /* ---------------------------------------------------------------------------------
637 * ---------------------------------------------------------------------------------*/
638 if (fullName
.indexOf(" '") != 0){
639 //TODO location of 'xx' is probably not arbitrary
640 Matcher cultivarMatcher
= cultivarPattern
.matcher(fullName
);
641 if (cultivarMatcher
.find()){
642 String namePart
= fullName
.replaceFirst(cultivar
, "");
644 String cultivarPart
= cultivarMatcher
.group(0).replace("'","").trim();
645 //OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
647 result
= (CultivarPlantName
)parseFullName(namePart
);
648 result
.setCultivarName(cultivarPart
);
650 }else if (fullName
.indexOf(" cv.") != 0){
651 // cv. is old form (not official)
652 throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
655 /* ---------------------------------------------------------------------------------
657 * ---------------------------------------------------------------------------------
660 //Ann. this is not the official way of noting cultivar groups
661 String group
= oWs
+ "Group" + oWs
+ capitalEpiWord
+ end
;
662 Pattern groupRE
= Pattern
.compile(group
);
663 Matcher groupMatcher
= groupRE
.matcher(fullName
);
664 if (groupMatcher
.find()){
665 if (! words
[words
.length
- 2].equals("group")){
666 throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName
+ " should be 'group'");
669 String namePart
= fullName
.substring(0, groupMatcher
.start(0) - 0);
670 //OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
672 String cultivarPart
= words
[words
.length
-1];
673 result
= (CultivarPlantName
)parseFullName(namePart
);
675 result
.setCultivarName(cultivarPart
);
677 //OLD: result.setCultivarGroupName(cultivarPart);
682 // // ---------------------------------------------------------------------------------
683 // if ( result = "" ){
684 // return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
688 return result
; //TODO
692 private void makeEmpty(NonViralName nameToBeFilled
){
693 nameToBeFilled
.setRank(null);
694 nameToBeFilled
.setTitleCache(null, false);
695 nameToBeFilled
.setNameCache(null);
697 nameToBeFilled
.setAppendedPhrase(null);
699 //nameToBeFilled.setBasionym(basionym);
700 nameToBeFilled
.setBasionymAuthorTeam(null);
701 nameToBeFilled
.setCombinationAuthorTeam(null);
702 nameToBeFilled
.setExBasionymAuthorTeam(null);
703 nameToBeFilled
.setExCombinationAuthorTeam(null);
704 nameToBeFilled
.setAuthorshipCache(null);
707 nameToBeFilled
.setHasProblem(false);
709 //nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
712 nameToBeFilled
.setGenusOrUninomial(null);
713 nameToBeFilled
.setInfraGenericEpithet(null);
714 nameToBeFilled
.setSpecificEpithet(null);
715 nameToBeFilled
.setInfraSpecificEpithet(null);
717 nameToBeFilled
.setNomenclaturalMicroReference(null);
718 nameToBeFilled
.setNomenclaturalReference(null);
720 if (nameToBeFilled
instanceof BotanicalName
){
721 BotanicalName botanicalName
= (BotanicalName
)nameToBeFilled
;
722 botanicalName
.setAnamorphic(false);
723 botanicalName
.setHybridFormula(false);
724 botanicalName
.setMonomHybrid(false);
725 botanicalName
.setBinomHybrid(false);
726 botanicalName
.setTrinomHybrid(false);
729 if (nameToBeFilled
instanceof ZoologicalName
){
730 ZoologicalName zoologicalName
= (ZoologicalName
)nameToBeFilled
;
731 zoologicalName
.setBreed(null);
732 zoologicalName
.setOriginalPublicationYear(null);
735 //TODO adapt to @Version of versionable entity, throws still optimistic locking error
736 //nameToBeFilled.setUpdated(Calendar.getInstance());
737 // TODO nameToBeFilled.setUpdatedBy(updatedBy);
743 static String epiSplitter
= "(\\s+|\\(|\\))"; //( ' '+| '(' | ')' )
744 static Pattern pattern
= Pattern
.compile(epiSplitter
);
746 //some useful non-terminals
747 static String start
= "^";
748 static String end
= "$";
749 static String anyEnd
= ".*" + end
;
750 static String oWs
= "\\s+"; //obligatory whitespaces
751 static String fWs
= "\\s*"; //facultative whitespcace
753 static String capitalWord
= "\\p{javaUpperCase}\\p{javaLowerCase}*";
754 static String nonCapitalWord
= "\\p{javaLowerCase}+";
756 static String capitalDotWord
= capitalWord
+ "\\.?"; //capitalWord with facultativ '.' at the end
757 static String nonCapitalDotWord
= nonCapitalWord
+ "\\.?"; //nonCapitalWord with facultativ '.' at the end
758 static String dotWord
= "(" + capitalWord
+ "|" + nonCapitalWord
+ ")\\.?"; //word (capital or non-capital) with facultativ '.' at the end
759 //Words used in an epethiton for a TaxonName
760 static String nonCapitalEpiWord
= "[a-zï\\-]+"; //TODO solve checkin Problem with Unicode character "[a-z�\\-]+";
761 static String capitalEpiWord
= "[A-Z]"+ nonCapitalEpiWord
;
765 static String month
= "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
766 static String singleYear
= "\\b" + "(?:17|18|19|20)" + "\\d{2}" + "\\b"; // word boundary followed by either 17,18,19, or 20 (not captured) followed by 2 digits
767 static String yearPhrase
= "(" + singleYear
+ "(-" + singleYear
+ ")?" +
768 "(" + month
+ ")?)" ; // optional month
771 static String yearSeperator
= "." + oWs
;
772 static String detailSeperator
= ":" + oWs
;
773 static String referenceSeperator1
= "," + oWs
;
774 static String inReferenceSeperator
= oWs
+ "in" + oWs
;
775 static String referenceSeperator
= "(" + referenceSeperator1
+"|" + inReferenceSeperator
+ ")" ;
776 static String referenceAuthorSeperator
= ","+ oWs
;
777 static String volumeSeperator
= "," + fWs
;
778 static String referenceEnd
= ".";
782 static String status
= "";
785 static String InfraGenusMarker
= "(subgen.|subg.|sect.|subsect.|ser.|subser.|t.infgen.)";
786 static String aggrOrGroupMarker
= "(aggr.|agg.|group)";
787 static String infraSpeciesMarker
= "(subsp.|convar.|var.|subvar.|f.|subf.|f.spec.|tax." + fWs
+ "infrasp.)";
788 static String oldInfraSpeciesMarker
= "(prol.|proles|race|taxon|sublusus)";
792 static String authorPart
= "(" + "(D'|L'|'t\\s)?" + capitalDotWord
+ "('" + nonCapitalDotWord
+ ")?" + "|da|de(n|l|\\sla)?)" ;
793 static String author
= "(" + authorPart
+ "(" + fWs
+ "|-)" + ")+" + "(f.|fil.|secundus)?";
794 static String teamSplitter
= fWs
+ "(&)" + fWs
;
795 static String authorTeam
= fWs
+ "(" + author
+ teamSplitter
+ ")*" + author
+ "(" + teamSplitter
+ "al.)?" + fWs
;
796 static String exString
= "(ex.?)";
797 static String authorAndExTeam
= authorTeam
+ "(" + oWs
+ exString
+ oWs
+ authorTeam
+ ")?";
798 static String basStart
= "\\(";
799 static String basEnd
= "\\)";
800 static String botanicBasionymAuthor
= basStart
+ "(" + authorAndExTeam
+ ")" + basEnd
; // '(' and ')' is for evaluation with RE.paren(x)
801 static String fullBotanicAuthorString
= fWs
+ "(" + botanicBasionymAuthor
+")?" + fWs
+ authorAndExTeam
+ fWs
;
802 static String facultFullBotanicAuthorString
= "(" + fullBotanicAuthorString
+ ")?" ;
805 //TODO does zoo author have ex-Author?
806 static String zooAuthorYearSeperator
= ",";
807 static String zooAuthorAddidtion
= fWs
+ zooAuthorYearSeperator
+ fWs
+ singleYear
;
808 static String zooAuthorTeam
= authorTeam
+ zooAuthorAddidtion
;
809 static String zooBasionymAuthor
= basStart
+ "(" + zooAuthorTeam
+ ")" + basEnd
;
810 static String fullZooAuthorString
= fWs
+ "(" + zooBasionymAuthor
+")?" + fWs
+ zooAuthorTeam
+ fWs
;
811 static String facultFullZooAuthorString
= "(" + fullZooAuthorString
+ ")?" ;
813 static String facultFullAuthorString2
= "(" + facultFullBotanicAuthorString
+ "|" + facultFullZooAuthorString
+ ")";
815 static String basionymAuthor
= "(" + botanicBasionymAuthor
+ "|" + zooBasionymAuthor
+ ")";
816 static String fullAuthorString
= "(" + fullBotanicAuthorString
+ "|" + fullZooAuthorString
+ ")";
819 //TODO still very simple
820 static String pageNumber
= "\\d{1,5}";
821 static String detail
= "(" + pageNumber
+ ")";
824 static String volume
= "\\d{4}" + "\\(\\d{4}\\)?";
826 static String referenceTitle
= "(" + dotWord
+ fWs
+ ")" + "{2,}";
827 static String bookReference
= referenceTitle
+ volumeSeperator
+ volume
;
828 static String bookSectionReference
= authorTeam
+ referenceAuthorSeperator
;
829 static String articleReference
= inReferenceSeperator
+ bookReference
;
830 static String reference
= "(" + articleReference
+ "|" + bookReference
+")" +
831 detailSeperator
+ detail
+ yearSeperator
+ yearPhrase
+
834 static Pattern referencePattern
= Pattern
.compile(reference
);
836 static String pNomStatusNom
= "nom\\." + fWs
+ "(superfl\\.|nud\\.|illeg\\.|inval\\.|cons\\.|alternativ\\.|subnud.|"+
837 "rej\\.|rej\\."+ fWs
+ "prop\\.|provis\\.)";
838 static String pNomStatusOrthVar
= "orth\\." + fWs
+ "var\\.";
839 static String pNomStatus
= "(" + pNomStatusNom
+ "|" + pNomStatusOrthVar
+ ")";
840 static String pNomStatusPhrase1
= "," + fWs
+ pNomStatus
;
841 static String pNomStatusPhrase2
= "\\[" + fWs
+ pNomStatus
+ "\\]";
843 static String pNomStatusPhrase
= "(?:" + pNomStatusPhrase1
+ "|" + pNomStatusPhrase2
+ ")";
848 //provisional synonym
853 //cultivars and hybrids
854 static String cultivar
= oWs
+ "'..+'"; //Achtung mit Hochkomma in AuthorNamen
855 static String cultivarMarker
= oWs
+ "(cv.|')";
856 static String hybrid
= oWs
+ "((x|X)" + oWs
+ "|notho)";//= ( x )|( X )|( notho)
859 static String genusOrSupraGenus
= capitalEpiWord
;
860 static String infraGenus
= capitalEpiWord
+ oWs
+ InfraGenusMarker
+ oWs
+ capitalEpiWord
;
861 static String aggrOrGroup
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ aggrOrGroupMarker
;
862 static String species
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
;
863 static String infraSpecies
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ infraSpeciesMarker
+ oWs
+ nonCapitalEpiWord
;
864 static String oldInfraSpecies
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ oldInfraSpeciesMarker
+ oWs
+ nonCapitalEpiWord
;
865 static String autonym
= capitalEpiWord
+ oWs
+ "(" + nonCapitalEpiWord
+")" + oWs
+ fullBotanicAuthorString
+ oWs
+ infraSpeciesMarker
+ oWs
+ "\\1"; //2-nd word and last word are the same
867 static String anyBotanicName
= "(" + genusOrSupraGenus
+ "|" + infraGenus
+ "|" + aggrOrGroup
+ "|" + species
+ "|" +
868 infraSpecies
+ "|" + infraSpecies
+ "|" + oldInfraSpecies
+ "|" + autonym
+ ")+";
869 static String anyZooName
= "(" + genusOrSupraGenus
+ "|" + infraGenus
+ "|" + aggrOrGroup
+ "|" + species
+ "|" +
870 infraSpecies
+ "|" + infraSpecies
+ "|" + oldInfraSpecies
+ ")+";
871 static String anyBotanicFullName
= anyBotanicName
+ oWs
+ fullBotanicAuthorString
;
872 static String anyZooFullName
= anyZooName
+ oWs
+ fullZooAuthorString
;
873 static String anyFullName
= "(" + anyBotanicFullName
+ "|" + anyZooFullName
+ ")";
876 static Pattern oWsPattern
= Pattern
.compile(oWs
);
877 static Pattern teamSplitterPattern
= Pattern
.compile(teamSplitter
);
878 static Pattern cultivarPattern
= Pattern
.compile(cultivar
);
879 static Pattern cultivarMarkerPattern
= Pattern
.compile(cultivarMarker
);
880 static Pattern hybridPattern
= Pattern
.compile(hybrid
);
882 static Pattern genusOrSupraGenusPattern
= Pattern
.compile(start
+ genusOrSupraGenus
+ facultFullAuthorString2
+ end
);
883 static Pattern infraGenusPattern
= Pattern
.compile(start
+ infraGenus
+ facultFullAuthorString2
+ end
);
884 static Pattern aggrOrGroupPattern
= Pattern
.compile(start
+ aggrOrGroup
+ fWs
+ end
); //aggr. or group has no author string
885 static Pattern speciesPattern
= Pattern
.compile(start
+ species
+ facultFullAuthorString2
+ end
);
886 static Pattern infraSpeciesPattern
= Pattern
.compile(start
+ infraSpecies
+ facultFullAuthorString2
+ end
);
887 static Pattern oldInfraSpeciesPattern
= Pattern
.compile(start
+ oldInfraSpecies
+ facultFullAuthorString2
+ end
);
888 static Pattern autonymPattern
= Pattern
.compile(start
+ autonym
+ fWs
+ end
);
890 static Pattern botanicBasionymPattern
= Pattern
.compile(botanicBasionymAuthor
);
891 static Pattern zooBasionymPattern
= Pattern
.compile(zooBasionymAuthor
);
892 static Pattern basionymPattern
= Pattern
.compile(basionymAuthor
);
894 static Pattern zooAuthorPattern
= Pattern
.compile(zooAuthorTeam
);
895 static Pattern zooAuthorAddidtionPattern
= Pattern
.compile(zooAuthorAddidtion
);
897 static Pattern exAuthorPattern
= Pattern
.compile(oWs
+ exString
);
899 static Pattern fullBotanicAuthorStringPattern
= Pattern
.compile(fullBotanicAuthorString
);
900 static Pattern fullZooAuthorStringPattern
= Pattern
.compile(fullZooAuthorString
);
901 static Pattern fullAuthorStringPattern
= Pattern
.compile(fullAuthorString
);
903 static Pattern anyBotanicFullNamePattern
= Pattern
.compile(anyBotanicFullName
);
904 static Pattern anyZooFullNamePattern
= Pattern
.compile(anyZooFullName
);