4 package eu
.etaxonomy
.cdm
.strategy
.parser
;
6 import java
.util
.regex
.Matcher
;
7 import java
.util
.regex
.Pattern
;
9 import org
.apache
.log4j
.Logger
;
11 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
12 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
13 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
14 import eu
.etaxonomy
.cdm
.model
.name
.BacterialName
;
15 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
16 import eu
.etaxonomy
.cdm
.model
.name
.CultivarPlantName
;
17 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
18 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
19 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
20 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
21 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
22 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
23 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
24 import eu
.etaxonomy
.cdm
.model
.reference
.Article
;
25 import eu
.etaxonomy
.cdm
.model
.reference
.Book
;
26 import eu
.etaxonomy
.cdm
.model
.reference
.BookSection
;
27 import eu
.etaxonomy
.cdm
.model
.reference
.Generic
;
28 import eu
.etaxonomy
.cdm
.model
.reference
.INomenclaturalReference
;
29 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceBase
;
30 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.StringNotParsableException
;
31 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
38 public class NonViralNameParserImpl
implements INonViralNameParser
<NonViralName
> {
39 private static final Logger logger
= Logger
.getLogger(NonViralNameParserImpl
.class);
41 // good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
43 final static boolean MAKE_EMPTY
= true;
44 final static boolean MAKE_NOT_EMPTY
= false;
47 public static NonViralNameParserImpl
NewInstance(){
48 return new NonViralNameParserImpl();
52 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
54 public NonViralName
parseSimpleName(String simpleName
, Rank rank
){
56 logger
.warn("parseSimpleName() not yet implemented. Uses parseFullName() instead");
57 return parseFullName(simpleName
, null, rank
);
62 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
64 public NonViralName
parseSimpleName(String simpleName
){
65 return parseSimpleName(simpleName
, null);
68 public NonViralName
getNonViralNameInstance(String fullString
, NomenclaturalCode code
){
69 return getNonViralNameInstance(fullString
, code
, null);
72 public NonViralName
getNonViralNameInstance(String fullString
, NomenclaturalCode code
, Rank rank
){
73 NonViralName result
= null;
75 boolean isBotanicalName
= anyBotanicFullNamePattern
.matcher(fullString
).find();
76 boolean isZoologicalName
= anyZooFullNamePattern
.matcher(fullString
).find();;
77 boolean isBacteriologicalName
= false;
78 boolean isCultivatedPlantName
= false;
79 if ( (isBotanicalName
|| isCultivatedPlantName
) && ! isZoologicalName
&& !isBacteriologicalName
){
81 result
= BotanicalName
.NewInstance(rank
);
83 result
= CultivarPlantName
.NewInstance(rank
);
85 }else if ( isZoologicalName
/*&& ! isBotanicalName*/ && !isBacteriologicalName
&& !isCultivatedPlantName
){
86 result
= ZoologicalName
.NewInstance(rank
);
87 }else if ( isZoologicalName
&& ! isBotanicalName
&& !isBacteriologicalName
&& !isCultivatedPlantName
){
88 result
= BacterialName
.NewInstance(rank
);
90 result
= NonViralName
.NewInstance(rank
);
92 }else if (code
.equals(NomenclaturalCode
.ICBN())){
93 result
= BotanicalName
.NewInstance(rank
);
94 }else if (code
.equals(NomenclaturalCode
.ICZN())){
95 result
= ZoologicalName
.NewInstance(rank
);
96 }else if (code
.equals(NomenclaturalCode
.ICNCP())){
97 logger
.warn("ICNCP parsing not yet implemented");
98 result
= CultivarPlantName
.NewInstance(rank
);
99 }else if (code
.equals(NomenclaturalCode
.ICNB())){
100 logger
.warn("ICNB not yet implemented");
101 result
= BacterialName
.NewInstance(rank
);
102 }else if (code
.equals(NomenclaturalCode
.ICVCN())){
103 logger
.error("Viral name is not a NonViralName !!");
105 logger
.error("Unknown Nomenclatural Code !!");
112 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullReference(java.lang.String)
114 public NonViralName
parseFullReference(String fullReferenceString
) {
115 return parseFullReference(fullReferenceString
, null, null);
119 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
121 public NonViralName
parseFullReference(String fullReferenceString
, NomenclaturalCode nomCode
, Rank rank
) {
122 if (fullReferenceString
== null){
125 NonViralName result
= getNonViralNameInstance(fullReferenceString
, nomCode
, rank
);
126 parseFullReference(result
, fullReferenceString
, rank
, MAKE_EMPTY
);
132 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
134 public void parseFullReference(NonViralName nameToBeFilled
, String fullReferenceString
, Rank rank
, boolean makeEmpty
) {
135 if (fullReferenceString
== null){
140 makeEmpty(nameToBeFilled
);
142 fullReferenceString
.replaceAll(oWs
, " ");
143 fullReferenceString
= fullReferenceString
.trim();
145 String localFullName
;
146 if (nameToBeFilled
instanceof ZoologicalName
){
147 localFullName
= anyZooFullName
;
149 localFullName
= anyBotanicFullName
;
151 //seperate name and reference part
152 String nameAndRefSeperator
= "(^" + localFullName
+ ")("+ referenceSeperator
+ ")";
153 Pattern nameAndRefSeperatorPattern
= Pattern
.compile(nameAndRefSeperator
);
154 Matcher nameAndRefSeperatorMatcher
= nameAndRefSeperatorPattern
.matcher(fullReferenceString
);
156 if (nameAndRefSeperatorMatcher
.find() ){
157 String nameAndSeperator
= nameAndRefSeperatorMatcher
.group(0);
158 String name
= nameAndRefSeperatorMatcher
.group(1);
159 String referenceString
= fullReferenceString
.substring(nameAndRefSeperatorMatcher
.end());
162 String seperator
= nameAndSeperator
.substring(name
.length());
163 boolean isInReference
= false;
164 if (seperator
.matches(inReferenceSeperator
)){
165 isInReference
= true;
169 referenceString
= parseNomStatus(referenceString
, nameToBeFilled
);
172 parseFullName(nameToBeFilled
, name
, rank
, makeEmpty
);
173 parseReference(nameToBeFilled
, referenceString
, isInReference
);
174 INomenclaturalReference ref
= nameToBeFilled
.getNomenclaturalReference();
175 if (ref
!= null && ref
.getHasProblem()){
176 nameToBeFilled
.setHasProblem(true);
179 //don't parse if name can't be seperated
180 nameToBeFilled
.setHasProblem(true);
181 nameToBeFilled
.setTitleCache(fullReferenceString
);
182 logger
.info("no applicable parsing rule could be found for \"" + fullReferenceString
+ "\"");
186 //TODO make it an Array of status
188 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
189 * The nomenclatural status part ist deleted from the reference String.
190 * @return String the new (shortend) reference String
192 private String
parseNomStatus(String reference
, NonViralName nameToBeFilled
) {
194 Pattern hasStatusPattern
= Pattern
.compile("(" + pNomStatusPhrase
+ ")");
195 Matcher hasStatusMatcher
= hasStatusPattern
.matcher(reference
);
197 if (hasStatusMatcher
.find()) {
198 String statusPhrase
= hasStatusMatcher
.group(0);
200 Pattern statusPattern
= Pattern
.compile(pNomStatus
);
201 Matcher statusMatcher
= statusPattern
.matcher(statusPhrase
);
202 statusMatcher
.find();
203 statusString
= statusMatcher
.group(0);
205 NomenclaturalStatusType nomStatusType
= NomenclaturalStatusType
.getNomenclaturalStatusTypeByAbbreviation(statusString
);
206 NomenclaturalStatus nomStatus
= NomenclaturalStatus
.NewInstance(nomStatusType
);
207 nameToBeFilled
.addStatus(nomStatus
);
209 reference
= reference
.replace(statusPhrase
, "");
210 } catch (UnknownCdmTypeException e
) {
218 private void parseReference(NonViralName nameToBeFilled
, String reference
, boolean isInReference
){
220 if (referencePattern
.matcher(reference
).matches() ){
221 //End (just delete, may be ambigous for yearPhrase, but no real information gets lost
222 Pattern endPattern
= Pattern
.compile( referenceEnd
+ end
);
223 Matcher endMatcher
= endPattern
.matcher(reference
);
224 if (endMatcher
.find()){
225 String endPart
= endMatcher
.group(0);
226 reference
= reference
.substring(0, reference
.length() - endPart
.length());
230 String yearPart
= null;
231 String pYearPhrase
= yearSeperator
+ yearPhrase
+ end
;
232 Pattern yearPhrasePattern
= Pattern
.compile(pYearPhrase
);
233 Matcher yearPhraseMatcher
= yearPhrasePattern
.matcher(reference
);
234 if (yearPhraseMatcher
.find()){
235 yearPart
= yearPhraseMatcher
.group(0);
236 reference
= reference
.substring(0, reference
.length() - yearPart
.length());
237 yearPart
= yearPart
.replaceFirst(start
+ yearSeperator
, "").trim();
241 String pDetailPhrase
= detailSeperator
+ detail
+ end
;
242 Pattern detailPhrasePattern
= Pattern
.compile(pDetailPhrase
);
243 Matcher detailPhraseMatcher
= detailPhrasePattern
.matcher(reference
);
244 if (detailPhraseMatcher
.find()){
245 String detailPart
= detailPhraseMatcher
.group(0);
246 reference
= reference
.substring(0, reference
.length() - detailPart
.length());
247 detailPart
= detailPart
.replaceFirst(start
+ detailSeperator
, "").trim();
248 nameToBeFilled
.setNomenclaturalMicroReference(detailPart
);
251 parseReferenceTitle(reference
, yearPart
);
253 Generic ref
= Generic
.NewInstance();
254 ref
.setTitleCache(reference
);
255 ref
.setHasProblem(true);
256 nameToBeFilled
.setNomenclaturalReference(ref
);
262 * Parses the referenceTitlePart, including the author volume and edition.
267 private ReferenceBase
parseReferenceTitle(String reference
, String year
){
268 ReferenceBase result
= null;
269 Pattern bookPattern
= Pattern
.compile(bookReference
);
270 Pattern articlePattern
= Pattern
.compile(articleReference
);
271 Pattern bookSectionPattern
= Pattern
.compile(bookSectionReference
);
274 Matcher articleMatcher
= articlePattern
.matcher(reference
);
275 Matcher bookMatcher
= bookPattern
.matcher(reference
);
276 Matcher bookSectionMatcher
= bookSectionPattern
.matcher(reference
);
279 if (articleMatcher
.matches()){
281 //(type, author, title, volume, editor, series;
282 Article article
= Article
.NewInstance();
283 article
.setTitleCache(reference
);
285 }else if(bookMatcher
.matches()){
286 Book book
= Book
.NewInstance();
287 book
.setTitleCache(reference
);
289 }else if (bookSectionMatcher
.matches()){
290 BookSection bookSection
= BookSection
.NewInstance();
291 bookSection
.setTitleCache(reference
);
292 result
= bookSection
;
294 logger
.warn("unknown reference type not yet implemented");
295 //ReferenceBase refBase =
302 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericFullName(java.lang.String)
304 public NonViralName
parseFullName(String fullNameString
){
305 return parseFullName(fullNameString
, null, null);
310 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
312 public NonViralName
parseFullName(String fullNameString
, NomenclaturalCode nomCode
, Rank rank
) {
313 if (fullNameString
== null){
316 NonViralName result
= getNonViralNameInstance(fullNameString
, nomCode
, rank
);
317 parseFullName(result
, fullNameString
, rank
, false);
323 public void parseFullName(NonViralName nameToBeFilled
, String fullNameString
, Rank rank
, boolean makeEmpty
) {
326 String authorString
= null;
328 if (fullNameString
== null){
332 makeEmpty(nameToBeFilled
);
334 fullNameString
.replaceAll(oWs
, " ");
336 // OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces
337 fullNameString
= fullNameString
.trim();
339 String
[] epi
= pattern
.split(fullNameString
);
341 //cultivars //TODO 2 implement cultivars
342 // if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
343 // result = parseCultivar(fullName);
345 //hybrids //TODO 2 implement hybrids
347 if (hybridPattern
.matcher(fullNameString
).matches() ){
348 nameToBeFilled
= parseHybrid(fullNameString
);
350 else if (genusOrSupraGenusPattern
.matcher(fullNameString
).matches()){
352 if (rank
!= null && rank
.isSupraGeneric()){
353 nameToBeFilled
.setRank(rank
);
354 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
358 nameToBeFilled
.setRank(Rank
.GENUS());
359 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
361 authorString
= fullNameString
.substring(epi
[0].length());
364 else if (infraGenusPattern
.matcher(fullNameString
).matches()){
365 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[1]));
366 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
367 nameToBeFilled
.setInfraGenericEpithet(epi
[2]);
368 authorString
= fullNameString
.substring(epi
[0].length() + 1 + epi
[1].length()+ 1 + epi
[2].length());
371 else if (aggrOrGroupPattern
.matcher(fullNameString
).matches()){
372 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[2]));
373 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
374 nameToBeFilled
.setSpecificEpithet(epi
[1]);
377 else if (speciesPattern
.matcher(fullNameString
).matches()){
378 nameToBeFilled
.setRank(Rank
.SPECIES());
379 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
380 nameToBeFilled
.setSpecificEpithet(epi
[1]);
381 authorString
= fullNameString
.substring(epi
[0].length() + 1 + epi
[1].length());
384 else if (autonymPattern
.matcher(fullNameString
).matches()){
385 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[epi
.length
- 2]));
386 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
387 nameToBeFilled
.setSpecificEpithet(epi
[1]);
388 nameToBeFilled
.setInfraSpecificEpithet(epi
[epi
.length
- 1]);
389 int lenSpecies
= 2 + epi
[0].length()+epi
[1].length();
390 int lenInfraSpecies
= 2 + epi
[epi
.length
- 2].length() + epi
[epi
.length
- 1].length();
391 authorString
= fullNameString
.substring(lenSpecies
, fullNameString
.length() - lenInfraSpecies
);
394 else if (infraSpeciesPattern
.matcher(fullNameString
).matches()){
395 String infraSpecRankEpi
= epi
[2];
396 String infraSpecEpi
= epi
[3];
397 if ("tax.".equals(infraSpecRankEpi
)){
398 infraSpecRankEpi
+= " " + epi
[3];
399 infraSpecEpi
= epi
[4];
401 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(infraSpecRankEpi
));
402 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
403 nameToBeFilled
.setSpecificEpithet(epi
[1]);
404 nameToBeFilled
.setInfraSpecificEpithet(infraSpecEpi
);
405 authorString
= fullNameString
.substring(epi
[0].length()+ 1 + epi
[1].length() +1 + infraSpecRankEpi
.length() + 1 + infraSpecEpi
.length());
407 else if (oldInfraSpeciesPattern
.matcher(fullNameString
).matches()){
408 boolean implemented
= false;
410 nameToBeFilled
.setRank(Rank
.getRankByNameOrAbbreviation(epi
[2]));
411 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
412 nameToBeFilled
.setSpecificEpithet(epi
[1]);
413 //TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
414 authorString
= fullNameString
.substring(epi
[0].length()+ 1 + epi
[1].length() +1 + epi
[2].length() + 1 + epi
[3].length());
416 nameToBeFilled
.setHasProblem(true);
417 nameToBeFilled
.setTitleCache(fullNameString
);
418 logger
.info("Name string " + fullNameString
+ " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
423 nameToBeFilled
.setHasProblem(true);
424 nameToBeFilled
.setTitleCache(fullNameString
);
425 logger
.info("no applicable parsing rule could be found for \"" + fullNameString
+ "\"");
428 if (nameToBeFilled
!= null && authorString
!= null && authorString
.trim().length() > 0 ){
429 TeamOrPersonBase
[] authors
= new TeamOrPersonBase
[4];
430 Integer
[] years
= new Integer
[4];
432 fullAuthors(authorString
, authors
, years
, nameToBeFilled
.getClass());
433 } catch (StringNotParsableException e
) {
434 nameToBeFilled
.setHasProblem(true);
435 nameToBeFilled
.setTitleCache(fullNameString
);
436 logger
.info("no applicable parsing rule could be found for \"" + fullNameString
+ "\"");;
438 nameToBeFilled
.setCombinationAuthorTeam(authors
[0]);
439 nameToBeFilled
.setExCombinationAuthorTeam(authors
[1]);
440 nameToBeFilled
.setBasionymAuthorTeam(authors
[2]);
441 nameToBeFilled
.setExBasionymAuthorTeam(authors
[3]);
442 if (nameToBeFilled
instanceof ZoologicalName
){
443 ZoologicalName zooName
= (ZoologicalName
)nameToBeFilled
;
444 zooName
.setPublicationYear(years
[0]);
445 zooName
.setOriginalPublicationYear(years
[2]);
449 if (nameToBeFilled
!= null){
450 //return(BotanicalName)result;
453 nameToBeFilled
.setHasProblem(true);
454 nameToBeFilled
.setTitleCache(fullNameString
);
455 logger
.info("Name string " + fullNameString
+ " could not be parsed!");
459 } catch (UnknownCdmTypeException e
) {
460 nameToBeFilled
.setHasProblem(true);
461 nameToBeFilled
.setTitleCache(fullNameString
);
462 logger
.info("unknown rank (" + (rank
== null?
"null":rank
) + ") or abbreviation in string " + fullNameString
);
471 * Parses the fullAuthorString
472 * @param fullAuthorString
473 * @return array of Teams containing the Team[0],
474 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
476 protected void fullAuthors (String fullAuthorString
, TeamOrPersonBase
[] authors
, Integer
[] years
, Class clazz
)
477 throws StringNotParsableException
{
478 fullAuthorString
= fullAuthorString
.trim();
479 if (fullAuthorString
== null || clazz
== null){
483 if ( BotanicalName
.class.isAssignableFrom(clazz
) ){
484 if (! fullBotanicAuthorStringPattern
.matcher(fullAuthorString
).matches() ){
485 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString
+") not parsable: ");
489 else if ( ZoologicalName
.class.isAssignableFrom(clazz
) ){
490 if (! fullZooAuthorStringPattern
.matcher(fullAuthorString
).matches() ){
491 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString
+") not parsable: ");
495 logger
.warn ("not yet implemented");
496 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString
+") not parsable: ");
498 fullAuthorsChecked(fullAuthorString
, authors
, years
);
502 * like fullTeams but without trim and match check
504 protected void fullAuthorsChecked (String fullAuthorString
, TeamOrPersonBase
[] authors
, Integer
[] years
){
505 int authorTeamStart
= 0;
506 Matcher basionymMatcher
= basionymPattern
.matcher(fullAuthorString
);
508 if (basionymMatcher
.find(0)){
510 String basString
= basionymMatcher
.group();
511 basString
= basString
.replaceFirst(basStart
, "");
512 basString
= basString
.replaceAll(basEnd
, "").trim();
513 authorTeamStart
= basionymMatcher
.end(1) + 1;
515 TeamOrPersonBase
[] basAuthors
= new TeamOrPersonBase
[2];
516 Integer
[] basYears
= new Integer
[2];
517 authorsAndEx(basString
, basAuthors
, basYears
);
518 authors
[2]= basAuthors
[0];
519 years
[2] = basYears
[0];
520 authors
[3]= basAuthors
[1];
521 years
[3] = basYears
[1];
523 TeamOrPersonBase
[] combinationAuthors
= new TeamOrPersonBase
[2];;
524 Integer
[] combinationYears
= new Integer
[2];
525 authorsAndEx(fullAuthorString
.substring(authorTeamStart
), combinationAuthors
, combinationYears
);
526 authors
[0]= combinationAuthors
[0] ;
527 years
[0] = combinationYears
[0];
528 authors
[1]= combinationAuthors
[1];
529 years
[1] = combinationYears
[1];
534 * Parses the author and ex-author String
535 * @param authorTeamString String representing the author and the ex-author team
536 * @return array of Teams containing the Team[0] and the ExTeam[1]
538 protected void authorsAndEx (String authorTeamString
, TeamOrPersonBase
[] authors
, Integer
[] years
){
539 //TODO noch allgemeiner am anfang durch Replace etc.
540 authorTeamString
= authorTeamString
.trim();
541 authorTeamString
= authorTeamString
.replaceFirst(oWs
+ "ex" + oWs
, " ex. " );
542 int authorEnd
= authorTeamString
.length();
544 Matcher exAuthorMatcher
= exAuthorPattern
.matcher(authorTeamString
);
545 if (exAuthorMatcher
.find(0)){
546 int exAuthorBegin
= exAuthorMatcher
.end(0);
547 String exString
= authorTeamString
.substring(exAuthorBegin
).trim();
548 authorEnd
= exAuthorMatcher
.start(0);
549 authors
[1] = author(exString
);
551 zooOrBotanicAuthor(authorTeamString
.substring(0, authorEnd
), authors
, years
);
555 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
556 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies
557 * to the year in case of an zoological name.
558 * @param authorString
562 protected void zooOrBotanicAuthor(String authorString
, TeamOrPersonBase
[] team
, Integer
[] year
){
563 if (authorString
== null){
565 }else if ((authorString
= authorString
.trim()).length() == 0){
568 Matcher zooAuthorAddidtionMatcher
= zooAuthorAddidtionPattern
.matcher(authorString
);
569 if (zooAuthorAddidtionMatcher
.find()){
570 int index
= zooAuthorAddidtionMatcher
.start(0);
571 String strYear
= authorString
.substring(index
);
572 strYear
= strYear
.replaceAll(zooAuthorYearSeperator
, "").trim();
573 year
[0] = Integer
.valueOf(strYear
);
574 authorString
= authorString
.substring(0, index
).trim();
576 team
[0] = author(authorString
);
581 * Parses an authorTeam String and returns the Team
582 * !!! TODO (atomization not yet implemented)
583 * @param authorTeamString String representing the author team
586 protected TeamOrPersonBase
author (String authorString
){
587 if (authorString
== null){
589 }else if ((authorString
= authorString
.trim()).length() == 0){
591 }else if (! teamSplitterPattern
.matcher(authorString
).find()){
593 Person result
= Person
.NewInstance();
594 result
.setNomenclaturalTitle(authorString
);
597 return parsedTeam(authorString
);
603 * Parses an authorString (reprsenting a team into the single authors and add
604 * them to the return Team.
605 * @param authorString
608 protected Team
parsedTeam(String authorString
){
609 Team result
= Team
.NewInstance();
610 String
[] authors
= authorString
.split(teamSplitter
);
611 for (String author
: authors
){
612 Person person
= Person
.NewInstance();
613 person
.setNomenclaturalTitle(author
);
614 result
.addTeamMember(person
);
620 //Parsing of the given full name that has been identified as hybrid already somewhere else.
621 private BotanicalName
parseHybrid(String fullName
){
622 logger
.warn("parseHybrid --> function not yet implemented");
623 BotanicalName result
= BotanicalName
.NewInstance(null);
624 result
.setTitleCache(fullName
);
628 // // Parsing of the given full name that has been identified as a cultivar already somwhere else.
629 // // The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
630 public BotanicalName
parseCultivar(String fullName
) throws StringNotParsableException
{
631 CultivarPlantName result
= null;
632 String
[] words
= oWsPattern
.split(fullName
);
634 /* ---------------------------------------------------------------------------------
636 * ---------------------------------------------------------------------------------*/
637 if (fullName
.indexOf(" '") != 0){
638 //TODO location of 'xx' is probably not arbitrary
639 Matcher cultivarMatcher
= cultivarPattern
.matcher(fullName
);
640 if (cultivarMatcher
.find()){
641 String namePart
= fullName
.replaceFirst(cultivar
, "");
643 String cultivarPart
= cultivarMatcher
.group(0).replace("'","").trim();
644 //OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
646 result
= (CultivarPlantName
)parseFullName(namePart
);
647 result
.setCultivarName(cultivarPart
);
649 }else if (fullName
.indexOf(" cv.") != 0){
650 // cv. is old form (not official)
651 throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
654 /* ---------------------------------------------------------------------------------
656 * ---------------------------------------------------------------------------------
659 //Ann. this is not the official way of noting cultivar groups
660 String group
= oWs
+ "Group" + oWs
+ capitalEpiWord
+ end
;
661 Pattern groupRE
= Pattern
.compile(group
);
662 Matcher groupMatcher
= groupRE
.matcher(fullName
);
663 if (groupMatcher
.find()){
664 if (! words
[words
.length
- 2].equals("group")){
665 throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName
+ " should be 'group'");
668 String namePart
= fullName
.substring(0, groupMatcher
.start(0) - 0);
669 //OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
671 String cultivarPart
= words
[words
.length
-1];
672 result
= (CultivarPlantName
)parseFullName(namePart
);
674 result
.setCultivarName(cultivarPart
);
676 //OLD: result.setCultivarGroupName(cultivarPart);
681 // // ---------------------------------------------------------------------------------
682 // if ( result = "" ){
683 // return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
687 return result
; //TODO
691 private void makeEmpty(NonViralName nameToBeFilled
){
692 nameToBeFilled
.setRank(null);
693 nameToBeFilled
.setTitleCache(null, false);
694 nameToBeFilled
.setNameCache(null);
696 nameToBeFilled
.setAppendedPhrase(null);
698 //nameToBeFilled.setBasionym(basionym);
699 nameToBeFilled
.setBasionymAuthorTeam(null);
700 nameToBeFilled
.setCombinationAuthorTeam(null);
701 nameToBeFilled
.setExBasionymAuthorTeam(null);
702 nameToBeFilled
.setExCombinationAuthorTeam(null);
703 nameToBeFilled
.setAuthorshipCache(null);
706 nameToBeFilled
.setHasProblem(false);
708 //nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
711 nameToBeFilled
.setGenusOrUninomial(null);
712 nameToBeFilled
.setInfraGenericEpithet(null);
713 nameToBeFilled
.setSpecificEpithet(null);
714 nameToBeFilled
.setInfraSpecificEpithet(null);
716 nameToBeFilled
.setNomenclaturalMicroReference(null);
717 nameToBeFilled
.setNomenclaturalReference(null);
719 if (nameToBeFilled
instanceof BotanicalName
){
720 BotanicalName botanicalName
= (BotanicalName
)nameToBeFilled
;
721 botanicalName
.setAnamorphic(false);
722 botanicalName
.setHybridFormula(false);
723 botanicalName
.setMonomHybrid(false);
724 botanicalName
.setBinomHybrid(false);
725 botanicalName
.setTrinomHybrid(false);
728 if (nameToBeFilled
instanceof ZoologicalName
){
729 ZoologicalName zoologicalName
= (ZoologicalName
)nameToBeFilled
;
730 zoologicalName
.setBreed(null);
731 zoologicalName
.setOriginalPublicationYear(null);
734 //TODO adapt to @Version of versionable entity, throws still optimistic locking error
735 //nameToBeFilled.setUpdated(Calendar.getInstance());
736 // TODO nameToBeFilled.setUpdatedBy(updatedBy);
742 static String epiSplitter
= "(\\s+|\\(|\\))"; //( ' '+| '(' | ')' )
743 static Pattern pattern
= Pattern
.compile(epiSplitter
);
745 //some useful non-terminals
746 static String start
= "^";
747 static String end
= "$";
748 static String anyEnd
= ".*" + end
;
749 static String oWs
= "\\s+"; //obligatory whitespaces
750 static String fWs
= "\\s*"; //facultative whitespcace
752 static String capitalWord
= "\\p{javaUpperCase}\\p{javaLowerCase}*";
753 static String nonCapitalWord
= "\\p{javaLowerCase}+";
755 static String capitalDotWord
= capitalWord
+ "\\.?"; //capitalWord with facultativ '.' at the end
756 static String nonCapitalDotWord
= nonCapitalWord
+ "\\.?"; //nonCapitalWord with facultativ '.' at the end
757 static String dotWord
= "(" + capitalWord
+ "|" + nonCapitalWord
+ ")\\.?"; //word (capital or non-capital) with facultativ '.' at the end
758 //Words used in an epethiton for a TaxonName
759 static String nonCapitalEpiWord
= "[a-zï\\-]+"; //TODO solve checkin Problem with Unicode character "[a-z�\\-]+";
760 static String capitalEpiWord
= "[A-Z]"+ nonCapitalEpiWord
;
764 static String month
= "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
765 static String singleYear
= "\\b" + "(?:17|18|19|20)" + "\\d{2}" + "\\b"; // word boundary followed by either 17,18,19, or 20 (not captured) followed by 2 digits
766 static String yearPhrase
= "(" + singleYear
+ "(-" + singleYear
+ ")?" +
767 "(" + month
+ ")?)" ; // optional month
770 static String yearSeperator
= "." + oWs
;
771 static String detailSeperator
= ":" + oWs
;
772 static String referenceSeperator1
= "," + oWs
;
773 static String inReferenceSeperator
= oWs
+ "in" + oWs
;
774 static String referenceSeperator
= "(" + referenceSeperator1
+"|" + inReferenceSeperator
+ ")" ;
775 static String referenceAuthorSeperator
= ","+ oWs
;
776 static String volumeSeperator
= "," + fWs
;
777 static String referenceEnd
= ".";
781 static String status
= "";
784 static String InfraGenusMarker
= "(subgen.|subg.|sect.|subsect.|ser.|subser.|t.infgen.)";
785 static String aggrOrGroupMarker
= "(aggr.|agg.|group)";
786 static String infraSpeciesMarker
= "(subsp.|convar.|var.|subvar.|f.|subf.|f.spec.|tax." + fWs
+ "infrasp.)";
787 static String oldInfraSpeciesMarker
= "(prol.|proles|race|taxon|sublusus)";
791 static String authorPart
= "(" + "(D'|L'|'t\\s)?" + capitalDotWord
+ "('" + nonCapitalDotWord
+ ")?" + "|da|de(n|l|\\sla)?)" ;
792 static String author
= "(" + authorPart
+ "(" + fWs
+ "|-)" + ")+" + "(f.|fil.|secundus)?";
793 static String teamSplitter
= fWs
+ "(&)" + fWs
;
794 static String authorTeam
= fWs
+ "(" + author
+ teamSplitter
+ ")*" + author
+ "(" + teamSplitter
+ "al.)?" + fWs
;
795 static String exString
= "(ex.?)";
796 static String authorAndExTeam
= authorTeam
+ "(" + oWs
+ exString
+ oWs
+ authorTeam
+ ")?";
797 static String basStart
= "\\(";
798 static String basEnd
= "\\)";
799 static String botanicBasionymAuthor
= basStart
+ "(" + authorAndExTeam
+ ")" + basEnd
; // '(' and ')' is for evaluation with RE.paren(x)
800 static String fullBotanicAuthorString
= fWs
+ "(" + botanicBasionymAuthor
+")?" + fWs
+ authorAndExTeam
+ fWs
;
801 static String facultFullBotanicAuthorString
= "(" + fullBotanicAuthorString
+ ")?" ;
804 //TODO does zoo author have ex-Author?
805 static String zooAuthorYearSeperator
= ",";
806 static String zooAuthorAddidtion
= fWs
+ zooAuthorYearSeperator
+ fWs
+ singleYear
;
807 static String zooAuthorTeam
= authorTeam
+ zooAuthorAddidtion
;
808 static String zooBasionymAuthor
= basStart
+ "(" + zooAuthorTeam
+ ")" + basEnd
;
809 static String fullZooAuthorString
= fWs
+ "(" + zooBasionymAuthor
+")?" + fWs
+ zooAuthorTeam
+ fWs
;
810 static String facultFullZooAuthorString
= "(" + fullZooAuthorString
+ ")?" ;
812 static String facultFullAuthorString2
= "(" + facultFullBotanicAuthorString
+ "|" + facultFullZooAuthorString
+ ")";
814 static String basionymAuthor
= "(" + botanicBasionymAuthor
+ "|" + zooBasionymAuthor
+ ")";
815 static String fullAuthorString
= "(" + fullBotanicAuthorString
+ "|" + fullZooAuthorString
+ ")";
818 //TODO still very simple
819 static String pageNumber
= "\\d{1,5}";
820 static String detail
= "(" + pageNumber
+ ")";
823 static String volume
= "\\d{4}" + "\\(\\d{4}\\)?";
825 static String referenceTitle
= "(" + dotWord
+ fWs
+ ")" + "{2,}";
826 static String bookReference
= referenceTitle
+ volumeSeperator
+ volume
;
827 static String bookSectionReference
= authorTeam
+ referenceAuthorSeperator
;
828 static String articleReference
= inReferenceSeperator
+ bookReference
;
829 static String reference
= "(" + articleReference
+ "|" + bookReference
+")" +
830 detailSeperator
+ detail
+ yearSeperator
+ yearPhrase
+
833 static Pattern referencePattern
= Pattern
.compile(reference
);
835 static String pNomStatusNom
= "nom\\." + fWs
+ "(superfl\\.|nud\\.|illeg\\.|inval\\.|cons\\.|alternativ\\.|subnud.|"+
836 "rej\\.|rej\\."+ fWs
+ "prop\\.|provis\\.)";
837 static String pNomStatusOrthVar
= "orth\\." + fWs
+ "var\\.";
838 static String pNomStatus
= "(" + pNomStatusNom
+ "|" + pNomStatusOrthVar
+ ")";
839 static String pNomStatusPhrase1
= "," + fWs
+ pNomStatus
;
840 static String pNomStatusPhrase2
= "\\[" + fWs
+ pNomStatus
+ "\\]";
842 static String pNomStatusPhrase
= "(?:" + pNomStatusPhrase1
+ "|" + pNomStatusPhrase2
+ ")";
847 //provisional synonym
852 //cultivars and hybrids
853 static String cultivar
= oWs
+ "'..+'"; //Achtung mit Hochkomma in AuthorNamen
854 static String cultivarMarker
= oWs
+ "(cv.|')";
855 static String hybrid
= oWs
+ "((x|X)" + oWs
+ "|notho)";//= ( x )|( X )|( notho)
858 static String genusOrSupraGenus
= capitalEpiWord
;
859 static String infraGenus
= capitalEpiWord
+ oWs
+ InfraGenusMarker
+ oWs
+ capitalEpiWord
;
860 static String aggrOrGroup
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ aggrOrGroupMarker
;
861 static String species
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
;
862 static String infraSpecies
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ infraSpeciesMarker
+ oWs
+ nonCapitalEpiWord
;
863 static String oldInfraSpecies
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ oldInfraSpeciesMarker
+ oWs
+ nonCapitalEpiWord
;
864 static String autonym
= capitalEpiWord
+ oWs
+ "(" + nonCapitalEpiWord
+")" + oWs
+ fullBotanicAuthorString
+ oWs
+ infraSpeciesMarker
+ oWs
+ "\\1"; //2-nd word and last word are the same
866 static String anyBotanicName
= "(" + genusOrSupraGenus
+ "|" + infraGenus
+ "|" + aggrOrGroup
+ "|" + species
+ "|" +
867 infraSpecies
+ "|" + infraSpecies
+ "|" + oldInfraSpecies
+ "|" + autonym
+ ")+";
868 static String anyZooName
= "(" + genusOrSupraGenus
+ "|" + infraGenus
+ "|" + aggrOrGroup
+ "|" + species
+ "|" +
869 infraSpecies
+ "|" + infraSpecies
+ "|" + oldInfraSpecies
+ ")+";
870 static String anyBotanicFullName
= anyBotanicName
+ oWs
+ fullBotanicAuthorString
;
871 static String anyZooFullName
= anyZooName
+ oWs
+ fullZooAuthorString
;
872 static String anyFullName
= "(" + anyBotanicFullName
+ "|" + anyZooFullName
+ ")";
875 static Pattern oWsPattern
= Pattern
.compile(oWs
);
876 static Pattern teamSplitterPattern
= Pattern
.compile(teamSplitter
);
877 static Pattern cultivarPattern
= Pattern
.compile(cultivar
);
878 static Pattern cultivarMarkerPattern
= Pattern
.compile(cultivarMarker
);
879 static Pattern hybridPattern
= Pattern
.compile(hybrid
);
881 static Pattern genusOrSupraGenusPattern
= Pattern
.compile(start
+ genusOrSupraGenus
+ facultFullAuthorString2
+ end
);
882 static Pattern infraGenusPattern
= Pattern
.compile(start
+ infraGenus
+ facultFullAuthorString2
+ end
);
883 static Pattern aggrOrGroupPattern
= Pattern
.compile(start
+ aggrOrGroup
+ fWs
+ end
); //aggr. or group has no author string
884 static Pattern speciesPattern
= Pattern
.compile(start
+ species
+ facultFullAuthorString2
+ end
);
885 static Pattern infraSpeciesPattern
= Pattern
.compile(start
+ infraSpecies
+ facultFullAuthorString2
+ end
);
886 static Pattern oldInfraSpeciesPattern
= Pattern
.compile(start
+ oldInfraSpecies
+ facultFullAuthorString2
+ end
);
887 static Pattern autonymPattern
= Pattern
.compile(start
+ autonym
+ fWs
+ end
);
889 static Pattern botanicBasionymPattern
= Pattern
.compile(botanicBasionymAuthor
);
890 static Pattern zooBasionymPattern
= Pattern
.compile(zooBasionymAuthor
);
891 static Pattern basionymPattern
= Pattern
.compile(basionymAuthor
);
893 static Pattern zooAuthorPattern
= Pattern
.compile(zooAuthorTeam
);
894 static Pattern zooAuthorAddidtionPattern
= Pattern
.compile(zooAuthorAddidtion
);
896 static Pattern exAuthorPattern
= Pattern
.compile(oWs
+ exString
);
898 static Pattern fullBotanicAuthorStringPattern
= Pattern
.compile(fullBotanicAuthorString
);
899 static Pattern fullZooAuthorStringPattern
= Pattern
.compile(fullZooAuthorString
);
900 static Pattern fullAuthorStringPattern
= Pattern
.compile(fullAuthorString
);
902 static Pattern anyBotanicFullNamePattern
= Pattern
.compile(anyBotanicFullName
);
903 static Pattern anyZooFullNamePattern
= Pattern
.compile(anyZooFullName
);