4 package eu
.etaxonomy
.cdm
.strategy
.parser
;
6 import java
.util
.regex
.Matcher
;
7 import java
.util
.regex
.Pattern
;
9 import org
.apache
.log4j
.Logger
;
11 import eu
.etaxonomy
.cdm
.model
.agent
.INomenclaturalAuthor
;
12 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
13 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
14 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
15 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
16 import eu
.etaxonomy
.cdm
.model
.name
.CultivarPlantName
;
17 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
18 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatus
;
19 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalStatusType
;
20 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
21 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
22 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
23 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
24 import eu
.etaxonomy
.cdm
.model
.reference
.Article
;
25 import eu
.etaxonomy
.cdm
.model
.reference
.Book
;
26 import eu
.etaxonomy
.cdm
.model
.reference
.BookSection
;
27 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceBase
;
28 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.StringNotParsableException
;
29 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
36 public class NonViralNameParserImpl
implements ITaxonNameParser
<NonViralName
> {
// log4j logger used by every method of this parser for warnings and parse diagnostics.
37 private static final Logger logger
= Logger
.getLogger(NonViralNameParserImpl
.class);
39 // good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
41 public static NonViralNameParserImpl
NewInstance(){
42 return new NonViralNameParserImpl();
47 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
49 public NonViralName
parseSimpleName(String simpleName
, Rank rank
){
51 logger
.warn("parseSimpleName() not yet implemented. Uses parseFullName() instead");
52 return parseFullName(simpleName
, rank
);
57 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
59 public NonViralName
parseSimpleName(String simpleName
){
60 return parseSimpleName(simpleName
, null);
64 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
66 public NonViralName
parseFullReference(String fullReferenceString
, NomenclaturalCode nomCode
, Rank rank
) {
67 if (fullReferenceString
== null){
70 NonViralName result
= null;
72 nomCode
= getNomeclaturalCode(reference
);
75 result
= NonViralName
.NewInstance(rank
);
76 }else if (nomCode
.equals(NomenclaturalCode
.ICBN())){
77 result
= BotanicalName
.NewInstance(rank
);
78 }else if (nomCode
.equals(NomenclaturalCode
.ICZN())){
79 result
= ZoologicalName
.NewInstance(rank
);
80 }else if (nomCode
.equals(NomenclaturalCode
.ICNCP())){
81 logger
.warn("ICNCP parsing not yet implemented");
82 }else if (nomCode
.equals(NomenclaturalCode
.BACTERIOLOGICAL())){
83 logger
.warn("ICNCP not yet implemented");
84 }else if (nomCode
.equals(NomenclaturalCode
.VIRAL())){
85 logger
.error("Viral name is not an NonViralName !!");
87 logger
.error("Unknown Nomenclatural Code !!");
89 parseFullReference(result
, fullReferenceString
, rank
, false);
94 public NomenclaturalCode
getNomeclaturalCode(String reference
){
95 logger
.warn("not yet implemented");
101 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
103 public void parseFullReference(NonViralName nameToBeFilled
, String fullReferenceString
, Rank rank
, boolean makeEmpty
) {
104 if (fullReferenceString
== null){
109 makeEmpty(nameToBeFilled
);
111 fullReferenceString
.replaceAll(oWs
, " ");
112 fullReferenceString
= fullReferenceString
.trim();
114 //seperate name and reference part
115 String nameAndRefSeperator
= "(^" + anyFullName
+ ")("+ referenceSeperator
+ ")";
116 Pattern nameAndRefSeperatorPattern
= Pattern
.compile(nameAndRefSeperator
);
117 Matcher nameAndRefSeperatorMatcher
= nameAndRefSeperatorPattern
.matcher(fullReferenceString
);
119 if (nameAndRefSeperatorMatcher
.find() ){
120 String nameAndSeperator
= nameAndRefSeperatorMatcher
.group(0);
121 String name
= nameAndRefSeperatorMatcher
.group(1);
122 String reference
= fullReferenceString
.substring(nameAndRefSeperatorMatcher
.end());
125 String seperator
= nameAndSeperator
.substring(name
.length());
126 boolean isInReference
= false;
127 if (seperator
.matches(inReferenceSeperator
)){
128 isInReference
= true;
132 reference
= parseNomStatus(reference
, nameToBeFilled
);
135 parseFullName(nameToBeFilled
, name
, rank
, makeEmpty
);
136 parseReference(nameToBeFilled
, reference
, isInReference
);
139 //don't parse if name can't be seperated
140 nameToBeFilled
.setHasProblem(true);
141 nameToBeFilled
.setTitleCache(fullReferenceString
);
142 logger
.info("no applicable parsing rule could be found for \"" + fullReferenceString
+ "\"");
146 //TODO make it an Array of status
148 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
149 * The nomenclatural status part ist deleted from the reference String.
150 * @return String the new (shortend) reference String
152 String
parseNomStatus(String reference
, NonViralName nameToBeFilled
) {
154 Pattern hasStatusPattern
= Pattern
.compile("(" + pNomStatusPhrase
+ ")");
155 Matcher hasStatusMatcher
= hasStatusPattern
.matcher(reference
);
157 if (hasStatusMatcher
.find()) {
158 String statusPhrase
= hasStatusMatcher
.group(0);
160 Pattern statusPattern
= Pattern
.compile(pNomStatus
);
161 Matcher statusMatcher
= statusPattern
.matcher(statusPhrase
);
162 statusMatcher
.find();
163 statusString
= statusMatcher
.group(0);
165 NomenclaturalStatusType nomStatusType
= NomenclaturalStatusType
.getNomenclaturalStatusTypeByAbbreviation(statusString
);
166 NomenclaturalStatus nomStatus
= NomenclaturalStatus
.NewInstance(nomStatusType
);
167 nameToBeFilled
.addStatus(nomStatus
);
169 reference
= reference
.replace(statusPhrase
, "");
170 } catch (UnknownCdmTypeException e
) {
178 private void parseReference(NonViralName nameToBeFilled
, String reference
, boolean isInReference
){
180 if (referencePattern
.matcher(reference
).matches() ){
181 //End (just delete, may be ambigous for yearPhrase, but no real information gets lost
182 Pattern endPattern
= Pattern
.compile( referenceEnd
+ end
);
183 Matcher endMatcher
= endPattern
.matcher(reference
);
184 if (endMatcher
.find()){
185 String endPart
= endMatcher
.group(0);
186 reference
= reference
.substring(0, reference
.length() - endPart
.length());
190 String yearPart
= null;
191 String pYearPhrase
= yearSeperator
+ yearPhrase
+ end
;
192 Pattern yearPhrasePattern
= Pattern
.compile(pYearPhrase
);
193 Matcher yearPhraseMatcher
= yearPhrasePattern
.matcher(reference
);
194 if (yearPhraseMatcher
.find()){
195 yearPart
= yearPhraseMatcher
.group(0);
196 reference
= reference
.substring(0, reference
.length() - yearPart
.length());
197 yearPart
= yearPart
.replaceFirst(start
+ yearSeperator
, "").trim();
201 String pDetailPhrase
= detailSeperator
+ detail
+ end
;
202 Pattern detailPhrasePattern
= Pattern
.compile(pDetailPhrase
);
203 Matcher detailPhraseMatcher
= detailPhrasePattern
.matcher(reference
);
204 if (detailPhraseMatcher
.find()){
205 String detailPart
= detailPhraseMatcher
.group(0);
206 reference
= reference
.substring(0, reference
.length() - detailPart
.length());
207 detailPart
= detailPart
.replaceFirst(start
+ detailSeperator
, "").trim();
208 nameToBeFilled
.setNomenclaturalMicroReference(detailPart
);
211 parseReferenceTitle(reference
, yearPart
);
217 * Parses the referenceTitlePart, including the author volume and edition.
222 private ReferenceBase
parseReferenceTitle(String reference
, String year
){
223 ReferenceBase result
= null;
224 Pattern bookPattern
= Pattern
.compile(bookReference
);
225 Pattern articlePattern
= Pattern
.compile(articleReference
);
226 Pattern bookSectionPattern
= Pattern
.compile(bookSectionReference
);
229 Matcher articleMatcher
= articlePattern
.matcher(reference
);
230 Matcher bookMatcher
= bookPattern
.matcher(reference
);
231 Matcher bookSectionMatcher
= bookSectionPattern
.matcher(reference
);
234 if (articleMatcher
.matches()){
236 //(type, author, title, volume, editor, series;
237 Article article
= new Article();
238 article
.setTitleCache(reference
);
240 }else if(bookMatcher
.matches()){
241 Book book
= new Book();
242 book
.setTitleCache(reference
);
244 }else if (bookSectionMatcher
.matches()){
245 BookSection bookSection
= new BookSection();
246 bookSection
.setTitleCache(reference
);
247 result
= bookSection
;
249 logger
.warn("unknown reference type not yet implemented");
250 //ReferenceBase refBase =
257 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericFullName(java.lang.String)
259 public BotanicalName
parseFullName(String fullNameString
){
260 return parseFullName(fullNameString
, null);
265 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
267 public BotanicalName
parseFullName(String fullNameString
, Rank rank
) {
268 if (fullNameString
== null){
271 BotanicalName result
= BotanicalName
.NewInstance(null);
272 parseFullName(result
, fullNameString
, rank
, false);
278 public void parseFullName(NonViralName nameToBeFilled
, String fullNameString
, Rank rank
, boolean makeEmpty
) {
281 String authorString
= null;
283 if (fullNameString
== null){
287 makeEmpty(nameToBeFilled
);
289 fullNameString
.replaceAll(oWs
, " ");
291 // OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces
292 fullNameString
= fullNameString
.trim();
294 String
[] epi
= pattern
.split(fullNameString
);
296 //cultivars //TODO 2 implement cultivars
297 // if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
298 // result = parseCultivar(fullName);
300 //hybrids //TODO 2 implement hybrids
302 if (hybridPattern
.matcher(fullNameString
).matches() ){
303 nameToBeFilled
= parseHybrid(fullNameString
);
305 else if (genusOrSupraGenusPattern
.matcher(fullNameString
).matches()){
307 if (rank
!= null && rank
.isSupraGeneric()){
308 nameToBeFilled
.setRank(rank
);
309 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
313 nameToBeFilled
.setRank(Rank
.GENUS());
314 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
316 authorString
= fullNameString
.substring(epi
[0].length());
319 else if (infraGenusPattern
.matcher(fullNameString
).matches()){
320 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[1]));
321 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
322 nameToBeFilled
.setInfraGenericEpithet(epi
[2]);
323 authorString
= fullNameString
.substring(epi
[0].length() + 1 + epi
[1].length()+ 1 + epi
[2].length());
326 else if (aggrOrGroupPattern
.matcher(fullNameString
).matches()){
327 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[2]));
328 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
329 nameToBeFilled
.setSpecificEpithet(epi
[1]);
332 else if (speciesPattern
.matcher(fullNameString
).matches()){
333 nameToBeFilled
.setRank(Rank
.SPECIES());
334 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
335 nameToBeFilled
.setSpecificEpithet(epi
[1]);
336 authorString
= fullNameString
.substring(epi
[0].length() + 1 + epi
[1].length());
339 else if (autonymPattern
.matcher(fullNameString
).matches()){
340 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(epi
[epi
.length
- 2]));
341 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
342 nameToBeFilled
.setSpecificEpithet(epi
[1]);
343 nameToBeFilled
.setInfraSpecificEpithet(epi
[epi
.length
- 1]);
344 int lenSpecies
= 2 + epi
[0].length()+epi
[1].length();
345 int lenInfraSpecies
= 2 + epi
[epi
.length
- 2].length() + epi
[epi
.length
- 1].length();
346 authorString
= fullNameString
.substring(lenSpecies
, fullNameString
.length() - lenInfraSpecies
);
349 else if (infraSpeciesPattern
.matcher(fullNameString
).matches()){
350 String infraSpecRankEpi
= epi
[2];
351 String infraSpecEpi
= epi
[3];
352 if ("tax.".equals(infraSpecRankEpi
)){
353 infraSpecRankEpi
+= " " + epi
[3];
354 infraSpecEpi
= epi
[4];
356 nameToBeFilled
.setRank(Rank
.getRankByAbbreviation(infraSpecRankEpi
));
357 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
358 nameToBeFilled
.setSpecificEpithet(epi
[1]);
359 nameToBeFilled
.setInfraSpecificEpithet(infraSpecEpi
);
360 authorString
= fullNameString
.substring(epi
[0].length()+ 1 + epi
[1].length() +1 + infraSpecRankEpi
.length() + 1 + infraSpecEpi
.length());
362 else if (oldInfraSpeciesPattern
.matcher(fullNameString
).matches()){
363 boolean implemented
= false;
365 nameToBeFilled
.setRank(Rank
.getRankByNameOrAbbreviation(epi
[2]));
366 nameToBeFilled
.setGenusOrUninomial(epi
[0]);
367 nameToBeFilled
.setSpecificEpithet(epi
[1]);
368 //TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
369 authorString
= fullNameString
.substring(epi
[0].length()+ 1 + epi
[1].length() +1 + epi
[2].length() + 1 + epi
[3].length());
371 nameToBeFilled
.setHasProblem(true);
372 nameToBeFilled
.setTitleCache(fullNameString
);
373 logger
.info("Name string " + fullNameString
+ " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
378 nameToBeFilled
.setHasProblem(true);
379 nameToBeFilled
.setTitleCache(fullNameString
);
380 logger
.info("no applicable parsing rule could be found for \"" + fullNameString
+ "\"");
383 if (nameToBeFilled
!= null && authorString
!= null && authorString
.trim().length() > 0 ){
384 TeamOrPersonBase
[] authors
= null;
385 Integer
[] years
= null;
387 fullAuthors(authorString
, authors
, years
);
388 } catch (StringNotParsableException e
) {
389 nameToBeFilled
.setHasProblem(true);
390 nameToBeFilled
.setTitleCache(fullNameString
);
391 logger
.info("no applicable parsing rule could be found for \"" + fullNameString
+ "\"");;
393 nameToBeFilled
.setCombinationAuthorTeam(authors
[0]);
394 nameToBeFilled
.setExCombinationAuthorTeam(authors
[1]);
395 nameToBeFilled
.setBasionymAuthorTeam(authors
[2]);
396 nameToBeFilled
.setExBasionymAuthorTeam(authors
[3]);
397 if (nameToBeFilled
instanceof ZoologicalName
){
398 ZoologicalName zooName
= (ZoologicalName
)nameToBeFilled
;
399 zooName
.setPublicationYear(years
[0]);
400 zooName
.setOriginalPublicationYear(years
[2]);
404 if (nameToBeFilled
!= null){
405 //return(BotanicalName)result;
408 nameToBeFilled
.setHasProblem(true);
409 nameToBeFilled
.setTitleCache(fullNameString
);
410 logger
.info("Name string " + fullNameString
+ " could not be parsed!");
414 } catch (UnknownCdmTypeException e
) {
415 nameToBeFilled
.setHasProblem(true);
416 nameToBeFilled
.setTitleCache(fullNameString
);
417 logger
.info("unknown rank (" + (rank
== null?
"null":rank
) + ") or abbreviation in string " + fullNameString
);
423 private void makeEmpty(NonViralName nameToBeFilled
){
424 nameToBeFilled
.setRank(null);
425 nameToBeFilled
.setTitleCache(null, false);
426 nameToBeFilled
.setNameCache(null);
428 nameToBeFilled
.setAppendedPhrase(null);
430 //nameToBeFilled.setBasionym(basionym);
431 nameToBeFilled
.setBasionymAuthorTeam(null);
432 nameToBeFilled
.setCombinationAuthorTeam(null);
433 nameToBeFilled
.setExBasionymAuthorTeam(null);
434 nameToBeFilled
.setExCombinationAuthorTeam(null);
435 nameToBeFilled
.setAuthorshipCache(null);
438 nameToBeFilled
.setHasProblem(false);
440 //nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
443 nameToBeFilled
.setGenusOrUninomial(null);
444 nameToBeFilled
.setInfraGenericEpithet(null);
445 nameToBeFilled
.setSpecificEpithet(null);
446 nameToBeFilled
.setInfraSpecificEpithet(null);
448 nameToBeFilled
.setNomenclaturalMicroReference(null);
449 nameToBeFilled
.setNomenclaturalReference(null);
451 if (nameToBeFilled
instanceof BotanicalName
){
452 BotanicalName botanicalName
= (BotanicalName
)nameToBeFilled
;
453 botanicalName
.setAnamorphic(false);
454 botanicalName
.setHybridFormula(false);
455 botanicalName
.setMonomHybrid(false);
456 botanicalName
.setBinomHybrid(false);
457 botanicalName
.setTrinomHybrid(false);
460 if (nameToBeFilled
instanceof ZoologicalName
){
461 ZoologicalName zoologicalName
= (ZoologicalName
)nameToBeFilled
;
462 zoologicalName
.setBreed(null);
463 zoologicalName
.setOriginalPublicationYear(null);
466 //TODO adapt to @Version of versionable entity, throws still optimistic locking error
467 //nameToBeFilled.setUpdated(Calendar.getInstance());
468 // TODO nameToBeFilled.setUpdatedBy(updatedBy);
474 * Parses the fullAuthorString
475 * @param fullAuthorString
476 * @return array of Teams containing the Team[0],
477 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
479 public void fullAuthors (String fullAuthorString
, TeamOrPersonBase
[] authors
, Integer
[] years
)
480 throws StringNotParsableException
{
481 fullAuthorString
= fullAuthorString
.trim();
482 if (! fullAuthorStringPattern
.matcher(fullAuthorString
).matches())
483 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString
+") not parsable: ");
484 fullAuthorsChecked(fullAuthorString
, authors
, years
);
489 * like fullTeams but without trim and match check
491 private void fullAuthorsChecked (String fullAuthorString
, TeamOrPersonBase
[] authors
, Integer
[] years
){
492 TeamOrPersonBase
[] result
= new TeamOrPersonBase
[4];
493 int authorTeamStart
= 0;
494 Matcher basionymMatcher
= basionymPattern
.matcher(fullAuthorString
);
495 if (basionymMatcher
.find(0)){
497 String basString
= basionymMatcher
.group();
498 basString
= basString
.replaceFirst(basStart
, "");
499 basString
= basString
.replaceAll(basEnd
, "").trim();
500 authorTeamStart
= basionymMatcher
.end(1) + 1;
502 TeamOrPersonBase
[] basAuthors
;
504 authorsAndEx(basString
, basAuthors
, basYears
);
505 authors
[2]= basAuthors
[0];
506 years
[2] = basYears
[0];
507 authors
[3]= basAuthors
[1];
508 years
[3] = basYears
[1];
510 TeamOrPersonBase
[] combinationAuthors
;
511 Integer
[] combinationYears
;
512 authorsAndEx(fullAuthorString
.substring(authorTeamStart
), combinationAuthors
, combinationYears
);
513 authors
[0]= combinationAuthors
[0];
514 years
[0] = combinationYears
[0];
515 authors
[1]= combinationAuthors
[1];
516 years
[1] = combinationYears
[1];
521 * Parses the author and ex-author String
522 * @param authorTeamString String representing the author and the ex-author team
523 * @return array of Teams containing the Team[0] and the ExTeam[1]
525 public void authorsAndEx (String authorTeamString
, TeamOrPersonBase
[] authors
, Integer
[] years
){
526 TeamOrPersonBase
[] result
= new TeamOrPersonBase
[2];
527 //TODO noch allgemeiner am anfang durch Replace etc.
528 authorTeamString
= authorTeamString
.trim();
529 authorTeamString
= authorTeamString
.replaceFirst(oWs
+ "ex" + oWs
, " ex. " );
530 int authorEnd
= authorTeamString
.length();
532 Matcher exAuthorMatcher
= exAuthorPattern
.matcher(authorTeamString
);
533 if (exAuthorMatcher
.find(0)){
534 int exAuthorBegin
= exAuthorMatcher
.end(0);
535 String exString
= authorTeamString
.substring(exAuthorBegin
).trim();
536 authorEnd
= exAuthorMatcher
.start(0);
537 authors
[1] = author(exString
);
539 authors
[0] = author(authorTeamString
.substring(0, authorEnd
));
544 * Parses an authorTeam String and returns the Team
545 * !!! TODO (atomization not yet implemented)
546 * @param authorTeamString String representing the author team
549 public TeamOrPersonBase
author (String authorString
){
550 if (authorString
== null){
552 }else if ((authorString
= authorString
.trim()).length() == 0){
554 }else if (! teamSplitterPattern
.matcher(authorString
).find()){
556 Person result
= Person
.NewInstance();
557 result
.setNomenclaturalTitle(authorString
);
560 return parsedTeam(authorString
);
565 private Team
parsedTeam(String authorString
){
566 Team result
= Team
.NewInstance();
567 String
[] authors
= authorString
.split(teamSplitter
);
568 for (String author
: authors
){
569 Person person
= Person
.NewInstance();
570 person
.setNomenclaturalTitle(author
);
571 result
.addTeamMember(person
);
577 //Parsing of the given full name that has been identified as hybrid already somewhere else.
578 private BotanicalName
parseHybrid(String fullName
){
579 logger
.warn("parseHybrid --> function not yet implemented");
580 BotanicalName result
= BotanicalName
.NewInstance(null);
581 result
.setTitleCache(fullName
);
585 // // Parsing of the given full name that has been identified as a cultivar already somwhere else.
586 // // The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
587 public BotanicalName
parseCultivar(String fullName
) throws StringNotParsableException
{
588 CultivarPlantName result
= null;
589 String
[] words
= oWsPattern
.split(fullName
);
591 /* ---------------------------------------------------------------------------------
593 * ---------------------------------------------------------------------------------*/
594 if (fullName
.indexOf(" '") != 0){
595 //TODO location of 'xx' is probably not arbitrary
596 Matcher cultivarMatcher
= cultivarPattern
.matcher(fullName
);
597 if (cultivarMatcher
.find()){
598 String namePart
= fullName
.replaceFirst(cultivar
, "");
600 String cultivarPart
= cultivarMatcher
.group(0).replace("'","").trim();
601 //OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
603 result
= (CultivarPlantName
)parseFullName(namePart
);
604 result
.setCultivarName(cultivarPart
);
606 }else if (fullName
.indexOf(" cv.") != 0){
607 // cv. is old form (not official)
608 throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
611 /* ---------------------------------------------------------------------------------
613 * ---------------------------------------------------------------------------------
616 //Ann. this is not the official way of noting cultivar groups
617 String group
= oWs
+ "Group" + oWs
+ capitalEpiWord
+ end
;
618 Pattern groupRE
= Pattern
.compile(group
);
619 Matcher groupMatcher
= groupRE
.matcher(fullName
);
620 if (groupMatcher
.find()){
621 if (! words
[words
.length
- 2].equals("group")){
622 throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName
+ " should be 'group'");
625 String namePart
= fullName
.substring(0, groupMatcher
.start(0) - 0);
626 //OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
628 String cultivarPart
= words
[words
.length
-1];
629 result
= (CultivarPlantName
)parseFullName(namePart
);
631 result
.setCultivarName(cultivarPart
);
633 //OLD: result.setCultivarGroupName(cultivarPart);
638 // // ---------------------------------------------------------------------------------
639 // if ( result = "" ){
640 // return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
644 return result
; //TODO
650 static String epiSplitter
= "(\\s+|\\(|\\))"; //( ' '+| '(' | ')' )
651 static Pattern pattern
= Pattern
.compile(epiSplitter
);
653 //some useful non-terminals
654 static String start
= "^";
655 static String end
= "$";
656 static String anyEnd
= ".*" + end
;
657 static String oWs
= "\\s+"; //obligatory whitespaces
658 static String fWs
= "\\s*"; //facultative whitespcace
660 static String capitalWord
= "\\p{javaUpperCase}\\p{javaLowerCase}*";
661 static String nonCapitalWord
= "\\p{javaLowerCase}+";
663 static String capitalDotWord
= capitalWord
+ "\\.?"; //capitalWord with facultativ '.' at the end
664 static String nonCapitalDotWord
= nonCapitalWord
+ "\\.?"; //nonCapitalWord with facultativ '.' at the end
665 static String dotWord
= "(" + capitalWord
+ "|" + nonCapitalWord
+ ")\\.?"; //word (capital or non-capital) with facultativ '.' at the end
666 //Words used in an epethiton for a TaxonName
667 static String nonCapitalEpiWord
= "[a-zï\\-]+"; //TODO solve checkin Problem with Unicode character "[a-z�\\-]+";
668 static String capitalEpiWord
= "[A-Z]"+ nonCapitalEpiWord
;
672 static String month
= "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
673 static String singleYear
= "\\b" + "(?:17|18|19|20)" + "\\d{2}" + "\\b"; // word boundary followed by either 17,18,19, or 20 (not captured) followed by 2 digits
674 static String yearPhrase
= "(" + singleYear
+ "(-" + singleYear
+ ")?" +
675 "(" + month
+ ")?)" ; // optional month
678 static String yearSeperator
= "." + oWs
;
679 static String detailSeperator
= ":" + oWs
;
680 static String referenceSeperator1
= "," + oWs
;
681 static String inReferenceSeperator
= oWs
+ "in" + oWs
;
682 static String referenceSeperator
= "(" + referenceSeperator1
+"|" + inReferenceSeperator
+ ")" ;
683 static String referenceAuthorSeperator
= ","+ oWs
;
684 static String volumeSeperator
= "," + fWs
;
685 static String referenceEnd
= ".";
689 static String status
= "";
692 static String InfraGenusMarker
= "(subgen.|subg.|sect.|subsect.|ser.|subser.|t.infgen.)";
693 static String aggrOrGroupMarker
= "(aggr.|agg.|group)";
694 static String infraSpeciesMarker
= "(subsp.|convar.|var.|subvar.|f.|subf.|f.spec.|tax." + fWs
+ "infrasp.)";
695 static String oldInfraSpeciesMarker
= "(prol.|proles|race|taxon|sublusus)";
699 static String authorPart
= "(" + "(D'|L'|'t\\s)?" + capitalDotWord
+ "('" + nonCapitalDotWord
+ ")?" + "|da|de(n|l|\\sla)?)" ;
700 static String author
= "(" + authorPart
+ "(" + fWs
+ "|-)" + ")+" + "(f.|fil.|secundus)?";
701 static String teamSplitter
= fWs
+ "(&)" + fWs
;
702 static String authorTeam
= fWs
+ "(" + author
+ teamSplitter
+ ")*" + author
+ "(" + teamSplitter
+ "al.)?" + fWs
;
703 static String exString
= "(ex.?)";
704 static String authorAndExTeam
= authorTeam
+ "(" + oWs
+ exString
+ oWs
+ authorTeam
+ ")?";
705 static String basStart
= "\\(";
706 static String basEnd
= "\\)";
707 static String botanicBasionymAuthor
= basStart
+ "(" + authorAndExTeam
+ ")" + basEnd
; // '(' and ')' is for evaluation with RE.paren(x)
708 static String fullBotanicAuthorString
= fWs
+ "(" + botanicBasionymAuthor
+")?" + fWs
+ authorAndExTeam
+ fWs
;
709 static String facultFullBotanicAuthorString
= "(" + fullBotanicAuthorString
+ ")?" ;
712 //TODO does zoo author have ex-Author?
713 static String zooAuthorTeam
= authorTeam
+ fWs
+ "," + fWs
+ singleYear
;
714 static String zooBasionymAuthor
= basStart
+ "(" + zooAuthorTeam
+ ")" + basEnd
;
715 static String fullZooAuthorString
= fWs
+ "(" + zooBasionymAuthor
+")?" + fWs
+ zooAuthorTeam
+ fWs
;
716 static String facultFullZooAuthorString
= "(" + fullZooAuthorString
+ ")?" ;
718 static String facultFullAuthorString2
= "(" + facultFullBotanicAuthorString
+ "|" + facultFullZooAuthorString
+ ")";
722 //TODO still very simple
723 static String pageNumber
= "\\d{1,5}";
724 static String detail
= "(" + pageNumber
+ ")";
727 static String volume
= "\\d{4}" + "\\(\\d{4}\\)?";
729 static String referenceTitle
= "(" + dotWord
+ fWs
+ ")" + "{2,}";
730 static String bookReference
= referenceTitle
+ volumeSeperator
+ volume
;
731 static String bookSectionReference
= authorTeam
+ referenceAuthorSeperator
;
732 static String articleReference
= inReferenceSeperator
+ bookReference
;
733 static String reference
= "(" + articleReference
+ "|" + bookReference
+")" +
734 detailSeperator
+ detail
+ yearSeperator
+ yearPhrase
+
737 static Pattern referencePattern
= Pattern
.compile(reference
);
739 static String pNomStatusNom
= "nom\\." + fWs
+ "(superfl\\.|nud\\.|illeg\\.|inval\\.|cons\\.|alternativ\\.|subnud.|"+
740 "rej\\.|rej\\."+ fWs
+ "prop\\.|provis\\.)";
741 static String pNomStatusOrthVar
= "orth\\." + fWs
+ "var\\.";
742 static String pNomStatus
= "(" + pNomStatusNom
+ "|" + pNomStatusOrthVar
+ ")";
743 static String pNomStatusPhrase1
= "," + fWs
+ pNomStatus
;
744 static String pNomStatusPhrase2
= "\\[" + fWs
+ pNomStatus
+ "\\]";
746 static String pNomStatusPhrase
= "(?:" + pNomStatusPhrase1
+ "|" + pNomStatusPhrase2
+ ")";
751 //provisional synonym
756 //cultivars and hybrids
757 static String cultivar
= oWs
+ "'..+'"; //Achtung mit Hochkomma in AuthorNamen
758 static String cultivarMarker
= oWs
+ "(cv.|')";
759 static String hybrid
= oWs
+ "((x|X)" + oWs
+ "|notho)";//= ( x )|( X )|( notho)
762 static String genusOrSupraGenus
= capitalEpiWord
;
763 static String infraGenus
= capitalEpiWord
+ oWs
+ InfraGenusMarker
+ oWs
+ capitalEpiWord
;
764 static String aggrOrGroup
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ aggrOrGroupMarker
;
765 static String species
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
;
766 static String infraSpecies
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ infraSpeciesMarker
+ oWs
+ nonCapitalEpiWord
;
767 static String oldInfraSpecies
= capitalEpiWord
+ oWs
+ nonCapitalEpiWord
+ oWs
+ oldInfraSpeciesMarker
+ oWs
+ nonCapitalEpiWord
;
768 static String autonym
= capitalEpiWord
+ oWs
+ "(" + nonCapitalEpiWord
+")" + oWs
+ fullBotanicAuthorString
+ oWs
+ infraSpeciesMarker
+ oWs
+ "\\1"; //2-nd word and last word are the same
770 static String anyBotanicName
= "(" + genusOrSupraGenus
+ "|" + infraGenus
+ "|" + aggrOrGroup
+ "|" + species
+ "|" +
771 infraSpecies
+ "|" + infraSpecies
+ "|" + oldInfraSpecies
+ "|" + autonym
+ ")+";
772 static String anyZooName
= "(" + genusOrSupraGenus
+ "|" + infraGenus
+ "|" + aggrOrGroup
+ "|" + species
+ "|" +
773 infraSpecies
+ "|" + infraSpecies
+ "|" + oldInfraSpecies
+ ")+";
774 static String anyBotanicFullName
= anyBotanicName
+ oWs
+ fullBotanicAuthorString
;
775 static String anyZooFullName
= anyZooName
+ oWs
+ fullZooAuthorString
;
776 static String anyFullName
= "(" + anyBotanicFullName
+ "|" + anyZooFullName
+ ")";
780 static Pattern oWsPattern
= Pattern
.compile(oWs
);
781 static Pattern teamSplitterPattern
= Pattern
.compile(teamSplitter
);
782 static Pattern cultivarPattern
= Pattern
.compile(cultivar
);
783 static Pattern cultivarMarkerPattern
= Pattern
.compile(cultivarMarker
);
784 static Pattern hybridPattern
= Pattern
.compile(hybrid
);
786 static Pattern genusOrSupraGenusPattern
= Pattern
.compile(start
+ genusOrSupraGenus
+ facultFullAuthorString2
+ end
);
787 static Pattern infraGenusPattern
= Pattern
.compile(start
+ infraGenus
+ facultFullAuthorString2
+ end
);
788 static Pattern aggrOrGroupPattern
= Pattern
.compile(start
+ aggrOrGroup
+ fWs
+ end
); //aggr. or group has no author string
789 static Pattern speciesPattern
= Pattern
.compile(start
+ species
+ facultFullAuthorString2
+ end
);
790 static Pattern infraSpeciesPattern
= Pattern
.compile(start
+ infraSpecies
+ facultFullAuthorString2
+ end
);
791 static Pattern oldInfraSpeciesPattern
= Pattern
.compile(start
+ oldInfraSpecies
+ facultFullAuthorString2
+ end
);
792 static Pattern autonymPattern
= Pattern
.compile(start
+ autonym
+ fWs
+ end
);
794 static Pattern botanicBotanicPattern
= Pattern
.compile(botanicBasionymAuthor
);
795 //static Pattern startsWithBasionymRE = Pattern.compile(basionymAuthor + anyEnd);
796 static Pattern exAuthorPattern
= Pattern
.compile(oWs
+ exString
);
798 static Pattern fullBotanicAuthorStringPattern
= Pattern
.compile(fullBotanicAuthorString
);
799 static Pattern fullZooAuthorStringPattern
= Pattern
.compile(fullZooAuthorString
);