has errors
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / strategy / parser / NonViralNameParserImpl.java
1 /**
2 *
3 */
4 package eu.etaxonomy.cdm.strategy.parser;
5
6 import java.util.regex.Matcher;
7 import java.util.regex.Pattern;
8
9 import org.apache.log4j.Logger;
10
11 import eu.etaxonomy.cdm.model.agent.INomenclaturalAuthor;
12 import eu.etaxonomy.cdm.model.agent.Person;
13 import eu.etaxonomy.cdm.model.agent.Team;
14 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
15 import eu.etaxonomy.cdm.model.name.BotanicalName;
16 import eu.etaxonomy.cdm.model.name.CultivarPlantName;
17 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
18 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
19 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
20 import eu.etaxonomy.cdm.model.name.NonViralName;
21 import eu.etaxonomy.cdm.model.name.Rank;
22 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
23 import eu.etaxonomy.cdm.model.name.ZoologicalName;
24 import eu.etaxonomy.cdm.model.reference.Article;
25 import eu.etaxonomy.cdm.model.reference.Book;
26 import eu.etaxonomy.cdm.model.reference.BookSection;
27 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
28 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
29 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
30
31
32 /**
33 * @author a.mueller
34 *
35 */
36 public class NonViralNameParserImpl implements ITaxonNameParser<NonViralName> {
37 private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);
38
39 // good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html
40
41 public static NonViralNameParserImpl NewInstance(){
42 return new NonViralNameParserImpl();
43 }
44
45
46 /* (non-Javadoc)
47 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
48 */
49 public NonViralName parseSimpleName(String simpleName, Rank rank){
50 //TODO
51 logger.warn("parseSimpleName() not yet implemented. Uses parseFullName() instead");
52 return parseFullName(simpleName, rank);
53 }
54
55
56 /* (non-Javadoc)
57 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
58 */
59 public NonViralName parseSimpleName(String simpleName){
60 return parseSimpleName(simpleName, null);
61 }
62
63 /* (non-Javadoc)
64 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
65 */
66 public NonViralName parseFullReference(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
67 if (fullReferenceString == null){
68 return null;
69 }else{
70 NonViralName result = null;
71 if (nomCode == null){
72 nomCode = getNomeclaturalCode(reference);
73 }
74 if (nomCode == null){
75 result = NonViralName.NewInstance(rank);
76 }else if (nomCode.equals(NomenclaturalCode.ICBN())){
77 result = BotanicalName.NewInstance(rank);
78 }else if (nomCode.equals(NomenclaturalCode.ICZN())){
79 result = ZoologicalName.NewInstance(rank);
80 }else if (nomCode.equals(NomenclaturalCode.ICNCP())){
81 logger.warn("ICNCP parsing not yet implemented");
82 }else if (nomCode.equals(NomenclaturalCode.BACTERIOLOGICAL())){
83 logger.warn("ICNCP not yet implemented");
84 }else if (nomCode.equals(NomenclaturalCode.VIRAL())){
85 logger.error("Viral name is not an NonViralName !!");
86 }else{
87 logger.error("Unknown Nomenclatural Code !!");
88 }
89 parseFullReference(result, fullReferenceString, rank, false);
90 return result;
91 }
92 }
93
94 public NomenclaturalCode getNomeclaturalCode(String reference){
95 logger.warn("not yet implemented");
96 return null;
97 }
98
99
100 /* (non-Javadoc)
101 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
102 */
103 public void parseFullReference(NonViralName nameToBeFilled, String fullReferenceString, Rank rank, boolean makeEmpty) {
104 if (fullReferenceString == null){
105 //return null;
106 return;
107 }
108 if (makeEmpty){
109 makeEmpty(nameToBeFilled);
110 }
111 fullReferenceString.replaceAll(oWs , " ");
112 fullReferenceString = fullReferenceString.trim();
113
114 //seperate name and reference part
115 String nameAndRefSeperator = "(^" + anyFullName + ")("+ referenceSeperator + ")";
116 Pattern nameAndRefSeperatorPattern = Pattern.compile(nameAndRefSeperator);
117 Matcher nameAndRefSeperatorMatcher = nameAndRefSeperatorPattern.matcher(fullReferenceString);
118
119 if (nameAndRefSeperatorMatcher.find() ){
120 String nameAndSeperator = nameAndRefSeperatorMatcher.group(0);
121 String name = nameAndRefSeperatorMatcher.group(1);
122 String reference = fullReferenceString.substring(nameAndRefSeperatorMatcher.end());
123
124 // inRef?
125 String seperator = nameAndSeperator.substring(name.length());
126 boolean isInReference = false;
127 if (seperator.matches(inReferenceSeperator)){
128 isInReference = true;
129 }
130
131 //status
132 reference = parseNomStatus(reference, nameToBeFilled);
133
134 //parse subparts
135 parseFullName(nameToBeFilled, name, rank, makeEmpty);
136 parseReference(nameToBeFilled, reference, isInReference);
137
138 }else{
139 //don't parse if name can't be seperated
140 nameToBeFilled.setHasProblem(true);
141 nameToBeFilled.setTitleCache(fullReferenceString);
142 logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");
143 }
144 }
145
146 //TODO make it an Array of status
147 /**
148 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
149 * The nomenclatural status part ist deleted from the reference String.
150 * @return String the new (shortend) reference String
151 */
152 String parseNomStatus(String reference, NonViralName nameToBeFilled) {
153 String statusString;
154 Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")");
155 Matcher hasStatusMatcher = hasStatusPattern.matcher(reference);
156
157 if (hasStatusMatcher.find()) {
158 String statusPhrase = hasStatusMatcher.group(0);
159
160 Pattern statusPattern = Pattern.compile(pNomStatus);
161 Matcher statusMatcher = statusPattern.matcher(statusPhrase);
162 statusMatcher.find();
163 statusString = statusMatcher.group(0);
164 try {
165 NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString);
166 NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
167 nameToBeFilled.addStatus(nomStatus);
168
169 reference = reference.replace(statusPhrase, "");
170 } catch (UnknownCdmTypeException e) {
171 //Do nothing
172 }
173 }
174 return reference;
175 }
176
177
178 private void parseReference(NonViralName nameToBeFilled, String reference, boolean isInReference){
179
180 if (referencePattern.matcher(reference).matches() ){
181 //End (just delete, may be ambigous for yearPhrase, but no real information gets lost
182 Pattern endPattern = Pattern.compile( referenceEnd + end);
183 Matcher endMatcher = endPattern.matcher(reference);
184 if (endMatcher.find()){
185 String endPart = endMatcher.group(0);
186 reference = reference.substring(0, reference.length() - endPart.length());
187 }
188
189 //year
190 String yearPart = null;
191 String pYearPhrase = yearSeperator + yearPhrase + end;
192 Pattern yearPhrasePattern = Pattern.compile(pYearPhrase);
193 Matcher yearPhraseMatcher = yearPhrasePattern.matcher(reference);
194 if (yearPhraseMatcher.find()){
195 yearPart = yearPhraseMatcher.group(0);
196 reference = reference.substring(0, reference.length() - yearPart.length());
197 yearPart = yearPart.replaceFirst(start + yearSeperator, "").trim();
198 }
199
200 //detail
201 String pDetailPhrase = detailSeperator + detail + end;
202 Pattern detailPhrasePattern = Pattern.compile(pDetailPhrase);
203 Matcher detailPhraseMatcher = detailPhrasePattern.matcher(reference);
204 if (detailPhraseMatcher.find()){
205 String detailPart = detailPhraseMatcher.group(0);
206 reference = reference.substring(0, reference.length() - detailPart.length());
207 detailPart = detailPart.replaceFirst(start + detailSeperator, "").trim();
208 nameToBeFilled.setNomenclaturalMicroReference(detailPart);
209 }
210 //Title (and author)
211 parseReferenceTitle(reference, yearPart);
212 }
213
214 }
215
216 /**
217 * Parses the referenceTitlePart, including the author volume and edition.
218 * @param reference
219 * @param year
220 * @return
221 */
222 private ReferenceBase parseReferenceTitle(String reference, String year){
223 ReferenceBase result = null;
224 Pattern bookPattern = Pattern.compile(bookReference);
225 Pattern articlePattern = Pattern.compile(articleReference);
226 Pattern bookSectionPattern = Pattern.compile(bookSectionReference);
227
228
229 Matcher articleMatcher = articlePattern.matcher(reference);
230 Matcher bookMatcher = bookPattern.matcher(reference);
231 Matcher bookSectionMatcher = bookSectionPattern.matcher(reference);
232
233
234 if (articleMatcher.matches()){
235 //if (articlePatter)
236 //(type, author, title, volume, editor, series;
237 Article article = new Article();
238 article.setTitleCache(reference);
239 result = article;
240 }else if(bookMatcher.matches()){
241 Book book = new Book();
242 book .setTitleCache(reference);
243 result = book;
244 }else if (bookSectionMatcher.matches()){
245 BookSection bookSection = new BookSection();
246 bookSection.setTitleCache(reference);
247 result = bookSection;
248 }else{
249 logger.warn("unknown reference type not yet implemented");
250 //ReferenceBase refBase =
251 }
252 return result;
253 }
254
255
256 /* (non-Javadoc)
257 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericFullName(java.lang.String)
258 */
259 public BotanicalName parseFullName(String fullNameString){
260 return parseFullName(fullNameString, null);
261 }
262
263
264 /* (non-Javadoc)
265 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
266 */
267 public BotanicalName parseFullName(String fullNameString, Rank rank) {
268 if (fullNameString == null){
269 return null;
270 }else{
271 BotanicalName result = BotanicalName.NewInstance(null);
272 parseFullName(result, fullNameString, rank, false);
273 return result;
274 }
275 }
276
277
278 public void parseFullName(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty) {
279 //TODO prol. etc.
280
281 String authorString = null;
282
283 if (fullNameString == null){
284 return;
285 }
286 if (makeEmpty){
287 makeEmpty(nameToBeFilled);
288 }
289 fullNameString.replaceAll(oWs , " ");
290 //TODO
291 // OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces
292 fullNameString = fullNameString.trim();
293
294 String[] epi = pattern.split(fullNameString);
295 try {
296 //cultivars //TODO 2 implement cultivars
297 // if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
298 // result = parseCultivar(fullName);
299 // }
300 //hybrids //TODO 2 implement hybrids
301 //else
302 if (hybridPattern.matcher(fullNameString).matches() ){
303 nameToBeFilled = parseHybrid(fullNameString);
304 }
305 else if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
306 //supraGeneric
307 if (rank != null && rank.isSupraGeneric()){
308 nameToBeFilled.setRank(rank);
309 nameToBeFilled.setGenusOrUninomial(epi[0]);
310 }
311 //genus
312 else {
313 nameToBeFilled.setRank(Rank.GENUS());
314 nameToBeFilled.setGenusOrUninomial(epi[0]);
315 }
316 authorString = fullNameString.substring(epi[0].length());
317 }
318 //infra genus
319 else if (infraGenusPattern.matcher(fullNameString).matches()){
320 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[1]));
321 nameToBeFilled.setGenusOrUninomial(epi[0]);
322 nameToBeFilled.setInfraGenericEpithet(epi[2]);
323 authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
324 }
325 //aggr. or group
326 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
327 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[2]));
328 nameToBeFilled.setGenusOrUninomial(epi[0]);
329 nameToBeFilled.setSpecificEpithet(epi[1]);
330 }
331 //species
332 else if (speciesPattern.matcher(fullNameString).matches()){
333 nameToBeFilled.setRank(Rank.SPECIES());
334 nameToBeFilled.setGenusOrUninomial(epi[0]);
335 nameToBeFilled.setSpecificEpithet(epi[1]);
336 authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
337 }
338 //autonym
339 else if (autonymPattern.matcher(fullNameString).matches()){
340 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[epi.length - 2]));
341 nameToBeFilled.setGenusOrUninomial(epi[0]);
342 nameToBeFilled.setSpecificEpithet(epi[1]);
343 nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
344 int lenSpecies = 2 + epi[0].length()+epi[1].length();
345 int lenInfraSpecies = 2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
346 authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
347 }
348 //infraSpecies
349 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
350 String infraSpecRankEpi = epi[2];
351 String infraSpecEpi = epi[3];
352 if ("tax.".equals(infraSpecRankEpi)){
353 infraSpecRankEpi += " " + epi[3];
354 infraSpecEpi = epi[4];
355 }
356 nameToBeFilled.setRank(Rank.getRankByAbbreviation(infraSpecRankEpi));
357 nameToBeFilled.setGenusOrUninomial(epi[0]);
358 nameToBeFilled.setSpecificEpithet(epi[1]);
359 nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
360 authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankEpi.length() + 1 + infraSpecEpi.length());
361 }//old infraSpecies
362 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
363 boolean implemented = false;
364 if (implemented){
365 nameToBeFilled.setRank(Rank.getRankByNameOrAbbreviation(epi[2]));
366 nameToBeFilled.setGenusOrUninomial(epi[0]);
367 nameToBeFilled.setSpecificEpithet(epi[1]);
368 //TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
369 authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
370 }else{
371 nameToBeFilled.setHasProblem(true);
372 nameToBeFilled.setTitleCache(fullNameString);
373 logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
374 }
375 }
376 //none
377 else{
378 nameToBeFilled.setHasProblem(true);
379 nameToBeFilled.setTitleCache(fullNameString);
380 logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
381 }
382 //authors
383 if (nameToBeFilled != null && authorString != null && authorString.trim().length() > 0 ){
384 TeamOrPersonBase[] authors = null;
385 Integer[] years = null;
386 try {
387 fullAuthors(authorString, authors, years);
388 } catch (StringNotParsableException e) {
389 nameToBeFilled.setHasProblem(true);
390 nameToBeFilled.setTitleCache(fullNameString);
391 logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");;
392 }
393 nameToBeFilled.setCombinationAuthorTeam(authors[0]);
394 nameToBeFilled.setExCombinationAuthorTeam(authors[1]);
395 nameToBeFilled.setBasionymAuthorTeam(authors[2]);
396 nameToBeFilled.setExBasionymAuthorTeam(authors[3]);
397 if (nameToBeFilled instanceof ZoologicalName){
398 ZoologicalName zooName = (ZoologicalName)nameToBeFilled;
399 zooName.setPublicationYear(years[0]);
400 zooName.setOriginalPublicationYear(years[2]);
401 }
402 }
403 //return
404 if (nameToBeFilled != null){
405 //return(BotanicalName)result;
406 return;
407 }else{
408 nameToBeFilled.setHasProblem(true);
409 nameToBeFilled.setTitleCache(fullNameString);
410 logger.info("Name string " + fullNameString + " could not be parsed!");
411 //return result;
412 return;
413 }
414 } catch (UnknownCdmTypeException e) {
415 nameToBeFilled.setHasProblem(true);
416 nameToBeFilled.setTitleCache(fullNameString);
417 logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " + fullNameString);
418 //return result;
419 return;
420 }
421 }
422
423 private void makeEmpty(NonViralName nameToBeFilled){
424 nameToBeFilled.setRank(null);
425 nameToBeFilled.setTitleCache(null, false);
426 nameToBeFilled.setNameCache(null);
427
428 nameToBeFilled.setAppendedPhrase(null);
429 //TODO ??
430 //nameToBeFilled.setBasionym(basionym);
431 nameToBeFilled.setBasionymAuthorTeam(null);
432 nameToBeFilled.setCombinationAuthorTeam(null);
433 nameToBeFilled.setExBasionymAuthorTeam(null);
434 nameToBeFilled.setExCombinationAuthorTeam(null);
435 nameToBeFilled.setAuthorshipCache(null);
436
437
438 nameToBeFilled.setHasProblem(false);
439 // TODO ?
440 //nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
441
442
443 nameToBeFilled.setGenusOrUninomial(null);
444 nameToBeFilled.setInfraGenericEpithet(null);
445 nameToBeFilled.setSpecificEpithet(null);
446 nameToBeFilled.setInfraSpecificEpithet(null);
447
448 nameToBeFilled.setNomenclaturalMicroReference(null);
449 nameToBeFilled.setNomenclaturalReference(null);
450
451 if (nameToBeFilled instanceof BotanicalName){
452 BotanicalName botanicalName = (BotanicalName)nameToBeFilled;
453 botanicalName.setAnamorphic(false);
454 botanicalName.setHybridFormula(false);
455 botanicalName.setMonomHybrid(false);
456 botanicalName.setBinomHybrid(false);
457 botanicalName.setTrinomHybrid(false);
458 }
459
460 if (nameToBeFilled instanceof ZoologicalName){
461 ZoologicalName zoologicalName = (ZoologicalName)nameToBeFilled;
462 zoologicalName.setBreed(null);
463 zoologicalName.setOriginalPublicationYear(null);
464 }
465
466 //TODO adapt to @Version of versionable entity, throws still optimistic locking error
467 //nameToBeFilled.setUpdated(Calendar.getInstance());
468 // TODO nameToBeFilled.setUpdatedBy(updatedBy);
469
470 }
471
472
473 /**
474 * Parses the fullAuthorString
475 * @param fullAuthorString
476 * @return array of Teams containing the Team[0],
477 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
478 */
479 public void fullAuthors (String fullAuthorString, TeamOrPersonBase[] authors, Integer[] years)
480 throws StringNotParsableException{
481 fullAuthorString = fullAuthorString.trim();
482 if (! fullAuthorStringPattern.matcher(fullAuthorString).matches())
483 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
484 fullAuthorsChecked(fullAuthorString, authors, years);
485 }
486
487
488 /*
489 * like fullTeams but without trim and match check
490 */
491 private void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase[] authors, Integer[] years){
492 TeamOrPersonBase[] result = new TeamOrPersonBase[4];
493 int authorTeamStart = 0;
494 Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
495 if (basionymMatcher.find(0)){
496
497 String basString = basionymMatcher.group();
498 basString = basString.replaceFirst(basStart, "");
499 basString = basString.replaceAll(basEnd, "").trim();
500 authorTeamStart = basionymMatcher.end(1) + 1;
501
502 TeamOrPersonBase[] basAuthors;
503 Integer[] basYears;
504 authorsAndEx(basString, basAuthors, basYears);
505 authors[2]= basAuthors[0];
506 years[2] = basYears[0];
507 authors[3]= basAuthors[1];
508 years[3] = basYears[1];
509 }
510 TeamOrPersonBase[] combinationAuthors;
511 Integer[] combinationYears;
512 authorsAndEx(fullAuthorString.substring(authorTeamStart), combinationAuthors, combinationYears);
513 authors[0]= combinationAuthors[0];
514 years[0] = combinationYears[0];
515 authors[1]= combinationAuthors[1];
516 years[1] = combinationYears[1];
517 }
518
519
520 /**
521 * Parses the author and ex-author String
522 * @param authorTeamString String representing the author and the ex-author team
523 * @return array of Teams containing the Team[0] and the ExTeam[1]
524 */
525 public void authorsAndEx (String authorTeamString, TeamOrPersonBase[] authors, Integer[] years){
526 TeamOrPersonBase[] result = new TeamOrPersonBase[2];
527 //TODO noch allgemeiner am anfang durch Replace etc.
528 authorTeamString = authorTeamString.trim();
529 authorTeamString = authorTeamString.replaceFirst(oWs + "ex" + oWs, " ex. " );
530 int authorEnd = authorTeamString.length();
531
532 Matcher exAuthorMatcher = exAuthorPattern.matcher(authorTeamString);
533 if (exAuthorMatcher.find(0)){
534 int exAuthorBegin = exAuthorMatcher.end(0);
535 String exString = authorTeamString.substring(exAuthorBegin).trim();
536 authorEnd = exAuthorMatcher.start(0);
537 authors [1] = author(exString);
538 }
539 authors [0] = author(authorTeamString.substring(0, authorEnd));
540 }
541
542
543 /**
544 * Parses an authorTeam String and returns the Team
545 * !!! TODO (atomization not yet implemented)
546 * @param authorTeamString String representing the author team
547 * @return an Team
548 */
549 public TeamOrPersonBase author (String authorString){
550 if (authorString == null){
551 return null;
552 }else if ((authorString = authorString.trim()).length() == 0){
553 return null;
554 }else if (! teamSplitterPattern.matcher(authorString).find()){
555 //1 Person
556 Person result = Person.NewInstance();
557 result.setNomenclaturalTitle(authorString);
558 return result;
559 }else{
560 return parsedTeam(authorString);
561 }
562
563 }
564
565 private Team parsedTeam(String authorString){
566 Team result = Team.NewInstance();
567 String[] authors = authorString.split(teamSplitter);
568 for (String author : authors){
569 Person person = Person.NewInstance();
570 person.setNomenclaturalTitle(author);
571 result.addTeamMember(person);
572 }
573 return result;
574 }
575
576
577 //Parsing of the given full name that has been identified as hybrid already somewhere else.
578 private BotanicalName parseHybrid(String fullName){
579 logger.warn("parseHybrid --> function not yet implemented");
580 BotanicalName result = BotanicalName.NewInstance(null);
581 result.setTitleCache(fullName);
582 return result;
583 }
584
585 // // Parsing of the given full name that has been identified as a cultivar already somwhere else.
586 // // The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
587 public BotanicalName parseCultivar(String fullName) throws StringNotParsableException{
588 CultivarPlantName result = null;
589 String[] words = oWsPattern.split(fullName);
590
591 /* ---------------------------------------------------------------------------------
592 * cultivar
593 * ---------------------------------------------------------------------------------*/
594 if (fullName.indexOf(" '") != 0){
595 //TODO location of 'xx' is probably not arbitrary
596 Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
597 if (cultivarMatcher.find()){
598 String namePart = fullName.replaceFirst(cultivar, "");
599
600 String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
601 //OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
602
603 result = (CultivarPlantName)parseFullName(namePart);
604 result.setCultivarName(cultivarPart);
605 }
606 }else if (fullName.indexOf(" cv.") != 0){
607 // cv. is old form (not official)
608 throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
609 }
610
611 /* ---------------------------------------------------------------------------------
612 * cultivar group
613 * ---------------------------------------------------------------------------------
614 */
615 // TODO in work
616 //Ann. this is not the official way of noting cultivar groups
617 String group = oWs + "Group" + oWs + capitalEpiWord + end;
618 Pattern groupRE = Pattern.compile(group);
619 Matcher groupMatcher = groupRE.matcher(fullName);
620 if (groupMatcher.find()){
621 if (! words[words.length - 2].equals("group")){
622 throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
623 }else{
624
625 String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
626 //OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
627
628 String cultivarPart = words[words.length -1];
629 result = (CultivarPlantName)parseFullName(namePart);
630 if (result != null){
631 result.setCultivarName(cultivarPart);
632
633 //OLD: result.setCultivarGroupName(cultivarPart);
634 }
635 }
636
637 }
638 // // ---------------------------------------------------------------------------------
639 // if ( result = "" ){
640 // return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
641 // }else{
642 // return result;
643 // }
644 return result; //TODO
645 }
646
647
648
649 //splitter
650 static String epiSplitter = "(\\s+|\\(|\\))"; //( ' '+| '(' | ')' )
651 static Pattern pattern = Pattern.compile(epiSplitter);
652
653 //some useful non-terminals
654 static String start = "^";
655 static String end = "$";
656 static String anyEnd = ".*" + end;
657 static String oWs = "\\s+"; //obligatory whitespaces
658 static String fWs = "\\s*"; //facultative whitespcace
659
660 static String capitalWord = "\\p{javaUpperCase}\\p{javaLowerCase}*";
661 static String nonCapitalWord = "\\p{javaLowerCase}+";
662
663 static String capitalDotWord = capitalWord + "\\.?"; //capitalWord with facultativ '.' at the end
664 static String nonCapitalDotWord = nonCapitalWord + "\\.?"; //nonCapitalWord with facultativ '.' at the end
665 static String dotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.?"; //word (capital or non-capital) with facultativ '.' at the end
666 //Words used in an epethiton for a TaxonName
667 static String nonCapitalEpiWord = "[a-zï\\-]+"; //TODO solve checkin Problem with Unicode character "[a-z�\\-]+";
668 static String capitalEpiWord = "[A-Z]"+ nonCapitalEpiWord;
669
670
671 //years
672 static String month = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
673 static String singleYear = "\\b" + "(?:17|18|19|20)" + "\\d{2}" + "\\b"; // word boundary followed by either 17,18,19, or 20 (not captured) followed by 2 digits
674 static String yearPhrase = "(" + singleYear + "(-" + singleYear + ")?" +
675 "(" + month + ")?)" ; // optional month
676
677 //seperator
678 static String yearSeperator = "." + oWs;
679 static String detailSeperator = ":" + oWs;
680 static String referenceSeperator1 = "," + oWs ;
681 static String inReferenceSeperator = oWs + "in" + oWs;
682 static String referenceSeperator = "(" + referenceSeperator1 +"|" + inReferenceSeperator + ")" ;
683 static String referenceAuthorSeperator = ","+ oWs;
684 static String volumeSeperator = "," + fWs ;
685 static String referenceEnd = ".";
686
687
688 //status
689 static String status = "";
690
691 //marker
692 static String InfraGenusMarker = "(subgen.|subg.|sect.|subsect.|ser.|subser.|t.infgen.)";
693 static String aggrOrGroupMarker = "(aggr.|agg.|group)";
694 static String infraSpeciesMarker = "(subsp.|convar.|var.|subvar.|f.|subf.|f.spec.|tax." + fWs + "infrasp.)";
695 static String oldInfraSpeciesMarker = "(prol.|proles|race|taxon|sublusus)";
696
697
698 //AuthorString
699 static String authorPart = "(" + "(D'|L'|'t\\s)?" + capitalDotWord + "('" + nonCapitalDotWord + ")?" + "|da|de(n|l|\\sla)?)" ;
700 static String author = "(" + authorPart + "(" + fWs + "|-)" + ")+" + "(f.|fil.|secundus)?";
701 static String teamSplitter = fWs + "(&)" + fWs;
702 static String authorTeam = fWs + "(" + author + teamSplitter + ")*" + author + "(" + teamSplitter + "al.)?" + fWs;
703 static String exString = "(ex.?)";
704 static String authorAndExTeam = authorTeam + "(" + oWs + exString + oWs + authorTeam + ")?";
705 static String basStart = "\\(";
706 static String basEnd = "\\)";
707 static String botanicBasionymAuthor = basStart + "(" + authorAndExTeam + ")" + basEnd; // '(' and ')' is for evaluation with RE.paren(x)
708 static String fullBotanicAuthorString = fWs + "(" + botanicBasionymAuthor +")?" + fWs + authorAndExTeam + fWs;
709 static String facultFullBotanicAuthorString = "(" + fullBotanicAuthorString + ")?" ;
710
711 //Zoo. Author
712 //TODO does zoo author have ex-Author?
713 static String zooAuthorTeam = authorTeam + fWs + "," + fWs + singleYear;
714 static String zooBasionymAuthor = basStart + "(" + zooAuthorTeam + ")" + basEnd;
715 static String fullZooAuthorString = fWs + "(" + zooBasionymAuthor +")?" + fWs + zooAuthorTeam + fWs;
716 static String facultFullZooAuthorString = "(" + fullZooAuthorString + ")?" ;
717
718 static String facultFullAuthorString2 = "(" + facultFullBotanicAuthorString + "|" + facultFullZooAuthorString + ")";
719
720
721 //details
722 //TODO still very simple
723 static String pageNumber = "\\d{1,5}";
724 static String detail = "(" + pageNumber + ")";
725
726 //reference
727 static String volume = "\\d{4}" + "\\(\\d{4}\\)?";
728
729 static String referenceTitle = "(" + dotWord + fWs + ")" + "{2,}";
730 static String bookReference = referenceTitle + volumeSeperator + volume;
731 static String bookSectionReference = authorTeam + referenceAuthorSeperator;
732 static String articleReference = inReferenceSeperator + bookReference ;
733 static String reference = "(" + articleReference + "|" + bookReference +")" +
734 detailSeperator + detail + yearSeperator + yearPhrase +
735 referenceEnd;
736
737 static Pattern referencePattern = Pattern.compile(reference);
738
739 static String pNomStatusNom = "nom\\." + fWs + "(superfl\\.|nud\\.|illeg\\.|inval\\.|cons\\.|alternativ\\.|subnud.|"+
740 "rej\\.|rej\\."+ fWs + "prop\\.|provis\\.)";
741 static String pNomStatusOrthVar = "orth\\." + fWs + "var\\.";
742 static String pNomStatus = "(" + pNomStatusNom + "|" + pNomStatusOrthVar + ")";
743 static String pNomStatusPhrase1 = "," + fWs + pNomStatus;
744 static String pNomStatusPhrase2 = "\\[" + fWs + pNomStatus + "\\]";
745
746 static String pNomStatusPhrase = "(?:" + pNomStatusPhrase1 + "|" + pNomStatusPhrase2 + ")";
747
748 // Soraya
749 //opus utique oppr.
750 //pro syn.
751 //provisional synonym
752 //fossil name
753
754
755
756 //cultivars and hybrids
757 static String cultivar = oWs + "'..+'"; //Achtung mit Hochkomma in AuthorNamen
758 static String cultivarMarker = oWs + "(cv.|')";
759 static String hybrid = oWs + "((x|X)" + oWs + "|notho)";//= ( x )|( X )|( notho)
760
761 // Name String
762 static String genusOrSupraGenus = capitalEpiWord;
763 static String infraGenus = capitalEpiWord + oWs + InfraGenusMarker + oWs + capitalEpiWord;
764 static String aggrOrGroup = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + aggrOrGroupMarker;
765 static String species = capitalEpiWord + oWs + nonCapitalEpiWord;
766 static String infraSpecies = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + infraSpeciesMarker + oWs + nonCapitalEpiWord;
767 static String oldInfraSpecies = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + oldInfraSpeciesMarker + oWs + nonCapitalEpiWord;
768 static String autonym = capitalEpiWord + oWs + "(" + nonCapitalEpiWord +")" + oWs + fullBotanicAuthorString + oWs + infraSpeciesMarker + oWs + "\\1"; //2-nd word and last word are the same
769
770 static String anyBotanicName = "(" + genusOrSupraGenus + "|" + infraGenus + "|" + aggrOrGroup + "|" + species + "|" +
771 infraSpecies + "|" + infraSpecies + "|" + oldInfraSpecies + "|" + autonym + ")+";
772 static String anyZooName = "(" + genusOrSupraGenus + "|" + infraGenus + "|" + aggrOrGroup + "|" + species + "|" +
773 infraSpecies + "|" + infraSpecies + "|" + oldInfraSpecies + ")+";
774 static String anyBotanicFullName = anyBotanicName + oWs + fullBotanicAuthorString;
775 static String anyZooFullName = anyZooName + oWs + fullZooAuthorString;
776 static String anyFullName = "(" + anyBotanicFullName + "|" + anyZooFullName + ")";
777
778
779 //Pattern
780 static Pattern oWsPattern = Pattern.compile(oWs);
781 static Pattern teamSplitterPattern = Pattern.compile(teamSplitter);
782 static Pattern cultivarPattern = Pattern.compile(cultivar);
783 static Pattern cultivarMarkerPattern = Pattern.compile(cultivarMarker);
784 static Pattern hybridPattern = Pattern.compile(hybrid);
785
786 static Pattern genusOrSupraGenusPattern = Pattern.compile(start + genusOrSupraGenus + facultFullAuthorString2 + end);
787 static Pattern infraGenusPattern = Pattern.compile(start + infraGenus + facultFullAuthorString2 + end);
788 static Pattern aggrOrGroupPattern = Pattern.compile(start + aggrOrGroup + fWs + end); //aggr. or group has no author string
789 static Pattern speciesPattern = Pattern.compile(start + species + facultFullAuthorString2 + end);
790 static Pattern infraSpeciesPattern = Pattern.compile(start + infraSpecies + facultFullAuthorString2 + end);
791 static Pattern oldInfraSpeciesPattern = Pattern.compile(start + oldInfraSpecies + facultFullAuthorString2 + end);
792 static Pattern autonymPattern = Pattern.compile(start + autonym + fWs + end);
793
794 static Pattern botanicBotanicPattern = Pattern.compile(botanicBasionymAuthor);
795 //static Pattern startsWithBasionymRE = Pattern.compile(basionymAuthor + anyEnd);
796 static Pattern exAuthorPattern = Pattern.compile(oWs + exString);
797
798 static Pattern fullBotanicAuthorStringPattern = Pattern.compile(fullBotanicAuthorString);
799 static Pattern fullZooAuthorStringPattern = Pattern.compile(fullZooAuthorString);
800
801 }