(no commit message)
[cdmlib.git] / cdmlib-model / src / main / java / eu / etaxonomy / cdm / strategy / parser / NonViralNameParserImpl.java
1 /**
2 *
3 */
4 package eu.etaxonomy.cdm.strategy.parser;
5
6 import java.util.regex.Matcher;
7 import java.util.regex.Pattern;
8
9 import org.apache.log4j.Logger;
10
11 import eu.etaxonomy.cdm.model.agent.Person;
12 import eu.etaxonomy.cdm.model.agent.Team;
13 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
14 import eu.etaxonomy.cdm.model.name.BacterialName;
15 import eu.etaxonomy.cdm.model.name.BotanicalName;
16 import eu.etaxonomy.cdm.model.name.CultivarPlantName;
17 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
18 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
19 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
20 import eu.etaxonomy.cdm.model.name.NonViralName;
21 import eu.etaxonomy.cdm.model.name.Rank;
22 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
23 import eu.etaxonomy.cdm.model.name.ZoologicalName;
24 import eu.etaxonomy.cdm.model.reference.Article;
25 import eu.etaxonomy.cdm.model.reference.Book;
26 import eu.etaxonomy.cdm.model.reference.BookSection;
27 import eu.etaxonomy.cdm.model.reference.Generic;
28 import eu.etaxonomy.cdm.model.reference.INomenclaturalReference;
29 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
30 import eu.etaxonomy.cdm.model.reference.StrictReferenceBase;
31 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
32 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
33
34
35 /**
36 * @author a.mueller
37 *
38 */
39 public class NonViralNameParserImpl implements INonViralNameParser<NonViralName> {
/** log4j logger shared by all parser instances. */
private static final Logger logger = Logger.getLogger(NonViralNameParserImpl.class);

// good intro: http://java.sun.com/docs/books/tutorial/essential/regex/index.html

/** Value for the {@code makeEmpty} parameter: the name is reset via makeEmpty() before it is filled. */
final static boolean MAKE_EMPTY = true;
/** Value for the {@code makeEmpty} parameter: the name is filled without being reset first. */
final static boolean MAKE_NOT_EMPTY = false;
46
47
48 public static NonViralNameParserImpl NewInstance(){
49 return new NonViralNameParserImpl();
50 }
51
52 /* (non-Javadoc)
53 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSimpleName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
54 */
55 public NonViralName parseSimpleName(String simpleName, Rank rank){
56 //TODO
57 logger.warn("parseSimpleName() not yet implemented. Uses parseFullName() instead");
58 return parseFullName(simpleName, null, rank);
59 }
60
61
62 /* (non-Javadoc)
63 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericSimpleName(java.lang.String)
64 */
65 public NonViralName parseSimpleName(String simpleName){
66 return parseSimpleName(simpleName, null);
67 }
68
69 public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code){
70 return getNonViralNameInstance(fullString, code, null);
71 }
72
73 public NonViralName getNonViralNameInstance(String fullString, NomenclaturalCode code, Rank rank){
74 NonViralName result = null;
75 if (code == null){
76 boolean isBotanicalName = anyBotanicFullNamePattern.matcher(fullString).find();
77 boolean isZoologicalName = anyZooFullNamePattern.matcher(fullString).find();;
78 boolean isBacteriologicalName = false;
79 boolean isCultivatedPlantName = false;
80 if ( (isBotanicalName || isCultivatedPlantName) && ! isZoologicalName && !isBacteriologicalName){
81 if (isBotanicalName){
82 result = BotanicalName.NewInstance(rank);
83 }else{
84 result = CultivarPlantName.NewInstance(rank);
85 }
86 }else if ( isZoologicalName /*&& ! isBotanicalName*/ && !isBacteriologicalName && !isCultivatedPlantName){
87 result = ZoologicalName.NewInstance(rank);
88 }else if ( isZoologicalName && ! isBotanicalName && !isBacteriologicalName && !isCultivatedPlantName){
89 result = BacterialName.NewInstance(rank);
90 }else {
91 result = NonViralName.NewInstance(rank);
92 }
93 }else if (code.equals(NomenclaturalCode.ICBN())){
94 result = BotanicalName.NewInstance(rank);
95 }else if (code.equals(NomenclaturalCode.ICZN())){
96 result = ZoologicalName.NewInstance(rank);
97 }else if (code.equals(NomenclaturalCode.ICNCP())){
98 logger.warn("ICNCP parsing not yet implemented");
99 result = CultivarPlantName.NewInstance(rank);
100 }else if (code.equals(NomenclaturalCode.BACTERIOLOGICAL())){
101 logger.warn("ICNCP not yet implemented");
102 result = BacterialName.NewInstance(rank);
103 }else if (code.equals(NomenclaturalCode.VIRAL())){
104 logger.error("Viral name is not a NonViralName !!");
105 }else{
106 logger.error("Unknown Nomenclatural Code !!");
107 }
108 return result;
109 }
110
111
112 /* (non-Javadoc)
113 * @see eu.etaxonomy.cdm.strategy.parser.INonViralNameParser#parseFullReference(java.lang.String)
114 */
115 public NonViralName parseFullReference(String fullReferenceString) {
116 return parseFullReference(fullReferenceString, null, null);
117 }
118
119 /* (non-Javadoc)
120 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
121 */
122 public NonViralName parseFullReference(String fullReferenceString, NomenclaturalCode nomCode, Rank rank) {
123 if (fullReferenceString == null){
124 return null;
125 }else{
126 NonViralName result = getNonViralNameInstance(fullReferenceString, nomCode, rank);
127 parseFullReference(result, fullReferenceString, rank, MAKE_EMPTY);
128 return result;
129 }
130 }
131
132 /* (non-Javadoc)
133 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullReference(eu.etaxonomy.cdm.model.name.BotanicalName, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, boolean)
134 */
135 public void parseFullReference(NonViralName nameToBeFilled, String fullReferenceString, Rank rank, boolean makeEmpty) {
136 if (fullReferenceString == null){
137 //return null;
138 return;
139 }
140 if (makeEmpty){
141 makeEmpty(nameToBeFilled);
142 }
143 fullReferenceString.replaceAll(oWs , " ");
144 fullReferenceString = fullReferenceString.trim();
145
146 String localFullName;
147 if (nameToBeFilled instanceof ZoologicalName){
148 localFullName = anyZooFullName;
149 }else{
150 localFullName = anyBotanicFullName;
151 }
152 //seperate name and reference part
153 String nameAndRefSeperator = "(^" + localFullName + ")("+ referenceSeperator + ")";
154 Pattern nameAndRefSeperatorPattern = Pattern.compile(nameAndRefSeperator);
155 Matcher nameAndRefSeperatorMatcher = nameAndRefSeperatorPattern.matcher(fullReferenceString);
156
157 if (nameAndRefSeperatorMatcher.find() ){
158 String nameAndSeperator = nameAndRefSeperatorMatcher.group(0);
159 String name = nameAndRefSeperatorMatcher.group(1);
160 String referenceString = fullReferenceString.substring(nameAndRefSeperatorMatcher.end());
161
162 // inRef?
163 String seperator = nameAndSeperator.substring(name.length());
164 boolean isInReference = false;
165 if (seperator.matches(inReferenceSeperator)){
166 isInReference = true;
167 }
168
169 //status
170 referenceString = parseNomStatus(referenceString, nameToBeFilled);
171
172 //parse subparts
173 parseFullName(nameToBeFilled, name, rank, makeEmpty);
174 parseReference(nameToBeFilled, referenceString, isInReference);
175 INomenclaturalReference ref = nameToBeFilled.getNomenclaturalReference();
176 if (ref != null && ref.getHasProblem()){
177 nameToBeFilled.setHasProblem(true);
178 }
179 }else{
180 //don't parse if name can't be seperated
181 nameToBeFilled.setHasProblem(true);
182 nameToBeFilled.setTitleCache(fullReferenceString);
183 logger.info("no applicable parsing rule could be found for \"" + fullReferenceString + "\"");
184 }
185 }
186
187 //TODO make it an Array of status
188 /**
189 * Extracts a {@link NomenclaturalStatus} from the reference String and adds it to the @link {@link TaxonNameBase}.
190 * The nomenclatural status part ist deleted from the reference String.
191 * @return String the new (shortend) reference String
192 */
193 private String parseNomStatus(String reference, NonViralName nameToBeFilled) {
194 String statusString;
195 Pattern hasStatusPattern = Pattern.compile("(" + pNomStatusPhrase + ")");
196 Matcher hasStatusMatcher = hasStatusPattern.matcher(reference);
197
198 if (hasStatusMatcher.find()) {
199 String statusPhrase = hasStatusMatcher.group(0);
200
201 Pattern statusPattern = Pattern.compile(pNomStatus);
202 Matcher statusMatcher = statusPattern.matcher(statusPhrase);
203 statusMatcher.find();
204 statusString = statusMatcher.group(0);
205 try {
206 NomenclaturalStatusType nomStatusType = NomenclaturalStatusType.getNomenclaturalStatusTypeByAbbreviation(statusString);
207 NomenclaturalStatus nomStatus = NomenclaturalStatus.NewInstance(nomStatusType);
208 nameToBeFilled.addStatus(nomStatus);
209
210 reference = reference.replace(statusPhrase, "");
211 } catch (UnknownCdmTypeException e) {
212 //Do nothing
213 }
214 }
215 return reference;
216 }
217
218
219 private void parseReference(NonViralName nameToBeFilled, String reference, boolean isInReference){
220
221 if (referencePattern.matcher(reference).matches() ){
222 //End (just delete, may be ambigous for yearPhrase, but no real information gets lost
223 Pattern endPattern = Pattern.compile( referenceEnd + end);
224 Matcher endMatcher = endPattern.matcher(reference);
225 if (endMatcher.find()){
226 String endPart = endMatcher.group(0);
227 reference = reference.substring(0, reference.length() - endPart.length());
228 }
229
230 //year
231 String yearPart = null;
232 String pYearPhrase = yearSeperator + yearPhrase + end;
233 Pattern yearPhrasePattern = Pattern.compile(pYearPhrase);
234 Matcher yearPhraseMatcher = yearPhrasePattern.matcher(reference);
235 if (yearPhraseMatcher.find()){
236 yearPart = yearPhraseMatcher.group(0);
237 reference = reference.substring(0, reference.length() - yearPart.length());
238 yearPart = yearPart.replaceFirst(start + yearSeperator, "").trim();
239 }
240
241 //detail
242 String pDetailPhrase = detailSeperator + detail + end;
243 Pattern detailPhrasePattern = Pattern.compile(pDetailPhrase);
244 Matcher detailPhraseMatcher = detailPhrasePattern.matcher(reference);
245 if (detailPhraseMatcher.find()){
246 String detailPart = detailPhraseMatcher.group(0);
247 reference = reference.substring(0, reference.length() - detailPart.length());
248 detailPart = detailPart.replaceFirst(start + detailSeperator, "").trim();
249 nameToBeFilled.setNomenclaturalMicroReference(detailPart);
250 }
251 //Title (and author)
252 parseReferenceTitle(reference, yearPart);
253 }else{
254 Generic ref = Generic.NewInstance();
255 ref.setTitleCache(reference);
256 ref.setHasProblem(true);
257 nameToBeFilled.setNomenclaturalReference(ref);
258 }
259
260 }
261
262 /**
263 * Parses the referenceTitlePart, including the author volume and edition.
264 * @param reference
265 * @param year
266 * @return
267 */
268 private ReferenceBase parseReferenceTitle(String reference, String year){
269 ReferenceBase result = null;
270 Pattern bookPattern = Pattern.compile(bookReference);
271 Pattern articlePattern = Pattern.compile(articleReference);
272 Pattern bookSectionPattern = Pattern.compile(bookSectionReference);
273
274
275 Matcher articleMatcher = articlePattern.matcher(reference);
276 Matcher bookMatcher = bookPattern.matcher(reference);
277 Matcher bookSectionMatcher = bookSectionPattern.matcher(reference);
278
279
280 if (articleMatcher.matches()){
281 //if (articlePatter)
282 //(type, author, title, volume, editor, series;
283 Article article = new Article();
284 article.setTitleCache(reference);
285 result = article;
286 }else if(bookMatcher.matches()){
287 Book book = new Book();
288 book .setTitleCache(reference);
289 result = book;
290 }else if (bookSectionMatcher.matches()){
291 BookSection bookSection = new BookSection();
292 bookSection.setTitleCache(reference);
293 result = bookSection;
294 }else{
295 logger.warn("unknown reference type not yet implemented");
296 //ReferenceBase refBase =
297 }
298 return result;
299 }
300
301
302 /* (non-Javadoc)
303 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseSubGenericFullName(java.lang.String)
304 */
305 public NonViralName parseFullName(String fullNameString){
306 return parseFullName(fullNameString, null, null);
307 }
308
309
310 /* (non-Javadoc)
311 * @see eu.etaxonomy.cdm.strategy.ITaxonNameParser#parseFullName(java.lang.String, eu.etaxonomy.cdm.model.name.Rank)
312 */
313 public NonViralName parseFullName(String fullNameString, NomenclaturalCode nomCode, Rank rank) {
314 if (fullNameString == null){
315 return null;
316 }else{
317 NonViralName result = getNonViralNameInstance(fullNameString, nomCode, rank);
318 parseFullName(result, fullNameString, rank, false);
319 return result;
320 }
321 }
322
323
324 public void parseFullName(NonViralName nameToBeFilled, String fullNameString, Rank rank, boolean makeEmpty) {
325 //TODO prol. etc.
326
327 String authorString = null;
328
329 if (fullNameString == null){
330 return;
331 }
332 if (makeEmpty){
333 makeEmpty(nameToBeFilled);
334 }
335 fullNameString.replaceAll(oWs , " ");
336 //TODO
337 // OLD: fullName = oWsRE.subst(fullName, " "); //substitute multiple whitespaces
338 fullNameString = fullNameString.trim();
339
340 String[] epi = pattern.split(fullNameString);
341 try {
342 //cultivars //TODO 2 implement cultivars
343 // if ( cultivarMarkerRE.match(fullName) ){ funktioniert noch nicht, da es z.B. auch Namen gibt, wie 't Hart
344 // result = parseCultivar(fullName);
345 // }
346 //hybrids //TODO 2 implement hybrids
347 //else
348 if (hybridPattern.matcher(fullNameString).matches() ){
349 nameToBeFilled = parseHybrid(fullNameString);
350 }
351 else if (genusOrSupraGenusPattern.matcher(fullNameString).matches()){
352 //supraGeneric
353 if (rank != null && rank.isSupraGeneric()){
354 nameToBeFilled.setRank(rank);
355 nameToBeFilled.setGenusOrUninomial(epi[0]);
356 }
357 //genus
358 else {
359 nameToBeFilled.setRank(Rank.GENUS());
360 nameToBeFilled.setGenusOrUninomial(epi[0]);
361 }
362 authorString = fullNameString.substring(epi[0].length());
363 }
364 //infra genus
365 else if (infraGenusPattern.matcher(fullNameString).matches()){
366 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[1]));
367 nameToBeFilled.setGenusOrUninomial(epi[0]);
368 nameToBeFilled.setInfraGenericEpithet(epi[2]);
369 authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length()+ 1 + epi[2].length());
370 }
371 //aggr. or group
372 else if (aggrOrGroupPattern.matcher(fullNameString).matches()){
373 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[2]));
374 nameToBeFilled.setGenusOrUninomial(epi[0]);
375 nameToBeFilled.setSpecificEpithet(epi[1]);
376 }
377 //species
378 else if (speciesPattern.matcher(fullNameString).matches()){
379 nameToBeFilled.setRank(Rank.SPECIES());
380 nameToBeFilled.setGenusOrUninomial(epi[0]);
381 nameToBeFilled.setSpecificEpithet(epi[1]);
382 authorString = fullNameString.substring(epi[0].length() + 1 + epi[1].length());
383 }
384 //autonym
385 else if (autonymPattern.matcher(fullNameString).matches()){
386 nameToBeFilled.setRank(Rank.getRankByAbbreviation(epi[epi.length - 2]));
387 nameToBeFilled.setGenusOrUninomial(epi[0]);
388 nameToBeFilled.setSpecificEpithet(epi[1]);
389 nameToBeFilled.setInfraSpecificEpithet(epi[epi.length - 1]);
390 int lenSpecies = 2 + epi[0].length()+epi[1].length();
391 int lenInfraSpecies = 2 + epi[epi.length - 2].length() + epi[epi.length - 1].length();
392 authorString = fullNameString.substring(lenSpecies, fullNameString.length() - lenInfraSpecies);
393 }
394 //infraSpecies
395 else if (infraSpeciesPattern.matcher(fullNameString).matches()){
396 String infraSpecRankEpi = epi[2];
397 String infraSpecEpi = epi[3];
398 if ("tax.".equals(infraSpecRankEpi)){
399 infraSpecRankEpi += " " + epi[3];
400 infraSpecEpi = epi[4];
401 }
402 nameToBeFilled.setRank(Rank.getRankByAbbreviation(infraSpecRankEpi));
403 nameToBeFilled.setGenusOrUninomial(epi[0]);
404 nameToBeFilled.setSpecificEpithet(epi[1]);
405 nameToBeFilled.setInfraSpecificEpithet(infraSpecEpi);
406 authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + infraSpecRankEpi.length() + 1 + infraSpecEpi.length());
407 }//old infraSpecies
408 else if (oldInfraSpeciesPattern.matcher(fullNameString).matches()){
409 boolean implemented = false;
410 if (implemented){
411 nameToBeFilled.setRank(Rank.getRankByNameOrAbbreviation(epi[2]));
412 nameToBeFilled.setGenusOrUninomial(epi[0]);
413 nameToBeFilled.setSpecificEpithet(epi[1]);
414 //TODO result.setUnnamedNamePhrase(epi[2] + " " + epi[3]);
415 authorString = fullNameString.substring(epi[0].length()+ 1 + epi[1].length() +1 + epi[2].length() + 1 + epi[3].length());
416 }else{
417 nameToBeFilled.setHasProblem(true);
418 nameToBeFilled.setTitleCache(fullNameString);
419 logger.info("Name string " + fullNameString + " could not be parsed because UnnnamedNamePhrase is not yet implemented!");
420 }
421 }
422 //none
423 else{
424 nameToBeFilled.setHasProblem(true);
425 nameToBeFilled.setTitleCache(fullNameString);
426 logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");
427 }
428 //authors
429 if (nameToBeFilled != null && authorString != null && authorString.trim().length() > 0 ){
430 TeamOrPersonBase[] authors = new TeamOrPersonBase[4];
431 Integer[] years = new Integer[4];
432 try {
433 fullAuthors(authorString, authors, years, nameToBeFilled.getClass());
434 } catch (StringNotParsableException e) {
435 nameToBeFilled.setHasProblem(true);
436 nameToBeFilled.setTitleCache(fullNameString);
437 logger.info("no applicable parsing rule could be found for \"" + fullNameString + "\"");;
438 }
439 nameToBeFilled.setCombinationAuthorTeam(authors[0]);
440 nameToBeFilled.setExCombinationAuthorTeam(authors[1]);
441 nameToBeFilled.setBasionymAuthorTeam(authors[2]);
442 nameToBeFilled.setExBasionymAuthorTeam(authors[3]);
443 if (nameToBeFilled instanceof ZoologicalName){
444 ZoologicalName zooName = (ZoologicalName)nameToBeFilled;
445 zooName.setPublicationYear(years[0]);
446 zooName.setOriginalPublicationYear(years[2]);
447 }
448 }
449 //return
450 if (nameToBeFilled != null){
451 //return(BotanicalName)result;
452 return;
453 }else{
454 nameToBeFilled.setHasProblem(true);
455 nameToBeFilled.setTitleCache(fullNameString);
456 logger.info("Name string " + fullNameString + " could not be parsed!");
457 //return result;
458 return;
459 }
460 } catch (UnknownCdmTypeException e) {
461 nameToBeFilled.setHasProblem(true);
462 nameToBeFilled.setTitleCache(fullNameString);
463 logger.info("unknown rank (" + (rank == null? "null":rank) + ") or abbreviation in string " + fullNameString);
464 //return result;
465 return;
466 }
467 }
468
469
470
471 /**
472 * Parses the fullAuthorString
473 * @param fullAuthorString
474 * @return array of Teams containing the Team[0],
475 * ExTeam[1], BasionymTeam[2], ExBasionymTeam[3]
476 */
477 protected void fullAuthors (String fullAuthorString, TeamOrPersonBase[] authors, Integer[] years, Class clazz)
478 throws StringNotParsableException{
479 fullAuthorString = fullAuthorString.trim();
480 if (fullAuthorString == null || clazz == null){
481 return;
482 }
483 //Botanic
484 if ( BotanicalName.class.isAssignableFrom(clazz) ){
485 if (! fullBotanicAuthorStringPattern.matcher(fullAuthorString).matches() ){
486 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
487 }
488 }
489 //Zoo
490 else if ( ZoologicalName.class.isAssignableFrom(clazz) ){
491 if (! fullZooAuthorStringPattern.matcher(fullAuthorString).matches() ){
492 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
493 }
494 }else {
495 //TODO
496 logger.warn ("not yet implemented");
497 throw new StringNotParsableException("fullAuthorString (" +fullAuthorString+") not parsable: ");
498 }
499 fullAuthorsChecked(fullAuthorString, authors, years);
500 }
501
502 /*
503 * like fullTeams but without trim and match check
504 */
505 protected void fullAuthorsChecked (String fullAuthorString, TeamOrPersonBase[] authors, Integer[] years){
506 int authorTeamStart = 0;
507 Matcher basionymMatcher = basionymPattern.matcher(fullAuthorString);
508
509 if (basionymMatcher.find(0)){
510
511 String basString = basionymMatcher.group();
512 basString = basString.replaceFirst(basStart, "");
513 basString = basString.replaceAll(basEnd, "").trim();
514 authorTeamStart = basionymMatcher.end(1) + 1;
515
516 TeamOrPersonBase[] basAuthors = new TeamOrPersonBase[2];
517 Integer[] basYears = new Integer[2];
518 authorsAndEx(basString, basAuthors, basYears);
519 authors[2]= basAuthors[0];
520 years[2] = basYears[0];
521 authors[3]= basAuthors[1];
522 years[3] = basYears[1];
523 }
524 TeamOrPersonBase[] combinationAuthors = new TeamOrPersonBase[2];;
525 Integer[] combinationYears = new Integer[2];
526 authorsAndEx(fullAuthorString.substring(authorTeamStart), combinationAuthors, combinationYears);
527 authors[0]= combinationAuthors[0] ;
528 years[0] = combinationYears[0];
529 authors[1]= combinationAuthors[1];
530 years[1] = combinationYears[1];
531 }
532
533
534 /**
535 * Parses the author and ex-author String
536 * @param authorTeamString String representing the author and the ex-author team
537 * @return array of Teams containing the Team[0] and the ExTeam[1]
538 */
539 protected void authorsAndEx (String authorTeamString, TeamOrPersonBase[] authors, Integer[] years){
540 //TODO noch allgemeiner am anfang durch Replace etc.
541 authorTeamString = authorTeamString.trim();
542 authorTeamString = authorTeamString.replaceFirst(oWs + "ex" + oWs, " ex. " );
543 int authorEnd = authorTeamString.length();
544
545 Matcher exAuthorMatcher = exAuthorPattern.matcher(authorTeamString);
546 if (exAuthorMatcher.find(0)){
547 int exAuthorBegin = exAuthorMatcher.end(0);
548 String exString = authorTeamString.substring(exAuthorBegin).trim();
549 authorEnd = exAuthorMatcher.start(0);
550 authors [1] = author(exString);
551 }
552 zooOrBotanicAuthor(authorTeamString.substring(0, authorEnd), authors, years );
553 }
554
555 /**
556 * Parses the authorString and if it matches an botanical or zoological authorTeam it fills
557 * the computes the AuthorTeam and fills it into the first field of the team array. Same applies
558 * to the year in case of an zoological name.
559 * @param authorString
560 * @param team
561 * @param year
562 */
563 protected void zooOrBotanicAuthor(String authorString, TeamOrPersonBase[] team, Integer[] year){
564 if (authorString == null){
565 return;
566 }else if ((authorString = authorString.trim()).length() == 0){
567 return;
568 }
569 Matcher zooAuthorAddidtionMatcher = zooAuthorAddidtionPattern.matcher(authorString);
570 if (zooAuthorAddidtionMatcher.find()){
571 int index = zooAuthorAddidtionMatcher.start(0);
572 String strYear = authorString.substring(index);
573 strYear = strYear.replaceAll(zooAuthorYearSeperator, "").trim();
574 year[0] = Integer.valueOf(strYear);
575 authorString = authorString.substring(0, index).trim();
576 }
577 team[0] = author(authorString);
578 }
579
580
581 /**
582 * Parses an authorTeam String and returns the Team
583 * !!! TODO (atomization not yet implemented)
584 * @param authorTeamString String representing the author team
585 * @return an Team
586 */
587 protected TeamOrPersonBase author (String authorString){
588 if (authorString == null){
589 return null;
590 }else if ((authorString = authorString.trim()).length() == 0){
591 return null;
592 }else if (! teamSplitterPattern.matcher(authorString).find()){
593 //1 Person
594 Person result = Person.NewInstance();
595 result.setNomenclaturalTitle(authorString);
596 return result;
597 }else{
598 return parsedTeam(authorString);
599 }
600
601 }
602
603 /**
604 * Parses an authorString (reprsenting a team into the single authors and add
605 * them to the return Team.
606 * @param authorString
607 * @return Team
608 */
609 protected Team parsedTeam(String authorString){
610 Team result = Team.NewInstance();
611 String[] authors = authorString.split(teamSplitter);
612 for (String author : authors){
613 Person person = Person.NewInstance();
614 person.setNomenclaturalTitle(author);
615 result.addTeamMember(person);
616 }
617 return result;
618 }
619
620
621 //Parsing of the given full name that has been identified as hybrid already somewhere else.
622 private BotanicalName parseHybrid(String fullName){
623 logger.warn("parseHybrid --> function not yet implemented");
624 BotanicalName result = BotanicalName.NewInstance(null);
625 result.setTitleCache(fullName);
626 return result;
627 }
628
629 // // Parsing of the given full name that has been identified as a cultivar already somwhere else.
630 // // The ... cv. ... syntax is not covered here as it is not according the rules for naming cultivars.
631 public BotanicalName parseCultivar(String fullName) throws StringNotParsableException{
632 CultivarPlantName result = null;
633 String[] words = oWsPattern.split(fullName);
634
635 /* ---------------------------------------------------------------------------------
636 * cultivar
637 * ---------------------------------------------------------------------------------*/
638 if (fullName.indexOf(" '") != 0){
639 //TODO location of 'xx' is probably not arbitrary
640 Matcher cultivarMatcher = cultivarPattern.matcher(fullName);
641 if (cultivarMatcher.find()){
642 String namePart = fullName.replaceFirst(cultivar, "");
643
644 String cultivarPart = cultivarMatcher.group(0).replace("'","").trim();
645 //OLD: String cultivarPart = cultivarRE.getParen(0).replace("'","").trim();
646
647 result = (CultivarPlantName)parseFullName(namePart);
648 result.setCultivarName(cultivarPart);
649 }
650 }else if (fullName.indexOf(" cv.") != 0){
651 // cv. is old form (not official)
652 throw new StringNotParsableException("Cultivars with only cv. not yet implemented in name parser!");
653 }
654
655 /* ---------------------------------------------------------------------------------
656 * cultivar group
657 * ---------------------------------------------------------------------------------
658 */
659 // TODO in work
660 //Ann. this is not the official way of noting cultivar groups
661 String group = oWs + "Group" + oWs + capitalEpiWord + end;
662 Pattern groupRE = Pattern.compile(group);
663 Matcher groupMatcher = groupRE.matcher(fullName);
664 if (groupMatcher.find()){
665 if (! words[words.length - 2].equals("group")){
666 throw new StringNotParsableException ("fct ParseHybrid --> term before cultivar group name in " + fullName + " should be 'group'");
667 }else{
668
669 String namePart = fullName.substring(0, groupMatcher.start(0) - 0);
670 //OLD: String namePart = fullName.substring(0, groupRE.getParenStart(0) - 0);
671
672 String cultivarPart = words[words.length -1];
673 result = (CultivarPlantName)parseFullName(namePart);
674 if (result != null){
675 result.setCultivarName(cultivarPart);
676
677 //OLD: result.setCultivarGroupName(cultivarPart);
678 }
679 }
680
681 }
682 // // ---------------------------------------------------------------------------------
683 // if ( result = "" ){
684 // return "I: fct ParseCultivar: --> could not parse cultivar " + fullName;
685 // }else{
686 // return result;
687 // }
688 return result; //TODO
689 }
690
691
692 private void makeEmpty(NonViralName nameToBeFilled){
693 nameToBeFilled.setRank(null);
694 nameToBeFilled.setTitleCache(null, false);
695 nameToBeFilled.setNameCache(null);
696
697 nameToBeFilled.setAppendedPhrase(null);
698 //TODO ??
699 //nameToBeFilled.setBasionym(basionym);
700 nameToBeFilled.setBasionymAuthorTeam(null);
701 nameToBeFilled.setCombinationAuthorTeam(null);
702 nameToBeFilled.setExBasionymAuthorTeam(null);
703 nameToBeFilled.setExCombinationAuthorTeam(null);
704 nameToBeFilled.setAuthorshipCache(null);
705
706
707 nameToBeFilled.setHasProblem(false);
708 // TODO ?
709 //nameToBeFilled.setHomotypicalGroup(newHomotypicalGroup);
710
711
712 nameToBeFilled.setGenusOrUninomial(null);
713 nameToBeFilled.setInfraGenericEpithet(null);
714 nameToBeFilled.setSpecificEpithet(null);
715 nameToBeFilled.setInfraSpecificEpithet(null);
716
717 nameToBeFilled.setNomenclaturalMicroReference(null);
718 nameToBeFilled.setNomenclaturalReference(null);
719
720 if (nameToBeFilled instanceof BotanicalName){
721 BotanicalName botanicalName = (BotanicalName)nameToBeFilled;
722 botanicalName.setAnamorphic(false);
723 botanicalName.setHybridFormula(false);
724 botanicalName.setMonomHybrid(false);
725 botanicalName.setBinomHybrid(false);
726 botanicalName.setTrinomHybrid(false);
727 }
728
729 if (nameToBeFilled instanceof ZoologicalName){
730 ZoologicalName zoologicalName = (ZoologicalName)nameToBeFilled;
731 zoologicalName.setBreed(null);
732 zoologicalName.setOriginalPublicationYear(null);
733 }
734
735 //TODO adapt to @Version of versionable entity, throws still optimistic locking error
736 //nameToBeFilled.setUpdated(Calendar.getInstance());
737 // TODO nameToBeFilled.setUpdatedBy(updatedBy);
738 }
739
740
741
742 //splitter
// Regular expression fragments the parser patterns are composed of. The fragments are plain
// strings that are concatenated to larger expressions.
// NOTE(review): several fragments below contain an unescaped '.', which matches any character
// in a regular expression (e.g. yearSeperator, referenceEnd, exString, the rank markers,
// "al.", "f.|fil." and "subnud."). Presumably a literal dot is meant - confirm before changing,
// as this alters what the patterns accept.

// splits a name string into its epithets: at whitespace, '(' and ')'
static String epiSplitter = "(\\s+|\\(|\\))"; //( ' '+| '(' | ')' )
static Pattern pattern = Pattern.compile(epiSplitter);

//some useful non-terminals
static String start = "^";
static String end = "$";
static String anyEnd = ".*" + end;
static String oWs = "\\s+"; //obligatory whitespace
static String fWs = "\\s*"; //facultative whitespace

static String capitalWord = "\\p{javaUpperCase}\\p{javaLowerCase}*";
static String nonCapitalWord = "\\p{javaLowerCase}+";

static String capitalDotWord = capitalWord + "\\.?"; //capitalWord with facultative '.' at the end
static String nonCapitalDotWord = nonCapitalWord + "\\.?"; //nonCapitalWord with facultative '.' at the end
static String dotWord = "(" + capitalWord + "|" + nonCapitalWord + ")\\.?"; //word (capital or non-capital) with facultative '.' at the end
//Words used in an epithet of a TaxonName
static String nonCapitalEpiWord = "[a-zï\\-]+"; //TODO solve the check-in problem with the non-ASCII character in this expression
static String capitalEpiWord = "[A-Z]"+ nonCapitalEpiWord;


//years
static String month = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)";
static String singleYear = "\\b" + "(?:17|18|19|20)" + "\\d{2}" + "\\b"; // word boundary followed by either 17,18,19, or 20 (not captured) followed by 2 digits
static String yearPhrase = "(" + singleYear + "(-" + singleYear + ")?" +
"(" + month + ")?)" ; // optional second year (range) and optional month

//separators
static String yearSeperator = "." + oWs;
static String detailSeperator = ":" + oWs;
static String referenceSeperator1 = "," + oWs ;
static String inReferenceSeperator = oWs + "in" + oWs;
static String referenceSeperator = "(" + referenceSeperator1 +"|" + inReferenceSeperator + ")" ;
static String referenceAuthorSeperator = ","+ oWs;
static String volumeSeperator = "," + fWs ;
static String referenceEnd = ".";


//status (empty, not used by the code visible in this part of the class)
static String status = "";

//rank markers
static String InfraGenusMarker = "(subgen.|subg.|sect.|subsect.|ser.|subser.|t.infgen.)";
static String aggrOrGroupMarker = "(aggr.|agg.|group)";
static String infraSpeciesMarker = "(subsp.|convar.|var.|subvar.|f.|subf.|f.spec.|tax." + fWs + "infrasp.)";
static String oldInfraSpeciesMarker = "(prol.|proles|race|taxon|sublusus)";


//AuthorString
static String authorPart = "(" + "(D'|L'|'t\\s)?" + capitalDotWord + "('" + nonCapitalDotWord + ")?" + "|da|de(n|l|\\sla)?)" ;
static String author = "(" + authorPart + "(" + fWs + "|-)" + ")+" + "(f.|fil.|secundus)?";
static String teamSplitter = fWs + "(&)" + fWs;
static String authorTeam = fWs + "(" + author + teamSplitter + ")*" + author + "(" + teamSplitter + "al.)?" + fWs;
static String exString = "(ex.?)";
static String authorAndExTeam = authorTeam + "(" + oWs + exString + oWs + authorTeam + ")?";
static String basStart = "\\(";
static String basEnd = "\\)";
static String botanicBasionymAuthor = basStart + "(" + authorAndExTeam + ")" + basEnd; // the inner '(' and ')' form a capturing group for the authors between the brackets
static String fullBotanicAuthorString = fWs + "(" + botanicBasionymAuthor +")?" + fWs + authorAndExTeam + fWs;
static String facultFullBotanicAuthorString = "(" + fullBotanicAuthorString + ")?" ;

//Zoo. Author (author team followed by a comma and a year)
//TODO does zoo author have ex-Author?
static String zooAuthorYearSeperator = ",";
static String zooAuthorAddidtion = fWs + zooAuthorYearSeperator + fWs + singleYear;
static String zooAuthorTeam = authorTeam + zooAuthorAddidtion;
static String zooBasionymAuthor = basStart + "(" + zooAuthorTeam + ")" + basEnd;
static String fullZooAuthorString = fWs + "(" + zooBasionymAuthor +")?" + fWs + zooAuthorTeam + fWs;
static String facultFullZooAuthorString = "(" + fullZooAuthorString + ")?" ;

static String facultFullAuthorString2 = "(" + facultFullBotanicAuthorString + "|" + facultFullZooAuthorString + ")";

static String basionymAuthor = "(" + botanicBasionymAuthor + "|" + zooBasionymAuthor+ ")";
static String fullAuthorString = "(" + fullBotanicAuthorString + "|" + fullZooAuthorString+ ")";

//details
//TODO still very simple (a page number of 1 to 5 digits)
static String pageNumber = "\\d{1,5}";
static String detail = "(" + pageNumber + ")";

//reference
//NOTE(review): the '?' only applies to the closing bracket, so this requires 4 digits, '(' and
//4 more digits. Presumably the whole bracket part is meant to be optional - confirm.
static String volume = "\\d{4}" + "\\(\\d{4}\\)?";

static String referenceTitle = "(" + dotWord + fWs + ")" + "{2,}";
static String bookReference = referenceTitle + volumeSeperator + volume;
static String bookSectionReference = authorTeam + referenceAuthorSeperator;
static String articleReference = inReferenceSeperator + bookReference ;
static String reference = "(" + articleReference + "|" + bookReference +")" +
detailSeperator + detail + yearSeperator + yearPhrase +
referenceEnd;

static Pattern referencePattern = Pattern.compile(reference);

//nomenclatural status abbreviations
static String pNomStatusNom = "nom\\." + fWs + "(superfl\\.|nud\\.|illeg\\.|inval\\.|cons\\.|alternativ\\.|subnud.|"+
"rej\\.|rej\\."+ fWs + "prop\\.|provis\\.)";
static String pNomStatusOrthVar = "orth\\." + fWs + "var\\.";
839 static String pNomStatus = "(" + pNomStatusNom + "|" + pNomStatusOrthVar + ")";
840 static String pNomStatusPhrase1 = "," + fWs + pNomStatus;
841 static String pNomStatusPhrase2 = "\\[" + fWs + pNomStatus + "\\]";
842
843 static String pNomStatusPhrase = "(?:" + pNomStatusPhrase1 + "|" + pNomStatusPhrase2 + ")";
844
845 // Soraya
846 //opus utique oppr.
847 //pro syn.
848 //provisional synonym
849 //fossil name
850
851
852
853 //cultivars and hybrids
854 static String cultivar = oWs + "'..+'"; //Achtung mit Hochkomma in AuthorNamen
855 static String cultivarMarker = oWs + "(cv.|')";
856 static String hybrid = oWs + "((x|X)" + oWs + "|notho)";//= ( x )|( X )|( notho)
857
858 // Name String
859 static String genusOrSupraGenus = capitalEpiWord;
860 static String infraGenus = capitalEpiWord + oWs + InfraGenusMarker + oWs + capitalEpiWord;
861 static String aggrOrGroup = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + aggrOrGroupMarker;
862 static String species = capitalEpiWord + oWs + nonCapitalEpiWord;
863 static String infraSpecies = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + infraSpeciesMarker + oWs + nonCapitalEpiWord;
864 static String oldInfraSpecies = capitalEpiWord + oWs + nonCapitalEpiWord + oWs + oldInfraSpeciesMarker + oWs + nonCapitalEpiWord;
865 static String autonym = capitalEpiWord + oWs + "(" + nonCapitalEpiWord +")" + oWs + fullBotanicAuthorString + oWs + infraSpeciesMarker + oWs + "\\1"; //2-nd word and last word are the same
866
867 static String anyBotanicName = "(" + genusOrSupraGenus + "|" + infraGenus + "|" + aggrOrGroup + "|" + species + "|" +
868 infraSpecies + "|" + infraSpecies + "|" + oldInfraSpecies + "|" + autonym + ")+";
869 static String anyZooName = "(" + genusOrSupraGenus + "|" + infraGenus + "|" + aggrOrGroup + "|" + species + "|" +
870 infraSpecies + "|" + infraSpecies + "|" + oldInfraSpecies + ")+";
871 static String anyBotanicFullName = anyBotanicName + oWs + fullBotanicAuthorString;
872 static String anyZooFullName = anyZooName + oWs + fullZooAuthorString;
873 static String anyFullName = "(" + anyBotanicFullName + "|" + anyZooFullName + ")";
874
875 //Pattern
876 static Pattern oWsPattern = Pattern.compile(oWs);
877 static Pattern teamSplitterPattern = Pattern.compile(teamSplitter);
878 static Pattern cultivarPattern = Pattern.compile(cultivar);
879 static Pattern cultivarMarkerPattern = Pattern.compile(cultivarMarker);
880 static Pattern hybridPattern = Pattern.compile(hybrid);
881
882 static Pattern genusOrSupraGenusPattern = Pattern.compile(start + genusOrSupraGenus + facultFullAuthorString2 + end);
883 static Pattern infraGenusPattern = Pattern.compile(start + infraGenus + facultFullAuthorString2 + end);
884 static Pattern aggrOrGroupPattern = Pattern.compile(start + aggrOrGroup + fWs + end); //aggr. or group has no author string
885 static Pattern speciesPattern = Pattern.compile(start + species + facultFullAuthorString2 + end);
886 static Pattern infraSpeciesPattern = Pattern.compile(start + infraSpecies + facultFullAuthorString2 + end);
887 static Pattern oldInfraSpeciesPattern = Pattern.compile(start + oldInfraSpecies + facultFullAuthorString2 + end);
888 static Pattern autonymPattern = Pattern.compile(start + autonym + fWs + end);
889
890 static Pattern botanicBasionymPattern = Pattern.compile(botanicBasionymAuthor);
891 static Pattern zooBasionymPattern = Pattern.compile(zooBasionymAuthor);
892 static Pattern basionymPattern = Pattern.compile(basionymAuthor);
893
894 static Pattern zooAuthorPattern = Pattern.compile(zooAuthorTeam);
895 static Pattern zooAuthorAddidtionPattern = Pattern.compile(zooAuthorAddidtion);
896
897 static Pattern exAuthorPattern = Pattern.compile(oWs + exString);
898
899 static Pattern fullBotanicAuthorStringPattern = Pattern.compile(fullBotanicAuthorString);
900 static Pattern fullZooAuthorStringPattern = Pattern.compile(fullZooAuthorString);
901 static Pattern fullAuthorStringPattern = Pattern.compile(fullAuthorString);
902
903 static Pattern anyBotanicFullNamePattern = Pattern.compile(anyBotanicFullName);
904 static Pattern anyZooFullNamePattern = Pattern.compile(anyZooFullName);
905
906
907
908
909
910 }