ref #6369 adapt existing occurrences of interface to removed generics in cdmlib-app
[cdmlib-apps.git] / cdm-eflora / src / main / java / eu / etaxonomy / cdm / io / eflora / EfloraTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.eflora;
11
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21
22 import org.apache.commons.lang.CharUtils;
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.log4j.Logger;
25 import org.jdom.Attribute;
26 import org.jdom.Element;
27 import org.springframework.stereotype.Component;
28 import org.springframework.transaction.TransactionStatus;
29
30 import eu.etaxonomy.cdm.common.CdmUtils;
31 import eu.etaxonomy.cdm.common.ResultWrapper;
32 import eu.etaxonomy.cdm.common.XmlHelp;
33 import eu.etaxonomy.cdm.io.common.ICdmIO;
34 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35 import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36 import eu.etaxonomy.cdm.model.agent.Person;
37 import eu.etaxonomy.cdm.model.agent.Team;
38 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40 import eu.etaxonomy.cdm.model.common.Annotation;
41 import eu.etaxonomy.cdm.model.common.AnnotationType;
42 import eu.etaxonomy.cdm.model.common.CdmBase;
43 import eu.etaxonomy.cdm.model.common.Credit;
44 import eu.etaxonomy.cdm.model.common.ExtensionType;
45 import eu.etaxonomy.cdm.model.common.ISourceable;
46 import eu.etaxonomy.cdm.model.common.Language;
47 import eu.etaxonomy.cdm.model.common.Marker;
48 import eu.etaxonomy.cdm.model.common.MarkerType;
49 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
50 import eu.etaxonomy.cdm.model.common.TimePeriod;
51 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
52 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53 import eu.etaxonomy.cdm.model.description.Feature;
54 import eu.etaxonomy.cdm.model.description.KeyStatement;
55 import eu.etaxonomy.cdm.model.description.PolytomousKey;
56 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
57 import eu.etaxonomy.cdm.model.description.TaxonDescription;
58 import eu.etaxonomy.cdm.model.description.TextData;
59 import eu.etaxonomy.cdm.model.name.BotanicalName;
60 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
61 import eu.etaxonomy.cdm.model.name.INonViralName;
62 import eu.etaxonomy.cdm.model.name.NameRelationshipType;
63 import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
64 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
65 import eu.etaxonomy.cdm.model.name.Rank;
66 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
67 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
68 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
69 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
70 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
71 import eu.etaxonomy.cdm.model.reference.IBook;
72 import eu.etaxonomy.cdm.model.reference.IJournal;
73 import eu.etaxonomy.cdm.model.reference.Reference;
74 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
75 import eu.etaxonomy.cdm.model.reference.ReferenceType;
76 import eu.etaxonomy.cdm.model.taxon.Classification;
77 import eu.etaxonomy.cdm.model.taxon.SynonymType;
78 import eu.etaxonomy.cdm.model.taxon.Taxon;
79 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
80 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
81 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
82 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
83 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
84
85
86 /**
87 * @author a.mueller
88 *
89 */
90 @Component
91 public class EfloraTaxonImport extends EfloraImportBase implements ICdmIO<EfloraImportState> {
/** Class logger. */
private static final Logger logger = Logger.getLogger(EfloraTaxonImport.class);

/** A progress message is logged each time this many taxa have been handled. */
private static int modCount = 30000;

/** Name parser instance; its usage is not visible in this part of the class. */
private final NonViralNameParserImpl parser = new NonViralNameParserImpl();

/**
 * Default constructor; the superclass constructor is invoked implicitly.
 */
public EfloraTaxonImport(){
}
100
101
102 @Override
103 public boolean doCheck(EfloraImportState state){
104 boolean result = true;
105 return result;
106 }
107
108 //TODO make part of state, but state is renewed when invoking the import a second time
109 private UnmatchedLeads unmatchedLeads;
110
111 @Override
112 public void doInvoke(EfloraImportState state){
113 logger.info("start make Taxa ...");
114
115 //FIXME reset state
116 state.putTree(null, null);
117 // UnmatchedLeads unmatchedLeads = state.getOpenKeys();
118 if (unmatchedLeads == null){
119 unmatchedLeads = UnmatchedLeads.NewInstance();
120 }
121 state.setUnmatchedLeads(unmatchedLeads);
122
123 TransactionStatus tx = startTransaction();
124 unmatchedLeads.saveToSession(getPolytomousKeyNodeService());
125
126
127 //TODO generally do not store the reference object in the config
128 Reference sourceReference = state.getConfig().getSourceReference();
129 getReferenceService().saveOrUpdate(sourceReference);
130
131 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
132 ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
133
134 Element elbody= getBodyElement(state.getConfig());
135 List<Element> elTaxonList = elbody.getChildren();
136
137 int i = 0;
138
139 Set<String> unhandledTitleClassess = new HashSet<String>();
140 Set<String> unhandledNomeclatureChildren = new HashSet<String>();
141 Set<String> unhandledDescriptionChildren = new HashSet<String>();
142
143 Taxon lastTaxon = getLastTaxon(state);
144
145 //for each taxon
146 for (Element elTaxon : elTaxonList){
147 try {
148 if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
149 if (! elTaxon.getName().equalsIgnoreCase("taxon")){
150 logger.warn("body has element other than 'taxon'");
151 }
152
153 BotanicalName botanicalName = BotanicalName.NewInstance(Rank.SPECIES());
154 Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
155
156 handleTaxonAttributes(elTaxon, taxon, state);
157
158
159 List<Element> children = elTaxon.getChildren();
160 handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren, unhandledDescriptionChildren, taxon, children);
161 handleTaxonRelation(state, taxon, lastTaxon);
162 lastTaxon = taxon;
163 taxaToSave.add(taxon);
164 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
165
166 } catch (Exception e) {
167 logger.warn("Exception occurred in Sapindacea taxon import: " + e);
168 e.printStackTrace();
169 }
170
171 }
172
173 System.out.println(state.getUnmatchedLeads().toString());
174 logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
175
176 logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
177 logger.info("Children for description are: " + unhandledDescriptionChildren);
178 logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
179 logger.info("Children for nom are: " + unhandledNomChildren);
180
181
182 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
183 logger.info(i + " taxa handled. Saving ...");
184 getTaxonService().saveOrUpdate(taxaToSave);
185 getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
186 state.getFeatureNodesToSave().clear();
187 commitTransaction(tx);
188
189 logger.info("end makeTaxa ...");
190 logger.info("start makeKey ...");
191 // invokeDoKey(state);
192 logger.info("end makeKey ...");
193
194 if (! success.getValue()){
195 state.setUnsuccessfull();
196 }
197 return ;
198 }
199
200
201 private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
202 List<Attribute> attrList = elTaxon.getAttributes();
203 for (Attribute attr : attrList){
204 String attrName = attr.getName();
205 String attrValue = attr.getValue();
206 if ("class".equals(attrName)){
207 if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES") ){
208 taxon.setDoubtful(true);
209 }else{
210 MarkerType markerType = getMarkerType(state, attrValue);
211 if (markerType == null){
212 logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
213 }else{
214 taxon.addMarker(Marker.NewInstance(markerType, true));
215 }
216 }
217 }else if ("num".equals(attrName)){
218 logger.warn("num not yet supported");
219 }else{
220 logger.warn("Attribute " + attrName + " not yet supported for element taxon");
221 }
222 }
223
224 }
225
226
227 private Taxon getLastTaxon(EfloraImportState state) {
228 if (state.getConfig().getLastTaxonUuid() == null){
229 return null;
230 }else{
231 return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
232 }
233 }
234
235
236 // private void invokeDoKey(SapindaceaeImportState state) {
237 // TransactionStatus tx = startTransaction();
238 //
239 // Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
240 // ITaxonService taxonService = getTaxonService();
241 // ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
242 //
243 // Element elbody= getBodyElement(state.getConfig());
244 // List<Element> elTaxonList = elbody.getChildren();
245 //
246 // int i = 0;
247 //
248 // //for each taxon
249 // for (Element elTaxon : elTaxonList){
250 // if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
251 // if (! elTaxon.getName().equalsIgnoreCase("taxon")){
252 // continue;
253 // }
254 //
255 // List<Element> children = elTaxon.getChildren("key");
256 // for (Element element : children){
257 // handleKeys(state, element, null);
258 // }
259 // nodesToSave.add(taxon);
260 //
261 // }
262 //
263 // }
264
265
266 // body/taxon/*
267 private void handleTaxonElement(EfloraImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
268 AnnotatableEntity lastEntity = null;
269 for (Element element : children){
270 String elName = element.getName();
271
272 if (elName.equalsIgnoreCase("title")){
273 handleTitle(state, element, taxon, unhandledTitleClassess);
274 lastEntity = null;
275 }else if(elName.equalsIgnoreCase("nomenclature")){
276 handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
277 lastEntity = null;
278 }else if(elName.equalsIgnoreCase("description")){
279 handleDescription(state, element, taxon, unhandledDescriptionChildren);
280 lastEntity = null;
281 }else if(elName.equalsIgnoreCase("habitatecology")){
282 lastEntity = handleEcology(state, element, taxon);
283 }else if(elName.equalsIgnoreCase("distribution")){
284 lastEntity = handleDistribution(state, element, taxon);
285 }else if(elName.equalsIgnoreCase("uses")){
286 lastEntity = handleUses(state, element, taxon);
287 }else if(elName.equalsIgnoreCase("notes")){
288 lastEntity = handleTaxonNotes(state, element, taxon);
289 }else if(elName.equalsIgnoreCase("chromosomes")){
290 lastEntity = handleChromosomes(state, element, taxon);
291 }else if(elName.equalsIgnoreCase("vernacularnames")){
292 handleVernaculars(state, element, taxon);
293 }else if(elName.equalsIgnoreCase("key")){
294 lastEntity = handleKeys(state, element, taxon);
295 }else if(elName.equalsIgnoreCase("references")){
296 handleReferences(state, element, taxon, lastEntity);
297 lastEntity = null;
298 }else if(elName.equalsIgnoreCase("taxon")){
299 logger.warn("A taxon should not be part of a taxon");
300 }else if(elName.equalsIgnoreCase("homotypes")){
301 logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
302 }else{
303 logger.warn("Unexpected child for taxon: " + elName);
304 }
305 }
306 }
307
308
309 private void handleVernaculars(EfloraImportState state, Element elVernacular, Taxon taxon) {
310 verifyNoAttribute(elVernacular);
311 verifyNoChildren(elVernacular, false);
312 String value = elVernacular.getTextNormalize();
313 Feature feature = Feature.COMMON_NAME();
314 value = replaceStart(value, "Noms vernaculaires");
315 String[] dialects = value.split(";");
316 for (String singleDialect : dialects){
317 handleSingleDialect(taxon, singleDialect, feature, state);
318 }
319 return;
320 }
321
322
323 private void handleSingleDialect(Taxon taxon, String singleDialect, Feature feature, EfloraImportState state) {
324 singleDialect = singleDialect.trim();
325 TaxonDescription description = getDescription(taxon);
326 String reDialect = "\\(dial\\.\\s.*\\)";
327 // String reDialect = "\\(.*\\)";
328 Pattern patDialect = Pattern.compile(reDialect);
329 Matcher matcher = patDialect.matcher(singleDialect);
330 if (matcher.find()){
331 String dialect = singleDialect.substring(matcher.start(), matcher.end());
332 dialect = dialect.replace("(dial. ", "").replace(")", "");
333
334 Language language = null;
335 try {
336 language = this.getLanguage(state, state.getTransformer().getLanguageUuid(dialect), dialect, dialect, dialect);
337 } catch (UndefinedTransformerMethodException e) {
338 logger.error(e.getMessage());
339 }
340
341 String commonNames = singleDialect.substring(0, matcher.start());
342 String[] splitNames = commonNames.split(",");
343 for (String commonNameString : splitNames){
344 commonNameString = commonNameString.trim();
345 CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameString, language);
346 description.addElement(commonName);
347 }
348 }else{
349 logger.warn("No dialect match: " + singleDialect);
350 }
351 }
352
353
354 private void handleReferences(EfloraImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
355 verifyNoAttribute(elReferences);
356 verifyNoChildren(elReferences, true);
357 String refString = elReferences.getTextNormalize();
358 if (lastEntity == null){
359 logger.warn("No last entity defined: " + refString);
360 return;
361 }
362
363 Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
364 lastEntity.addAnnotation(annotation);
365 }
366
367
368 private PolytomousKey handleKeys(EfloraImportState state, Element elKey, Taxon taxon) {
369 UnmatchedLeads openKeys = state.getUnmatchedLeads();
370
371 //title
372 String title = makeKeyTitle(elKey);
373
374 //key
375 PolytomousKey key = PolytomousKey.NewTitledInstance(title);
376
377 //TODO add covered taxa etc.
378 verifyNoAttribute(elKey);
379
380 //notes
381 makeKeyNotes(elKey, key);
382
383 //keycouplets
384 List<Element> keychoices = new ArrayList<Element>();
385 keychoices.addAll(elKey.getChildren("keycouplet"));
386 keychoices.addAll(elKey.getChildren("keychoice"));
387
388
389 for (Element elKeychoice : keychoices){
390 handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
391 elKey.removeContent(elKeychoice);
392 }
393
394 //
395 verifyNoChildren(elKey);
396 logger.info("Unmatched leads after key handling:" + openKeys.toString());
397
398
399 if (state.getConfig().isDoPrintKeys()){
400 key.print(System.err);
401 }
402 getPolytomousKeyService().save(key);
403 return key;
404 }
405
406
407 /**
408 * @param state
409 * @param elKey
410 * @param openKeys
411 * @param key
412 * @param elKeychoice
413 * @param taxon
414 */
415 private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
416
417 //char Attribute
418 //TODO it's still unclear if char is a feature and needs to be a new attribute
419 //or if it is handled as question. Therefore both cases are handled but feature
420 //is finally not yet set
421 KeyStatement question = handleKeychoiceChar(state, elKeychoice);
422 Feature feature = handleKeychoiceCharAsFeature(state, elKeychoice);
423
424 //lead
425 List<PolytomousKeyNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, question, feature);
426
427 //num -> match with unmatched leads
428 handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
429
430 //others
431 verifyNoAttribute(elKeychoice);
432 }
433
434
435 /**
436 * @param openKeys
437 * @param key
438 * @param elKeychoice
439 * @param childNodes
440 */
441 private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<PolytomousKeyNode> childNodes) {
442 Attribute numAttr = elKeychoice.getAttribute("num");
443 String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
444 UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
445 Set<PolytomousKeyNode> matchingNodes = openKeys.getNodes(okk);
446 for (PolytomousKeyNode matchingNode : matchingNodes){
447 for (PolytomousKeyNode childNode : childNodes){
448 matchingNode.addChild(childNode);
449 }
450 openKeys.removeNode(okk, matchingNode);
451 }
452 if (matchingNodes.isEmpty()){
453 for (PolytomousKeyNode childNode : childNodes){
454 key.getRoot().addChild(childNode);
455 }
456 }
457
458 elKeychoice.removeAttribute("num");
459 }
460
461
462 /**
463 * @param state
464 * @param key
465 * @param elKeychoice
466 * @param taxon
467 * @param feature
468 * @return
469 */
470 private List<PolytomousKeyNode> handleKeychoiceLeads( EfloraImportState state, PolytomousKey key, Element elKeychoice, Taxon taxon, KeyStatement question, Feature feature) {
471 List<PolytomousKeyNode> childNodes = new ArrayList<PolytomousKeyNode>();
472 List<Element> leads = elKeychoice.getChildren("lead");
473 for(Element elLead : leads){
474 PolytomousKeyNode childNode = handleLead(state, key, elLead, taxon, question, feature);
475 childNodes.add(childNode);
476 }
477 return childNodes;
478 }
479
480
481 /**
482 * @param state
483 * @param elKeychoice
484 * @return
485 */
486 private KeyStatement handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
487 KeyStatement statement = null;
488 Attribute charAttr = elKeychoice.getAttribute("char");
489 if (charAttr != null){
490 String charStr = charAttr.getValue();
491 if (StringUtils.isNotBlank(charStr)){
492 statement = KeyStatement.NewInstance(charStr);
493 }
494 elKeychoice.removeAttribute("char");
495 }
496 return statement;
497 }
498
499 /**
500 * @param state
501 * @param elKeychoice
502 * @return
503 */
504 private Feature handleKeychoiceCharAsFeature(EfloraImportState state, Element elKeychoice) {
505 Feature feature = null;
506 Attribute charAttr = elKeychoice.getAttribute("char");
507 if (charAttr != null){
508 String charStr = charAttr.getValue();
509 feature = getFeature(charStr, state);
510 elKeychoice.removeAttribute("char");
511 }
512 return feature;
513 }
514
515
516 private PolytomousKeyNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, KeyStatement question, Feature feature) {
517 PolytomousKeyNode node = PolytomousKeyNode.NewInstance();
518 //TODO the char attribute in the keychoice is more a feature than a question
519 //needs to be discussed on model side
520 node.setQuestion(question);
521 // node.setFeature(feature);
522
523 //text
524 String text = handleLeadText(elLead, node);
525
526 //num
527 handleLeadNum(elLead, text);
528
529 //goto
530 handleLeadGoto(state, key, elLead, taxon, node);
531
532 //others
533 verifyNoAttribute(elLead);
534
535 return node;
536 }
537
538
539 /**
540 * @param elLead
541 * @param node
542 * @return
543 */
544 private String handleLeadText(Element elLead, PolytomousKeyNode node) {
545 String text = elLead.getAttributeValue("text").trim();
546 if (StringUtils.isBlank(text)){
547 logger.warn("Empty text in lead");
548 }
549 elLead.removeAttribute("text");
550 KeyStatement statement = KeyStatement.NewInstance(text);
551 node.setStatement(statement);
552 return text;
553 }
554
555
556 /**
557 * @param state
558 * @param key
559 * @param elLead
560 * @param taxon
561 * @param node
562 */
563 private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, PolytomousKeyNode node) {
564 Attribute gotoAttr = elLead.getAttribute("goto");
565 if (gotoAttr != null){
566 String strGoto = gotoAttr.getValue().trim();
567 //create key
568 UnmatchedLeadsKey gotoKey = null;
569 if (isInternalNode(strGoto)){
570 gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
571 }else{
572 String taxonKey = makeTaxonKey(strGoto, taxon);
573 gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
574 }
575 //
576 UnmatchedLeads openKeys = state.getUnmatchedLeads();
577 if (gotoKey.isInnerLead()){
578 Set<PolytomousKeyNode> existingNodes = openKeys.getNodes(gotoKey);
579 for (PolytomousKeyNode existingNode : existingNodes){
580 node.addChild(existingNode);
581 }
582 }
583 openKeys.addKey(gotoKey, node);
584 //remove attribute (need for consistency check)
585 elLead.removeAttribute("goto");
586 }else{
587 logger.warn("lead has no goto attribute");
588 }
589 }
590
591
592 /**
593 * @param elLead
594 * @param text
595 */
596 private void handleLeadNum(Element elLead, String text) {
597 Attribute numAttr = elLead.getAttribute("num");
598 if (numAttr != null){
599 //TODO num
600 String num = numAttr.getValue();
601 elLead.removeAttribute("num");
602 }else{
603 logger.info("Keychoice has no num attribute: " + text);
604 }
605 }
606
607
608 private String makeTaxonKey(String strGoto, Taxon taxon) {
609 String result = "";
610 if (strGoto == null){
611 return "";
612 }
613 String strGenusName = taxon.getName().getGenusOrUninomial();
614 strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", ""); //replace all brackets
615 strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
616
617 strGoto = strGoto.trim();
618 String[] split = strGoto.split("\\s");
619 for (int i = 0; i<split.length; i++){
620 String single = split[i];
621 if (isGenusAbbrev(single, strGenusName)){
622 split[i] = strGenusName;
623 }
624 // if (isInfraSpecificMarker(single)){
625 // String strSpeciesName = taxon.getName().getSpecificEpithet();
626 // split[i] = strGenusName + " " + strSpeciesName + " ";
627 // }
628 result = (result + " " + split[i]).trim();
629 }
630 return result;
631 }
632
633
634 private boolean isInfraSpecificMarker(String single) {
635 try {
636 if (Rank.getRankByIdInVoc(single).isInfraSpecific()){
637 return true;
638 }
639 } catch (UnknownCdmTypeException e) {
640 return false;
641 }
642 return false;
643 }
644
645
646 private boolean isGenusAbbrev(String single, String strGenusName) {
647 if (! single.matches("[A-Z]\\.?")) {
648 return false;
649 }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
650 return false;
651 }else{
652 return single.charAt(0) == strGenusName.charAt(0);
653 }
654 }
655
656
657 private boolean isInternalNode(String strGoto) {
658 return CdmUtils.isNumeric(strGoto);
659 }
660
661
662 private void makeKeyNotes(Element keyElement, PolytomousKey key) {
663 Element elNotes = keyElement.getChild("notes");
664 if (elNotes != null){
665 keyElement.removeContent(elNotes);
666 String notes = elNotes.getTextNormalize();
667 if (StringUtils.isNotBlank(notes)){
668 key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
669 }
670 }
671 }
672
673
674 private String makeKeyTitle(Element keyElement) {
675 String title = "- no title - ";
676 Attribute titleAttr = keyElement.getAttribute("title");
677 keyElement.removeAttribute(titleAttr);
678 if (titleAttr == null){
679 Element elTitle = keyElement.getChild("keytitle");
680 keyElement.removeContent(elTitle);
681 if (elTitle != null){
682 title = elTitle.getTextNormalize();
683 }
684 }else{
685 title = titleAttr.getValue();
686 }
687 return title;
688 }
689
690
691 /**
692 * @param state
693 * @param element
694 * @param taxon
695 */
696 private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
697 Feature chromosomeFeature = getFeature("chromosomes", state);
698 verifyNoAttribute(element);
699 verifyNoChildren(element);
700 String value = element.getTextNormalize();
701 value = replaceStart(value, "Chromosomes");
702 String chromosomesPart = getChromosomesPart(value);
703 String references = value.replace(chromosomesPart, "").trim();
704 chromosomesPart = chromosomesPart.replace(":", "").trim();
705 return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);
706 }
707
708
709 /**
710 * @param ref
711 * @param string
712 * @return
713 */
714 private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
715 String[] splits = refAll.split(splitter);
716 for (String strRef: splits){
717 Reference ref = ReferenceFactory.newGeneric();
718 ref.setTitleCache(strRef, true);
719 String refDetail = parseReferenceYearAndDetail(ref);
720 sourcable.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, ref, refDetail);
721 }
722
723
724 //TODO use regex instead
725 /* String detailResult = null;
726 String titleToParse = ref.getTitleCache();
727 String reReference = "^\\.{1,}";
728 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
729 String reYear = "\\([1-2]{1}[0-9]{3}\\)";
730 String reYearPeriod = reYear + "(-" + reYear + ")+";
731 String reDetail = "\\.{1,10}$";
732 */
733 }
734
735
736 /**
737 * @param value
738 * @return
739 */
740 private String getChromosomesPart(String str) {
741 Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
742 Matcher matcher = pattern.matcher(str);
743 if (matcher.find()){
744 return matcher.group(0);
745 }else{
746 logger.warn("Chromosomes could not be parsed: " + str);
747 }
748 return str;
749 }
750
751
752 /**
753 * @param state
754 * @param element
755 * @param taxon
756 */
757 private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
758 TextData result = null;
759 verifyNoChildren(element, true);
760 //verifyNoAttribute(element);
761 List<Attribute> attributes = element.getAttributes();
762 for (Attribute attribute : attributes){
763 if (! attribute.getName().equalsIgnoreCase("class")){
764 logger.warn("Char has unhandled attribute " + attribute.getName());
765 }else{
766 String classValue = attribute.getValue();
767 result = handleDescriptiveElement(state, element, taxon, classValue);
768 }
769 }
770 //if no class attribute exists, handle as note
771 if (attributes.isEmpty()){
772 result = handleDescriptiveElement(state, element, taxon, "Note");
773 }
774
775 //Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
776 //taxon.addAnnotation(annotation);
777 return result; //annotation;
778 }
779
780
781 /**
782 * @param state
783 * @param element
784 * @param taxon
785 * @param result
786 * @param attribute
787 * @return
788 */
789 private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {
790 TextData result = null;
791 Feature feature = getFeature(classValue, state);
792 if (feature == null){
793 logger.warn("Unhandled feature: " + classValue);
794 }else{
795 String value = element.getValue();
796 value = replaceStart(value, "Notes");
797 value = replaceStart(value, "Note");
798 result = addDescriptionElement(state, taxon, value, feature, null);
799 }
800 return result;
801 }
802
803
804 private void removeBr(Element element) {
805 element.removeChildren("Br");
806 element.removeChildren("br");
807 element.removeChildren("BR");
808 }
809
810
811 /**
812 * @param state
813 * @param element
814 * @param taxon
815 */
816 private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
817 verifyNoAttribute(element);
818 verifyNoChildren(element, true);
819 String value = element.getTextNormalize();
820 value = replaceStart(value, "Uses");
821 Feature feature = Feature.USES();
822 return addDescriptionElement(state, taxon, value, feature, null);
823
824 }
825
826
827 /**
828 * @param state
829 * @param element
830 * @param taxon
831 * @param unhandledDescriptionChildren
832 */
833 private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
834 verifyNoAttribute(element);
835 verifyNoChildren(element, true);
836 String value = element.getTextNormalize();
837 value = replaceStart(value, "Distribution");
838 Feature feature = Feature.DISTRIBUTION();
839 //distribution parsing almost impossible as there is lots of freetext in the distribution tag
840 return addDescriptionElement(state, taxon, value, feature, null);
841 }
842
843
844 /**
845 * @param state
846 * @param element
847 * @param taxon
848 * @param unhandledDescriptionChildren
849 */
850 private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
851 verifyNoAttribute(elEcology);
852 verifyNoChildren(elEcology, true);
853 String value = elEcology.getTextNormalize();
854 Feature feature = Feature.ECOLOGY();
855 if (value.startsWith("Habitat & Ecology")){
856 feature = getFeature("Habitat & Ecology", state);
857 value = replaceStart(value, "Habitat & Ecology");
858 }else if (value.startsWith("Habitat")){
859 value = replaceStart(value, "Habitat");
860 feature = getFeature("Habitat", state);
861 }
862 return addDescriptionElement(state, taxon, value, feature, null);
863 }
864
865
866
867 /**
868 * @param value
869 * @param replacementString
870 */
871 private String replaceStart(String value, String replacementString) {
872 if (value.startsWith(replacementString) ){
873 value = value.substring(replacementString.length()).trim();
874 }
875 while (value.startsWith("-") || value.startsWith("–") ){
876 value = value.substring("-".length()).trim();
877 }
878 return value;
879 }
880
881
882 /**
883 * @param value
884 * @param replacementString
885 */
886 protected String removeTrailing(String value, String replacementString) {
887 if (value == null){
888 return null;
889 }
890 if (value.endsWith(replacementString) ){
891 value = value.substring(0, value.length() - replacementString.length()).trim();
892 }
893 return value;
894 }
895
896 /**
897 * @param state
898 * @param element
899 * @param taxon
900 * @param unhandledNomeclatureChildren
901 */
902 private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
903 verifyNoAttribute(elNomenclature);
904
905 List<Element> elements = elNomenclature.getChildren();
906 for (Element element : elements){
907 if (element.getName().equals("homotypes")){
908 handleHomotypes(state, element, taxon);
909 }else if (element.getName().equals("notes")){
910 handleNomenclatureNotes(state, element, taxon);
911 }else{
912 unhandledChildren.add(element.getName());
913 }
914 }
915
916 }
917
918
919
920 private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
921 verifyNoAttribute(elNotes);
922 verifyNoChildren(elNotes);
923 String notesText = elNotes.getTextNormalize();
924 Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
925 taxon.addAnnotation(annotation);
926 }
927
928
929
930 private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
931 /**
932 * @param state
933 * @param element
934 * @param taxon
935 */
936 private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
937 verifyNoAttribute(elHomotypes);
938
939 List<Element> elements = elHomotypes.getChildren();
940 HomotypicalGroup homotypicalGroup = null;
941 for (Element element : elements){
942 if (element.getName().equals("nom")){
943 homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
944 }else{
945 unhandledHomotypeChildren.add(element.getName());
946 }
947 }
948
949 }
950
// Names of nom child elements that are neither "name" nor "homonym"; filled by handleNom and handleNomTaxon.
private static Set<String> unhandledNomChildren = new HashSet<String>();
952
953 /**
954 * @param state
955 * @param element
956 * @param taxon
957 */
958 private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
959 List<Attribute> attributes = elNom.getAttributes();
960
961 boolean taxonBaseClassType = false;
962 for (Attribute attribute : attributes){
963 if (! attribute.getName().equalsIgnoreCase("class")){
964 logger.warn("Nom has unhandled attribute " + attribute.getName());
965 }else{
966 String classValue = attribute.getValue();
967 if (classValue.equalsIgnoreCase("acceptedname")){
968 homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
969 taxonBaseClassType = true;
970 }else if (classValue.equalsIgnoreCase("synonym")){
971 homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
972 taxonBaseClassType = true;
973 }else if (classValue.equalsIgnoreCase("typeref")){
974 handleTypeRef(state, elNom, taxon, homotypicalGroup);
975 }else{
976 logger.warn("Unhandled class value for nom: " + classValue);
977 }
978
979 }
980 }
981
982 List<Element> elements = elNom.getChildren();
983 for (Element element : elements){
984 if (element.getName().equals("name") || element.getName().equals("homonym") ){
985 if (taxonBaseClassType == false){
986 logger.warn("Name or homonym tag not allowed in non taxon nom tag");
987 }
988 }else{
989 unhandledNomChildren.add(element.getName());
990 }
991 }
992
993 return homotypicalGroup;
994
995 }
996
997 /**
998 * @param state
999 * @param elNom
1000 * @param taxon
1001 * @param homotypicalGroup
1002 */
1003 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
1004 verifyNoChildren(elNom);
1005 String typeRef = elNom.getTextNormalize();
1006 typeRef = removeStartingTypeRefMinus(typeRef);
1007
1008 String[] split = typeRef.split(":");
1009 if (split.length < 2){
1010 logger.warn("typeRef has no ':' : " + typeRef);
1011 }else if (split.length > 2){
1012 logger.warn("typeRef has more than 1 ':' : " + typeRef);
1013 }else{
1014 StringBuffer typeType = new StringBuffer(split[0]);
1015 String typeText = split[1].trim();
1016 TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
1017
1018 //Name Type Desitnations
1019 if (typeDesignation instanceof NameTypeDesignation){
1020 makeNameTypeDesignations(typeType, typeText, typeDesignation);
1021 }
1022 //SpecimenTypeDesignations
1023 else if (typeDesignation instanceof SpecimenTypeDesignation){
1024 makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
1025 }else{
1026 logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
1027 }
1028 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
1029 name.addTypeDesignation(typeDesignation, true);
1030 }
1031 }
1032 }
1033
1034
1035 /**
1036 * @param typeRef
1037 * @return
1038 */
1039 protected String removeStartingTypeRefMinus(String typeRef) {
1040 typeRef = replaceStart(typeRef, "-");
1041 typeRef = replaceStart(typeRef, "—");
1042 typeRef = replaceStart(typeRef, "\u002d");
1043 typeRef = replaceStart(typeRef, "\u2013");
1044 typeRef = replaceStart(typeRef, "--");
1045 return typeRef;
1046 }
1047
1048 /**
1049 * @param typeType
1050 * @param typeText
1051 * @param typeDesignation
1052 */
1053 private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1054 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1055 //do nothing
1056 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1057 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1058 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1059 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1060 }else{
1061 logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1062 }
1063 //clean
1064 typeText = cleanNameType(typeText);
1065 //create name
1066 BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICNAFP, Rank.SPECIES());
1067 ((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1068 //TODO wie können NameTypes den Namen zugeordnet werden? - wird aber vom Portal via NameCache matching gemacht
1069 }
1070
1071
1072 private String cleanNameType(String typeText) {
1073 String result;
1074 String[] split = typeText.split("\\[.*\\].?");
1075 result = split[0];
1076 return result;
1077 }
1078
1079
1080 /**
1081 * @param typeType
1082 * @param typeText
1083 * @param typeDesignation
1084 */
1085 protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1086 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1087 //do nothing
1088 }else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1089 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1090 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1091 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1092 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1093 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1094 }else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1095 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1096 }else{
1097 logger.warn("Unhandled type string: " + typeType);
1098 }
1099 DerivedUnit specimen = DerivedUnit.NewPreservedSpecimenInstance();
1100 if (typeText.length() > 255){
1101 specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1102 }else{
1103 specimen.setTitleCache(typeText, true);
1104 }
1105 specimen.putDefinition(Language.ENGLISH(), typeText);
1106 ((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1107 }
1108
1109 private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1110 TypeDesignationBase result;
1111 Reference ref = parseTypeDesignationReference(typeType);
1112 if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1113 if (typeType.indexOf(" species")>-1 ){
1114 result = NameTypeDesignation.NewInstance();
1115 int start = typeType.indexOf(" species");
1116 typeType.replace(start, start + " species".length(), "");
1117 }else {
1118 result = NameTypeDesignation.NewInstance();
1119 int start = typeType.indexOf(" genus");
1120 typeType.replace(start, start + " genus".length(), "");
1121 }
1122 }else{
1123 result = SpecimenTypeDesignation.NewInstance();
1124 }
1125 result.setCitation(ref);
1126 return result;
1127 }
1128
1129
1130 private Reference parseTypeDesignationReference(StringBuffer typeType) {
1131 Reference result = null;
1132 String reBracketReference = "\\(.*\\)";
1133 Pattern patBracketReference = Pattern.compile(reBracketReference);
1134 Matcher matcher = patBracketReference.matcher(typeType);
1135 if (matcher.find()){
1136 String refString = matcher.group();
1137 int start = typeType.indexOf(refString);
1138 typeType.replace(start, start + refString.length(), "");
1139 refString = refString.replace("(", "").replace(")", "").trim();
1140 Reference ref = ReferenceFactory.newGeneric();
1141 ref.setTitleCache(refString, true);
1142 result = ref;
1143 }
1144 return result;
1145 }
1146
1147
1148 /**
1149 * @param state
1150 * @param elNom
1151 * @param taxon
1152 */
1153 //body/taxon/
1154 private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1155 INonViralName nvn = makeName(taxon, homotypicalGroup, isSynonym);
1156 TaxonNameBase<?,?> name = TaxonNameBase.castAndDeproxy(nvn);
1157 String num = null;
1158
1159 boolean hasGenusInfo = false;
1160 TeamOrPersonBase<?> lastTeam = null;
1161
1162 //genus
1163 List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1164 if (elGenus.size() > 0){
1165 hasGenusInfo = true;
1166 }else{
1167 logger.debug ("No Synonym Genus");
1168 }
1169 //infra rank -> needed to handle authors correctly
1170 List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1171 Rank infraRank = null;
1172 infraRank = handleInfRank(name, elInfraRank, infraRank);
1173
1174 //get left over elements
1175 List<Element> elements = elNom.getChildren();
1176 elements.removeAll(elInfraRank);
1177
1178 for (Element element : elements){
1179 if (element.getName().equals("name")){
1180 String classValue = element.getAttributeValue("class");
1181 String value = element.getValue().trim();
1182 if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1183 name.setGenusOrUninomial(value);
1184 }else if (classValue.equalsIgnoreCase("family") ){
1185 name.setGenusOrUninomial(value);
1186 name.setRank(Rank.FAMILY());
1187 }else if (classValue.equalsIgnoreCase("subgenus")){
1188 //name.setInfraGenericEpithet(value);
1189 name.setNameCache(value.replace(":", "").trim());
1190 name.setRank(Rank.SUBGENUS());
1191 }else if (classValue.equalsIgnoreCase("epithet") ){
1192 if (hasGenusInfo == true){
1193 name.setSpecificEpithet(value);
1194 }else{
1195 handleInfraspecificEpithet(element, classValue, name);
1196 }
1197 }else if (classValue.equalsIgnoreCase("author")){
1198 handleNameAuthors(element, name);
1199 }else if (classValue.equalsIgnoreCase("paraut")){
1200 handleBasionymAuthor(state, element, name, false);
1201 }else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1202 handleInfrAuthor(state, element, name, true);
1203 }else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1204 handleBasionymAuthor(state, element, name, true);
1205 }else if (classValue.equalsIgnoreCase("infrepi")){
1206 handleInfrEpi(name, infraRank, value);
1207 }else if (classValue.equalsIgnoreCase("pub")){
1208 lastTeam = handleNomenclaturalReference(name, value);
1209 }else if (classValue.equalsIgnoreCase("usage")){
1210 lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1211 }else if (classValue.equalsIgnoreCase("note")){
1212 handleNameNote(name, value);
1213 }else if (classValue.equalsIgnoreCase("num")){
1214 if (num != null){
1215 logger.warn("Duplicate num: " + value);
1216 }else{
1217 num = value;
1218 }
1219 if (isSynonym == true){
1220 logger.warn("Synonym should not have a num");
1221 }
1222 }else if (classValue.equalsIgnoreCase("typification")){
1223 logger.warn("Typification should not be a nom class");
1224 }else{
1225 logger.warn("Unhandled name class: " + classValue);
1226 }
1227 }else if(element.getName().equals("homonym")){
1228 handleHomonym(state, element, name);
1229 }else{
1230 // child element is not "name"
1231 unhandledNomChildren.add(element.getName());
1232 }
1233 }
1234
1235 //handle key
1236 if (! isSynonym){
1237 String taxonString = name.getNameCache();
1238 //try to find matching lead nodes
1239 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1240 Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1241 //same without using the num
1242 if (num != null){
1243 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1244 handleMatchingNodes(state, taxon, noNumLeadsKey);
1245 }
1246 if (matchingNodes.isEmpty() && num != null){
1247 logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1248 }
1249 }
1250
1251 //test nom element has no text
1252 if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1253 String strElNom = elNom.getTextNormalize();
1254 if ("?".equals(strElNom)){
1255 handleQuestionMark(name, taxon);
1256 }
1257 // Character c = strElNom.charAt(0);
1258 //System.out.println(CharUtils.unicodeEscaped(c));
1259 logger.warn("Nom tag has text: " + strElNom);
1260 }
1261
1262 return name.getHomotypicalGroup();
1263 }
1264
1265
1266 private void handleQuestionMark(INonViralName name, Taxon taxon) {
1267 int count = name.getTaxonBases().size();
1268 if (count != 1){
1269 logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1270 }else{
1271 TaxonBase taxonBase = name.getTaxonBases().iterator().next();
1272 taxonBase.setDoubtful(true);
1273 }
1274 }
1275
1276
1277 //merge with handleNomTaxon
1278 private void handleHomonym(EfloraImportState state, Element elHomonym, TaxonNameBase upperName) {
1279 verifyNoAttribute(elHomonym);
1280
1281 //hommonym name
1282 BotanicalName homonymName = BotanicalName.NewInstance(upperName.getRank());
1283 homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1284 homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1285 homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1286 homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1287
1288 for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1289 String classValue = elName.getAttributeValue("class");
1290 String value = elName.getValue().trim();
1291 if (classValue.equalsIgnoreCase("genus") ){
1292 homonymName.setGenusOrUninomial(value);
1293 }else if (classValue.equalsIgnoreCase("epithet") ){
1294 homonymName.setSpecificEpithet(value);
1295 }else if (classValue.equalsIgnoreCase("author")){
1296 handleNameAuthors(elName, homonymName);
1297 }else if (classValue.equalsIgnoreCase("paraut")){
1298 handleBasionymAuthor(state, elName, homonymName, true);
1299 }else if (classValue.equalsIgnoreCase("pub")){
1300 handleNomenclaturalReference(homonymName, value);
1301 }else if (classValue.equalsIgnoreCase("note")){
1302 handleNameNote(homonymName, value);
1303 }else{
1304 logger.warn("Unhandled class value: " + classValue);
1305 }
1306 }
1307 //TODO verify other information
1308
1309
1310 //rel
1311 boolean homonymIsLater = false;
1312 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1313 if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1314 TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1315 TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1316 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1317 }else{
1318 if (upperName.getNomenclaturalReference() == null){
1319 logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1320 }
1321 if (homonymName.getNomenclaturalReference() == null){
1322 logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1323 }
1324 }
1325 if (homonymIsLater){
1326 homonymName.addRelationshipToName(upperName, relType, null);
1327 }else{
1328 upperName.addRelationshipToName(homonymName, relType, null);
1329 }
1330 }
1331
1332
1333 /**
1334 * @param state
1335 * @param taxon
1336 * @param leadsKey
1337 * @return
1338 */
1339 private Set<PolytomousKeyNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1340 Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1341 for (PolytomousKeyNode matchingNode : matchingNodes){
1342 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1343 matchingNode.setTaxon(taxon);
1344 state.getPolytomousKeyNodesToSave().add(matchingNode);
1345 }
1346 return matchingNodes;
1347 }
1348
1349
1350 private void handleNameNote(INonViralName name, String value) {
1351 logger.warn("Name note: " + value + ". Available in portal?");
1352 Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1353 name.addAnnotation(annotation);
1354 }
1355
1356
1357 /**
1358 * @param taxon
1359 * @param name
1360 * @param value
1361 */
1362 protected TeamOrPersonBase handleNameUsage(Taxon taxon, INonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1363 Reference ref = ReferenceFactory.newGeneric();
1364 referenceTitle = removeStartingSymbols(referenceTitle, ref);
1365
1366 ref.setTitleCache(referenceTitle, true);
1367 String microReference = parseReferenceYearAndDetail(ref);
1368 TeamOrPersonBase<?> team = getReferenceAuthor(ref);
1369 parseReferenceType(ref);
1370 if (team == null){
1371 team = lastTeam;
1372 }
1373 ref.setAuthorship(team);
1374
1375 TaxonDescription description = getDescription(taxon);
1376 TextData textData = TextData.NewInstance(Feature.CITATION());
1377 textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, ref, microReference, (TaxonNameBase)name, null);
1378 description.addElement(textData);
1379 return team;
1380 }
1381
1382
1383 /**
1384 * @param referenceTitle
1385 * @param ref
1386 * @return
1387 */
1388 private String removeStartingSymbols(String referenceTitle, Reference ref) {
1389 if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1390 referenceTitle = referenceTitle.substring(1).trim();
1391 ref.setTitleCache(referenceTitle);
1392 }
1393 return referenceTitle;
1394 }
1395
1396
1397 private void parseReferenceType(Reference ref) {
1398 String title = ref.getTitle();
1399 if (title == null){
1400 return;
1401 }
1402 title = title.trim();
1403 //no in reference
1404 if (! title.startsWith("in ")){
1405 ref.setType(ReferenceType.Book);
1406 return;
1407 }
1408
1409 title = title.substring(3);
1410 //in reference
1411 //no ,
1412 if (title.indexOf(",") == -1){
1413 ref.setType(ReferenceType.Article);
1414 IJournal journal = ReferenceFactory.newJournal();
1415 journal.setTitle(title);
1416 ref.setTitle(null);
1417 ref.setInJournal(journal);
1418 //return;
1419 }else{
1420 //,-references
1421 ref.setType(ReferenceType.BookSection);
1422 String[] split = (title).split(",\\s*[A-Z]");
1423 if (split.length <= 1){
1424 logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1425 }
1426 IBook book = ReferenceFactory.newBook();
1427 Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1428 try {
1429 title = title.substring(split[0].length() + 1).trim();
1430 } catch (Exception e) {
1431 logger.error("ERROR occurred when trying to split title: " + title + "; split[0]: + " + split[0]);
1432 }
1433 book.setTitle(title);
1434 book.setAuthorship(bookTeam);
1435 book.setDatePublished(ref.getDatePublished());
1436 ref.setTitle(null);
1437 ref.setInBook(book);
1438 }
1439 }
1440
1441
1442 protected Team getReferenceAuthor (Reference ref) {
1443 boolean isCache = false;
1444 String referenceTitle = ref.getTitle();
1445 if (referenceTitle == null){
1446 isCache = true;
1447 referenceTitle = ref.getTitleCache();
1448 }
1449 //in references
1450 String[] split = (" " + referenceTitle).split(" in ");
1451 if (split.length > 1){
1452 if (StringUtils.isNotBlank(split[0])){
1453 //' in ' is within the reference string, take the preceding string as the team
1454 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1455 if (! isCache){
1456 ref.setTitle("in " + split[1]);
1457 }
1458 return team;
1459 }else{
1460 //string starts with in therefore no author is given
1461 return null;
1462 }
1463 }
1464 //no ,-reference
1465 split = referenceTitle.split(",");
1466 if (split.length < 2){
1467 //no author is given
1468 return null;
1469 }
1470
1471 //,-references
1472 split = (referenceTitle).split(",\\s*[A-Z]");
1473 if (split.length > 1){
1474 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1475 if (! isCache){
1476 ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1477 }
1478 return team;
1479 }else{
1480 logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1481 return null;
1482 }
1483 }
1484
1485
1486 /**
1487 * Replaced by <homonym> tag but still in use for exceptions
1488 * @param detail
1489 * @param name
1490 * @return
1491 */
1492 protected String parseHomonym(String detail, TaxonNameBase name) {
1493 String result;
1494 if (detail == null){
1495 return detail;
1496 }
1497
1498
1499 //non RE
1500 String reNon = "(\\s|,)non\\s";
1501 Pattern patReference = Pattern.compile(reNon);
1502 Matcher matcher = patReference.matcher(detail);
1503 if (matcher.find()){
1504 int start = matcher.start();
1505 int end = matcher.end();
1506
1507 if (detail != null){
1508 logger.warn("Unhandled non part: " + detail.substring(start));
1509 return detail;
1510 }
1511
1512 result = detail.substring(0, start);
1513
1514 //homonym string
1515 String homonymString = detail.substring(end);
1516
1517 //hommonym name
1518 BotanicalName homonymName = BotanicalName.NewInstance(name.getRank());
1519 homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1520 homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1521 homonymName.setSpecificEpithet(name.getSpecificEpithet());
1522 homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1523 Reference homonymNomRef = ReferenceFactory.newGeneric();
1524 homonymNomRef.setTitleCache(homonymString, true);
1525 String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1526 homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1527 String authorTitle = homonymNomRef.getTitleCache();
1528 Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1529 homonymNomRef.setAuthorship(team);
1530 homonymNomRef.setTitle("");
1531 homonymNomRef.setProtectedTitleCache(false);
1532
1533 //rel
1534 boolean homonymIsLater = false;
1535 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1536 TimePeriod homonymYear = homonymNomRef.getDatePublished();
1537 if (name.getNomenclaturalReference() != null){
1538 TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1539 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1540 }else{
1541 logger.warn("Classification name has no nomenclatural reference");
1542 }
1543 if (homonymIsLater){
1544 homonymName.addRelationshipToName(name, relType, null);
1545 }else{
1546 name.addRelationshipToName(homonymName, relType, null);
1547 }
1548
1549 }else{
1550 return detail;
1551 }
1552 return result;
1553 }
1554
1555
1556 /**
1557 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1558 * @param name
1559 * @param value
1560 */
1561 protected TeamOrPersonBase handleNomenclaturalReference(TaxonNameBase name, String value) {
1562 Reference nomRef = ReferenceFactory.newGeneric();
1563 nomRef.setTitleCache(value, true);
1564 parseNomStatus(nomRef, name);
1565 String microReference = parseReferenceYearAndDetail(nomRef);
1566 name.setNomenclaturalReference(nomRef);
1567 microReference = parseHomonym(microReference, name);
1568 name.setNomenclaturalMicroReference(microReference);
1569 TeamOrPersonBase<?> team = name.getCombinationAuthorship();
1570 if (team == null){
1571 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1572 }else{
1573 nomRef.setAuthorship(team);
1574 }
1575 return team;
1576 }
1577
1578 private void handleInfrAuthor(EfloraImportState state, Element elAuthor, INonViralName name, boolean overwrite) {
1579 String strAuthor = elAuthor.getValue().trim();
1580 if (strAuthor.endsWith(",")){
1581 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1582 }
1583 TeamOrPersonBase[] team = getTeam(strAuthor);
1584 if (name.getCombinationAuthorship() != null && overwrite == false){
1585 logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1586 }else{
1587 name.setCombinationAuthorship(team[0]);
1588 name.setExCombinationAuthorship(team[1]);
1589 }
1590
1591
1592 }
1593
1594
1595 /**
1596 * Sets the names rank according to the infrank value
1597 * @param name
1598 * @param elements
1599 * @param elInfraRank
1600 * @param infraRank
1601 * @return
1602 */
1603 private Rank handleInfRank(INonViralName name, List<Element> elInfraRank, Rank infraRank) {
1604 if (elInfraRank.size() == 1){
1605 String strRank = elInfraRank.get(0).getTextNormalize();
1606 try {
1607 infraRank = Rank.getRankByNameOrIdInVoc(strRank);
1608 } catch (UnknownCdmTypeException e) {
1609 try{
1610 infraRank = Rank.getRankByNameOrIdInVoc(strRank + ".");
1611 } catch (UnknownCdmTypeException e2) {
1612 logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1613 }
1614 }
1615 }else if (elInfraRank.size() > 1){
1616 logger.warn ("There is more than 1 infrank");
1617 }
1618 if (infraRank != null){
1619 name.setRank(infraRank);
1620 }
1621 return infraRank;
1622 }
1623
1624
1625 private void handleInfrEpi(INonViralName name, Rank infraRank, String value) {
1626 if (infraRank != null && infraRank.isInfraSpecific()){
1627 name.setInfraSpecificEpithet(value);
1628 if (CdmUtils.isCapital(value)){
1629 logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1630 }
1631 }else if (infraRank != null && infraRank.isInfraGeneric()){
1632 name.setInfraGenericEpithet(value);
1633 if (! CdmUtils.isCapital(value)){
1634 logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1635 }
1636 }else{
1637 logger.warn("Infrepi could not be handled: " + value);
1638 }
1639 }
1640
1641
1642
1643 /**
1644 * Returns the (empty) with the correct homotypical group depending on the taxon status
1645 * @param taxon
1646 * @param homotypicalGroup
1647 * @param isSynonym
1648 * @return
1649 */
1650 private TaxonNameBase makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1651 TaxonNameBase<?,?> name;
1652 if (isSynonym){
1653 name = BotanicalName.NewInstance(Rank.SPECIES(), homotypicalGroup);
1654 SynonymType synonymType = SynonymType.HETEROTYPIC_SYNONYM_OF();
1655 if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1656 synonymType = SynonymType.HOMOTYPIC_SYNONYM_OF();
1657 }
1658 taxon.addSynonymName(name, synonymType);
1659 }else{
1660 name = taxon.getName();
1661 }
1662 return name;
1663 }
1664
1665
1666 /**
1667 * @param element
1668 * @param taxon
1669 */
1670 private void handleInfraspecificEpithet(Element element, String attrValue, INonViralName name) {
1671 String value = element.getTextNormalize();
1672 if (value.indexOf("subsp.") != -1){
1673 //TODO genus and species epi
1674 String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1675 name.setInfraSpecificEpithet(infrEpi);
1676 name.setRank(Rank.SUBSPECIES());
1677 }else if (value.indexOf("var.") != -1){
1678 //TODO genus and species epi
1679 String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1680 name.setInfraSpecificEpithet(infrEpi);
1681 name.setRank(Rank.VARIETY());
1682 }else{
1683 logger.warn("Unhandled infraspecific type: " + value);
1684 }
1685 }
1686
1687
1688 /**
1689 * @param state
1690 * @param element
1691 * @param name
1692 */
1693 private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, INonViralName name, boolean overwrite) {
1694 String strAuthor = elBasionymAuthor.getValue().trim();
1695 Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1696 if (reBasionymAuthor.matcher(strAuthor).matches()){
1697 strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1698 }else{
1699 logger.warn("Brackets are missing for original combination author " + strAuthor);
1700 }
1701 TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1702 if (name.getBasionymAuthorship() != null && overwrite == false){
1703 logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1704 }else{
1705 name.setBasionymAuthorship(basionymTeam[0]);
1706 name.setExBasionymAuthorship(basionymTeam[1]);
1707
1708 }
1709 }
1710
// Maps an author string to the uuid of the team or person created for it; filled by getTeam, read by getUuidTeam.
private final Map<String, UUID> teamMap = new HashMap<String, UUID>();
1712 /**
1713 * @param elAuthors
1714 * @param name
1715 * @param elNom
1716 */
1717 private void handleNameAuthors(Element elAuthor, INonViralName name) {
1718 if (name.getCombinationAuthorship() != null){
1719 logger.warn("Name already has a combination author. Name: " + name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1720 }
1721 String strAuthor = elAuthor.getValue().trim();
1722 if (strAuthor.endsWith(",")){
1723 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1724 }
1725 if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
1726 logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
1727 }
1728 TeamOrPersonBase[] team = getTeam(strAuthor);
1729 name.setCombinationAuthorship(team[0]);
1730 name.setExCombinationAuthorship(team[1]);
1731 }
1732
1733
1734 /**
1735 * @param strAuthor
1736 * @return
1737 */
1738 private TeamOrPersonBase[] getTeam(String strAuthor) {
1739 TeamOrPersonBase[] result = new TeamOrPersonBase[2];
1740 String[] split = strAuthor.split(" ex ");
1741 String strBaseAuthor = null;
1742 String strExAuthor = null;
1743
1744 if (split.length == 2){
1745 strBaseAuthor = split[1];
1746 strExAuthor = split[0];
1747 }else if (split.length == 1){
1748 strBaseAuthor = split[0];
1749 }else{
1750 logger.warn("Could not parse (ex) author: " + strAuthor);
1751 }
1752 result[0] = getUuidTeam(strBaseAuthor);
1753 if (result[0] == null){
1754 result[0] = parseSingleTeam(strBaseAuthor);
1755 teamMap.put(strBaseAuthor, result[0].getUuid());
1756 }
1757 if (strExAuthor != null){
1758 result[1] = getUuidTeam(strExAuthor);
1759 if (result[1] == null){
1760 result[1] = Team.NewInstance();
1761 result[1].setTitleCache(strExAuthor, true);
1762 teamMap.put(strExAuthor, result[1].getUuid());
1763 }
1764
1765 }
1766 return result;
1767 }
1768
1769
1770 protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
1771 TeamOrPersonBase result;
1772 String[] split = strBaseAuthor.split("&");
1773 if (split.length > 1){
1774 result = Team.NewInstance();
1775 for (String personString : split){
1776 Person person = makePerson(personString);
1777 ((Team)result).addTeamMember(person);
1778 }
1779 }else{
1780 result = makePerson(strBaseAuthor.trim());
1781 }
1782 return result;
1783 }
1784
1785
1786 /**
1787 * @param personString
1788 * @return
1789 */
1790 private Person makePerson(String personString) {
1791 personString = personString.trim();
1792 Person person = Person.NewTitledInstance(personString);
1793 person.setNomenclaturalTitle(personString);
1794 return person;
1795 }
1796
1797
1798 /**
1799 * @param result
1800 * @param strBaseAuthor
1801 */
1802 private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
1803 UUID uuidTeam = teamMap.get(strBaseAuthor);
1804 return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
1805 }
1806
1807
1808 private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1809 verifyNoAttribute(elDescription);
1810
1811 List<Element> elements = elDescription.getChildren();
1812 for (Element element : elements){
1813 if (element.getName().equalsIgnoreCase("char")){
1814 handleChar(state, element, taxon);
1815 }else{
1816 logger.warn("Unhandled description child: " + element.getName());
1817 }
1818 }
1819
1820 }
1821
1822
1823 /**
1824 * @param state
1825 * @param element
1826 * @param taxon
1827 */
1828 private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1829 List<Attribute> attributes = element.getAttributes();
1830 for (Attribute attribute : attributes){
1831 if (! attribute.getName().equalsIgnoreCase("class")){
1832 logger.warn("Char has unhandled attribute " + attribute.getName());
1833 }else{
1834 String classValue = attribute.getValue();
1835 Feature feature = getFeature(classValue, state);
1836 if (feature == null){
1837 logger.warn("Unhandled feature: " + classValue);
1838 }else{
1839 String value = element.getValue();
1840 addDescriptionElement(state, taxon, value, feature, null);
1841 }
1842
1843 }
1844 }
1845
1846 List<Element> elements = element.getChildren();
1847 if (! elements.isEmpty()){
1848 logger.warn("Char has unhandled children");
1849 }
1850 }
1851
1852
1853 /**
1854 * @param taxon
1855 * @return
1856 */
1857 protected TaxonDescription getDescription(Taxon taxon) {
1858 for (TaxonDescription description : taxon.getDescriptions()){
1859 if (! description.isImageGallery()){
1860 return description;
1861 }
1862 }
1863 TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1864 return newDescription;
1865 }
1866
1867
1868 /**
1869 * @param classValue
1870 * @param state
1871 * @return
1872 * @throws UndefinedTransformerMethodException
1873 */
1874 private Feature getFeature(String classValue, EfloraImportState state) {
1875 UUID uuid;
1876 try {
1877 uuid = state.getTransformer().getFeatureUuid(classValue);
1878 if (uuid == null){
1879 logger.info("Uuid is null for " + classValue);
1880 }
1881 String featureText = StringUtils.capitalize(classValue);
1882 //TODO eFlora feature vocabulary
1883 Feature feature = getFeature(state, uuid, featureText, featureText, classValue, null);
1884 if (feature == null){
1885 throw new NullPointerException(classValue + " not recognized as a feature");
1886 }
1887 return feature;
1888 } catch (Exception e) {
1889 logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1890 return Feature.UNKNOWN();
1891 }
1892 }
1893
1894
1895 /**
1896 * @param state
1897 * @param element
1898 * @param taxon
1899 * @param unhandledTitleClassess
1900 */
1901 private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1902 // attributes
1903 List<Attribute> attributes = element.getAttributes();
1904 for (Attribute attribute : attributes){
1905 if (! attribute.getName().equalsIgnoreCase("class") ){
1906 if (! attribute.getName().equalsIgnoreCase("num")){
1907 logger.warn("Title has unhandled attribute " + attribute.getName());
1908 }else{
1909 //TODO num attribute in taxon
1910 }
1911 }else{
1912 String classValue = attribute.getValue();
1913 try {
1914 Rank rank;
1915 try {
1916 rank = Rank.getRankByNameOrIdInVoc(classValue);
1917 } catch (Exception e) {
1918 //TODO nc
1919 rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICNAFP, false);
1920 }
1921 taxon.getName().setRank(rank);
1922 if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1923 handleGenus(element.getValue(), taxon.getName());
1924 }else if (rank.equals(Rank.SUBGENUS())){
1925 handleSubGenus(element.getValue(), taxon.getName());
1926 }else if (rank.equals(Rank.SECTION_BOTANY())){
1927 handleSection(element.getValue(), taxon.getName());
1928 }else if (rank.equals(Rank.SPECIES())){
1929 handleSpecies(element.getValue(), taxon.getName());
1930 }else if (rank.equals(Rank.SUBSPECIES())){
1931 handleSubSpecies(element.getValue(), taxon.getName());
1932 }else if (rank.equals(Rank.VARIETY())){
1933 handleVariety(element.getValue(), taxon.getName());
1934 }else{
1935 logger.warn("Unhandled rank: " + rank.getLabel());
1936 }
1937 } catch (UnknownCdmTypeException e) {
1938 logger.warn("Unknown rank " + classValue);
1939 unhandledTitleClassess.add(classValue);
1940 }
1941 }
1942 }
1943 List<Element> elements = element.getChildren();
1944 if (! elements.isEmpty()){
1945 logger.warn("Title has unexpected children");
1946 }
1947 UUID uuidTitle = EfloraTransformer.uuidTitle;
1948 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1949 taxon.addExtension(element.getTextNormalize(), titleExtension);
1950
1951 }
1952
1953
1954 /**
1955 * @param value
1956 * @param taxonNameBase
1957 */
1958 private void handleSubGenus(String value, INonViralName taxonNameBase) {
1959 String name = value.replace("Subgenus", "").trim();
1960 taxonNameBase.setInfraGenericEpithet(name);
1961 }
1962
1963 /**
1964 * @param value
1965 * @param taxonNameBase
1966 */
1967 private void handleSection(String value, INonViralName taxonNameBase) {
1968 String name = value.replace("Section", "").trim();
1969 taxonNameBase.setInfraGenericEpithet(name);
1970 }
1971
1972 /**
1973 * @param value
1974 * @param taxonNameBase
1975 */
1976 private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
1977 //do nothing
1978 }
1979
1980 /**
1981 * @param value
1982 * @param taxonNameBase
1983 */
1984 private void handleVariety(String value, TaxonNameBase taxonNameBase) {
1985 //do nothing
1986 }
1987
1988 /**
1989 * @param value
1990 * @param taxonNameBase
1991 */
1992 private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
1993 //do nothing
1994 }
1995
1996
1997 private final Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1998
1999 /**
2000 * @param value
2001 * @param taxonNameBase
2002 */
2003 protected void handleGenus(String value, INonViralName taxonName) {
2004 Matcher matcher = rexGenusAuthor.matcher(value);
2005 if (matcher.find()){
2006 String author = matcher.group();
2007 // String genus = value.replace(author, "");
2008 author = author.substring(1, author.length() - 1);
2009 Team team = Team.NewInstance();
2010 team.setTitleCache(author, true);
2011 Credit credit = Credit.NewInstance(team, null);
2012 taxonName.addCredit(credit);
2013 // taxonName.setCombinationAuthorship(team);
2014 // taxonName.setGenusOrUninomial(genus);
2015 }else{
2016 logger.info("No Author match for " + value);
2017 }
2018 }
2019
2020
2021 /**
2022 * @param taxon
2023 * @param lastTaxon
2024 */
2025 private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
2026
2027 Classification tree = getTree(state);
2028 if (lastTaxon == null){
2029 tree.addChildTaxon(taxon, null, null);
2030 return;
2031 }
2032 Rank thisRank = taxon.getName().getRank();
2033 Rank lastRank = lastTaxon.getName().getRank();
2034 if (lastTaxon.getTaxonNodes().size() > 0){
2035 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2036 if (thisRank.isLower(lastRank ) ){
2037 lastNode.addChildTaxon(taxon, null, null);
2038 fillMissingEpithetsForTaxa(lastTaxon, taxon);
2039 }else if (thisRank.equals(lastRank)){
2040 TaxonNode parent = lastNode.getParent();
2041 if (parent != null){
2042 parent.addChildTaxon(taxon, null, null);
2043 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2044 }else{
2045 tree.addChildTaxon(taxon, null, null);
2046 }
2047 }else if (thisRank.isHigher(lastRank)){
2048 handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2049 // TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2050 // parentNode.addChildTaxon(taxon, null, null, null);
2051 }
2052 }else{
2053 logger.warn("Last taxon has no node");
2054 }
2055 }
2056
2057
2058
2059 /**
2060 * @param state
2061 * @return
2062 */
2063 private Classification getTree(EfloraImportState state) {
2064 Classification result = state.getTree(null);
2065 if (result == null){
2066 UUID uuid = state.getConfig().getClassificationUuid();
2067 if (uuid == null){
2068 logger.warn("No classification uuid is defined");
2069 result = getNewClassification(state);
2070 }else{
2071 result = getClassificationService().find(uuid);
2072 if (result == null){
2073 result = getNewClassification(state);
2074 result.setUuid(uuid);
2075 }
2076 }
2077 state.putTree(null, result);
2078 }
2079 return result;
2080 }
2081
2082
2083 private Classification getNewClassification(EfloraImportState state) {
2084 Classification result;
2085 result = Classification.NewInstance(state.getConfig().getClassificationTitle());
2086 state.putTree(null, result);
2087 return result;
2088 }
2089
2090
2091 /**
2092 * @param state
2093 * @param taxon
2094 * @param value
2095 * @param feature
2096 * @return
2097 */
2098 private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {
2099 TextData textData = TextData.NewInstance(feature);
2100 Language textLanguage = getDefaultLanguage(state);
2101 textData.putText(textLanguage, value);
2102 TaxonDescription description = getDescription(taxon);
2103 description.addElement(textData);
2104 if (references != null){
2105 makeOriginalSourceReferences(textData, ";", references);
2106 }
2107 return textData;
2108 }
2109
2110 private Language getDefaultLanguage(EfloraImportState state) {
2111 UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2112 if (defaultLanguageUuid != null){
2113 Language result = state.getDefaultLanguage();
2114 if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2115 result = (Language)getTermService().find(defaultLanguageUuid);
2116 state.setDefaultLanguage(result);
2117 if (result == null){
2118 logger.warn("Default language for " + defaultLanguageUuid + " does not exist.");
2119 }
2120 }
2121 return result;
2122 }else{
2123 return Language.DEFAULT();
2124 }
2125 }
2126
2127
2128 /**
2129 * @param elNomenclature
2130 */
2131 private void verifyNoAttribute(Element element) {
2132 List<Attribute> attributes = element.getAttributes();
2133 if (! attributes.isEmpty()){
2134 logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2135 }
2136 }
2137
2138 /**
2139 * @param elNomenclature
2140 */
2141 protected void verifyNoChildren(Element element) {
2142 verifyNoChildren(element, false);
2143 }
2144
2145 /**
2146 * @param elNomenclature
2147 */
2148 private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2149 List<Element> children = element.getChildren();
2150 if (! children.isEmpty()){
2151 if (ignoreLineBreak == true){
2152 for (Element child : children){
2153 if (! child.getName().equalsIgnoreCase("BR")){
2154 logger.warn(element.getName() + " has unhandled child: " + child.getName());
2155 }
2156 }
2157 }else{
2158 logger.warn(element.getName() + " has unhandled children");
2159 }
2160 }
2161 }
2162
2163
2164
2165 /**
2166 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2167 * exists it is added to the name and the nom. status part of the references title cache is
2168 * removed. Requires protected title cache.
2169 * @param ref
2170 * @param nonViralName
2171 */
2172 protected void parseNomStatus(Reference ref, TaxonNameBase nonViralName) {
2173 String titleToParse = ref.getTitleCache();
2174
2175 String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName, true);
2176 if (! noStatusTitle.equals(titleToParse)){
2177 ref.setTitleCache(noStatusTitle, true);
2178 }
2179 }
2180
2181
2182 /**
2183 * Extracts the date published part and returns micro reference
2184 * @param ref
2185 * @return
2186 */
2187 private String parseReferenceYearAndDetail(Reference ref){
2188 String detailResult = null;
2189 String titleToParse = ref.getTitleCache();
2190 titleToParse = removeStartingSymbols(titleToParse, ref);
2191 String reReference = "^\\.{1,}";
2192 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2193 String oneMonth = "(Feb.|Dec.|March|June|July)";
2194 String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2195 String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2196
2197 String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2198 String reDetail = "\\.{1,10}$";
2199
2200 //pattern for the whole string
2201 Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2202 Matcher matcher = patReference.matcher(titleToParse);
2203 if (matcher.find()){
2204 int start = matcher.start();
2205 int end = matcher.end();
2206
2207 //title and other information precedes the year part
2208 String title = titleToParse.substring(0, start).trim();
2209 //detail follows the year part
2210 String detail = titleToParse.substring(end).trim();
2211
2212 //time period
2213 String strPeriod = matcher.group().trim();
2214 strPeriod = strPeriod.substring(1, strPeriod.length()-1); //remove brackets
2215 Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2216 matcher = patStartMonth.matcher(strPeriod);
2217 strPeriod = strPeriod.replace(" ", "");
2218 Integer startMonth = null;
2219 if (matcher.find()){
2220 end = matcher.end();
2221 strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2222 startMonth = getMonth(strPeriod.substring(0, end));
2223 }
2224
2225 TimePeriod datePublished = TimePeriodParser.parseString(strPeriod);
2226 if (startMonth != null){
2227 datePublished.setStartMonth(startMonth);
2228 }
2229 ref.setDatePublished(datePublished);
2230 ref.setTitle(title);
2231 detailResult = CdmUtils.removeTrailingDot(detail);
2232 if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",") ){
2233 detailResult = detailResult.substring(0, detailResult.length() -1);
2234 }
2235 ref.setProtectedTitleCache(false);
2236 }else{
2237 logger.warn("Could not parse reference: " + titleToParse);
2238 }
2239 return detailResult;
2240
2241 }
2242
2243
2244
2245 private Integer getMonth(String month) {
2246 if (month.startsWith("Jan")){
2247 return 1;
2248 }else if (month.startsWith("Feb")){
2249 return 2;
2250 }else if (month.startsWith("Mar")){
2251 return 3;
2252 }else if (month.startsWith("Apr")){
2253 return 4;
2254 }else if (month.startsWith("May")){
2255 return 5;
2256 }else if (month.startsWith("Jun")){
2257 return 6;
2258 }else if (month.startsWith("Jul")){
2259 return 7;
2260 }else if (month.startsWith("Aug")){
2261 return 8;
2262 }else if (month.startsWith("Sep")){
2263 return 9;
2264 }else if (month.startsWith("Oct")){
2265 return 10;
2266 }else if (month.startsWith("Nov")){
2267 return 11;
2268 }else if (month.startsWith("Dec")){
2269 return 12;
2270 }else{
2271 logger.warn("Month not yet supported: " + month);
2272 return null;
2273 }
2274 }
2275
2276
2277 /* (non-Javadoc)
2278 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2279 */
2280 @Override
2281 protected boolean isIgnore(EfloraImportState state){
2282 return ! state.getConfig().isDoTaxa();
2283 }
2284
2285 }