9dacad3fb9fa80e3ac27343ca2a3a03c84f594c0
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / eflora / EfloraTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.eflora;
11
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21
22 import org.apache.commons.lang.CharUtils;
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.log4j.Logger;
25 import org.jdom.Attribute;
26 import org.jdom.Element;
27 import org.springframework.stereotype.Component;
28 import org.springframework.transaction.TransactionStatus;
29
30 import eu.etaxonomy.cdm.common.CdmUtils;
31 import eu.etaxonomy.cdm.common.ResultWrapper;
32 import eu.etaxonomy.cdm.common.XmlHelp;
33 import eu.etaxonomy.cdm.io.common.ICdmIO;
34 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35 import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36 import eu.etaxonomy.cdm.model.agent.Person;
37 import eu.etaxonomy.cdm.model.agent.Team;
38 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40 import eu.etaxonomy.cdm.model.common.Annotation;
41 import eu.etaxonomy.cdm.model.common.AnnotationType;
42 import eu.etaxonomy.cdm.model.common.CdmBase;
43 import eu.etaxonomy.cdm.model.common.Credit;
44 import eu.etaxonomy.cdm.model.common.ExtensionType;
45 import eu.etaxonomy.cdm.model.common.ISourceable;
46 import eu.etaxonomy.cdm.model.common.Language;
47 import eu.etaxonomy.cdm.model.common.Marker;
48 import eu.etaxonomy.cdm.model.common.MarkerType;
49 import eu.etaxonomy.cdm.model.common.Representation;
50 import eu.etaxonomy.cdm.model.common.TimePeriod;
51 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
52 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53 import eu.etaxonomy.cdm.model.description.Feature;
54 import eu.etaxonomy.cdm.model.description.FeatureNode;
55 import eu.etaxonomy.cdm.model.description.PolytomousKey;
56 import eu.etaxonomy.cdm.model.description.TaxonDescription;
57 import eu.etaxonomy.cdm.model.description.TextData;
58 import eu.etaxonomy.cdm.model.name.BotanicalName;
59 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
60 import eu.etaxonomy.cdm.model.name.NameRelationshipType;
61 import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
62 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
63 import eu.etaxonomy.cdm.model.name.NonViralName;
64 import eu.etaxonomy.cdm.model.name.Rank;
65 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
66 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
67 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
68 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
69 import eu.etaxonomy.cdm.model.occurrence.Specimen;
70 import eu.etaxonomy.cdm.model.reference.IBook;
71 import eu.etaxonomy.cdm.model.reference.IJournal;
72 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
73 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
74 import eu.etaxonomy.cdm.model.reference.ReferenceType;
75 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
76 import eu.etaxonomy.cdm.model.taxon.Taxon;
77 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
78 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
79 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
80 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
81 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
82
83
84 /**
85 * @author a.mueller
86 *
87 */
88 @Component
89 public class EfloraTaxonImport extends EfloraImportBase implements ICdmIO<EfloraImportState> {
/** Logger shared by all instances of this import class. */
private static final Logger logger = Logger.getLogger(EfloraTaxonImport.class);

/** A progress message is logged whenever the number of handled taxa reaches a multiple of this value. */
private static int modCount = 30000;

/** Name parser instance held by this import. */
private NonViralNameParserImpl parser = new NonViralNameParserImpl();

/**
 * Creates the import. Nothing is done beyond the construction of the superclass.
 */
public EfloraTaxonImport(){
    //the no-arg constructor of the superclass is invoked implicitly
}
98
99
100 @Override
101 public boolean doCheck(EfloraImportState state){
102 boolean result = true;
103 return result;
104 }
105
106 //TODO make part of state, but state is renewed when invoking the import a second time
107 private UnmatchedLeads unmatchedLeads;
108
109 @Override
110 public boolean doInvoke(EfloraImportState state){
111 logger.info("start make Taxa ...");
112
113 //FIXME reset state
114 state.putTree(null, null);
115 // UnmatchedLeads unmatchedLeads = state.getOpenKeys();
116 if (unmatchedLeads == null){
117 unmatchedLeads = UnmatchedLeads.NewInstance();
118 }
119 state.setUnmatchedLeads(unmatchedLeads);
120
121 TransactionStatus tx = startTransaction();
122 unmatchedLeads.saveToSession(getFeatureTreeService());
123
124
125 //TODO generally do not store the reference object in the config
126 ReferenceBase sourceReference = state.getConfig().getSourceReference();
127 getReferenceService().saveOrUpdate(sourceReference);
128
129 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
130 ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
131
132 Element elbody= getBodyElement(state.getConfig());
133 List<Element> elTaxonList = elbody.getChildren();
134
135 int i = 0;
136
137 Set<String> unhandledTitleClassess = new HashSet<String>();
138 Set<String> unhandledNomeclatureChildren = new HashSet<String>();
139 Set<String> unhandledDescriptionChildren = new HashSet<String>();
140
141 Taxon lastTaxon = getLastTaxon(state);
142
143 //for each taxon
144 for (Element elTaxon : elTaxonList){
145 try {
146 if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
147 if (! elTaxon.getName().equalsIgnoreCase("taxon")){
148 logger.warn("body has element other than 'taxon'");
149 }
150
151 BotanicalName botanicalName = BotanicalName.NewInstance(Rank.SPECIES());
152 Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
153
154 handleTaxonAttributes(elTaxon, taxon, state);
155
156
157 List<Element> children = elTaxon.getChildren();
158 handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren, unhandledDescriptionChildren, taxon, children);
159 handleTaxonRelation(state, taxon, lastTaxon);
160 lastTaxon = taxon;
161 taxaToSave.add(taxon);
162 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
163
164 } catch (Exception e) {
165 logger.warn("Exception occurred in Sapindacea taxon import: " + e);
166 e.printStackTrace();
167 }
168
169 }
170
171 System.out.println(state.getUnmatchedLeads().toString());
172 logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
173
174 logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
175 logger.info("Children for description are: " + unhandledDescriptionChildren);
176 logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
177 logger.info("Children for nom are: " + unhandledNomChildren);
178
179
180 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
181 logger.info(i + " taxa handled. Saving ...");
182 getTaxonService().saveOrUpdate(taxaToSave);
183 getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
184 state.getFeatureNodesToSave().clear();
185 commitTransaction(tx);
186
187 logger.info("end makeTaxa ...");
188 logger.info("start makeKey ...");
189 // invokeDoKey(state);
190 logger.info("end makeKey ...");
191
192 return success.getValue();
193 }
194
195
196 private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
197 List<Attribute> attrList = elTaxon.getAttributes();
198 for (Attribute attr : attrList){
199 String attrName = attr.getName();
200 String attrValue = attr.getValue();
201 if ("class".equals(attrName)){
202 if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES") ){
203 taxon.setDoubtful(true);
204 }else{
205 MarkerType markerType = getMarkerType(state, attrValue);
206 if (markerType == null){
207 logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
208 }else{
209 taxon.addMarker(Marker.NewInstance(markerType, true));
210 }
211 }
212 }else if ("num".equals(attrName)){
213 logger.warn("num not yet supported");
214 }else{
215 logger.warn("Attribute " + attrName + " not yet supported for element taxon");
216 }
217 }
218
219 }
220
221
222 private Taxon getLastTaxon(EfloraImportState state) {
223 if (state.getConfig().getLastTaxonUuid() == null){
224 return null;
225 }else{
226 return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
227 }
228 }
229
230
231 // private void invokeDoKey(SapindaceaeImportState state) {
232 // TransactionStatus tx = startTransaction();
233 //
234 // Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
235 // ITaxonService taxonService = getTaxonService();
236 // ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
237 //
238 // Element elbody= getBodyElement(state.getConfig());
239 // List<Element> elTaxonList = elbody.getChildren();
240 //
241 // int i = 0;
242 //
243 // //for each taxon
244 // for (Element elTaxon : elTaxonList){
245 // if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
246 // if (! elTaxon.getName().equalsIgnoreCase("taxon")){
247 // continue;
248 // }
249 //
250 // List<Element> children = elTaxon.getChildren("key");
251 // for (Element element : children){
252 // handleKeys(state, element, null);
253 // }
254 // nodesToSave.add(taxon);
255 //
256 // }
257 //
258 // }
259
260
261 // body/taxon/*
262 private void handleTaxonElement(EfloraImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
263 AnnotatableEntity lastEntity = null;
264 for (Element element : children){
265 String elName = element.getName();
266
267 if (elName.equalsIgnoreCase("title")){
268 handleTitle(state, element, taxon, unhandledTitleClassess);
269 lastEntity = null;
270 }else if(elName.equalsIgnoreCase("nomenclature")){
271 handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
272 lastEntity = null;
273 }else if(elName.equalsIgnoreCase("description")){
274 handleDescription(state, element, taxon, unhandledDescriptionChildren);
275 lastEntity = null;
276 }else if(elName.equalsIgnoreCase("habitatecology")){
277 lastEntity = handleEcology(state, element, taxon);
278 }else if(elName.equalsIgnoreCase("distribution")){
279 lastEntity = handleDistribution(state, element, taxon);
280 }else if(elName.equalsIgnoreCase("uses")){
281 lastEntity = handleUses(state, element, taxon);
282 }else if(elName.equalsIgnoreCase("notes")){
283 lastEntity = handleTaxonNotes(state, element, taxon);
284 }else if(elName.equalsIgnoreCase("chromosomes")){
285 lastEntity = handleChromosomes(state, element, taxon);
286 }else if(elName.equalsIgnoreCase("vernacularnames")){
287 handleVernaculars(state, element, taxon);
288 }else if(elName.equalsIgnoreCase("key")){
289 lastEntity = handleKeys(state, element, taxon);
290 }else if(elName.equalsIgnoreCase("references")){
291 handleReferences(state, element, taxon, lastEntity);
292 lastEntity = null;
293 }else if(elName.equalsIgnoreCase("taxon")){
294 logger.warn("A taxon should not be part of a taxon");
295 }else if(elName.equalsIgnoreCase("homotypes")){
296 logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
297 }else{
298 logger.warn("Unexpected child for taxon: " + elName);
299 }
300 }
301 }
302
303
304 private void handleVernaculars(EfloraImportState state, Element elVernacular, Taxon taxon) {
305 verifyNoAttribute(elVernacular);
306 verifyNoChildren(elVernacular, false);
307 String value = elVernacular.getTextNormalize();
308 Feature feature = Feature.COMMON_NAME();
309 value = replaceStart(value, "Noms vernaculaires");
310 String[] dialects = value.split(";");
311 for (String singleDialect : dialects){
312 handleSingleDialect(taxon, singleDialect, feature, state);
313 }
314 return;
315 }
316
317
318 private void handleSingleDialect(Taxon taxon, String singleDialect, Feature feature, EfloraImportState state) {
319 singleDialect = singleDialect.trim();
320 TaxonDescription description = getDescription(taxon);
321 String reDialect = "\\(dial\\.\\s.*\\)";
322 // String reDialect = "\\(.*\\)";
323 Pattern patDialect = Pattern.compile(reDialect);
324 Matcher matcher = patDialect.matcher(singleDialect);
325 if (matcher.find()){
326 String dialect = singleDialect.substring(matcher.start(), matcher.end());
327 dialect = dialect.replace("(dial. ", "").replace(")", "");
328
329 Language language = null;
330 try {
331 language = this.getLanguage(state, state.getTransformer().getLanguageUuid(dialect), dialect, dialect, dialect);
332 } catch (UndefinedTransformerMethodException e) {
333 logger.error(e.getMessage());
334 }
335
336 String commonNames = singleDialect.substring(0, matcher.start());
337 String[] splitNames = commonNames.split(",");
338 for (String commonNameString : splitNames){
339 commonNameString = commonNameString.trim();
340 CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameString, language);
341 description.addElement(commonName);
342 }
343 }else{
344 logger.warn("No dialect match: " + singleDialect);
345 }
346 }
347
348
349 private void handleReferences(EfloraImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
350 verifyNoAttribute(elReferences);
351 verifyNoChildren(elReferences, true);
352 String refString = elReferences.getTextNormalize();
353 if (lastEntity == null){
354 logger.warn("No last entity defined: " + refString);
355 return;
356 }
357
358 Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
359 lastEntity.addAnnotation(annotation);
360 }
361
362
363 private PolytomousKey handleKeys(EfloraImportState state, Element elKey, Taxon taxon) {
364 UnmatchedLeads openKeys = state.getUnmatchedLeads();
365
366 //title
367 String title = makeKeyTitle(elKey);
368
369 //key
370 PolytomousKey key = PolytomousKey.NewTitledInstance(title);
371
372 //TODO add covered taxa etc.
373 verifyNoAttribute(elKey);
374
375 //notes
376 makeKeyNotes(elKey, key);
377
378 //keycouplets
379 List<Element> keychoices = new ArrayList<Element>();
380 keychoices.addAll(elKey.getChildren("keycouplet"));
381 keychoices.addAll(elKey.getChildren("keychoice"));
382
383
384 for (Element elKeychoice : keychoices){
385 handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
386 elKey.removeContent(elKeychoice);
387 }
388
389 //
390 verifyNoChildren(elKey);
391 logger.info("Unmatched leads after key handling:" + openKeys.toString());
392
393
394 if (state.getConfig().isDoPrintKeys()){
395 key.print(System.err);
396 }
397 getFeatureTreeService().save(key);
398 return key;
399 }
400
401
402 /**
403 * @param state
404 * @param elKey
405 * @param openKeys
406 * @param key
407 * @param elKeychoice
408 * @param taxon
409 */
410 private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
411
412 //char Attribute
413 Feature feature = handleKeychoiceChar(state, elKeychoice);
414
415 //lead
416 List<FeatureNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, feature);
417
418 //num -> match with unmatched leads
419 handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
420
421 //others
422 verifyNoAttribute(elKeychoice);
423 }
424
425
426 /**
427 * @param openKeys
428 * @param key
429 * @param elKeychoice
430 * @param childNodes
431 */
432 private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<FeatureNode> childNodes) {
433 Attribute numAttr = elKeychoice.getAttribute("num");
434 String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
435 UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
436 Set<FeatureNode> matchingNodes = openKeys.getNodes(okk);
437 for (FeatureNode matchingNode : matchingNodes){
438 for (FeatureNode childNode : childNodes){
439 matchingNode.addChild(childNode);
440 }
441 openKeys.removeNode(okk, matchingNode);
442 }
443 if (matchingNodes.isEmpty()){
444 for (FeatureNode childNode : childNodes){
445 key.getRoot().addChild(childNode);
446 }
447 }
448
449 elKeychoice.removeAttribute("num");
450 }
451
452
453 /**
454 * @param state
455 * @param key
456 * @param elKeychoice
457 * @param taxon
458 * @param feature
459 * @return
460 */
461 private List<FeatureNode> handleKeychoiceLeads( EfloraImportState state, PolytomousKey key, Element elKeychoice, Taxon taxon, Feature feature) {
462 List<FeatureNode> childNodes = new ArrayList<FeatureNode>();
463 List<Element> leads = elKeychoice.getChildren("lead");
464 for(Element elLead : leads){
465 FeatureNode childNode = handleLead(state, key, elLead, taxon, feature);
466 childNodes.add(childNode);
467 }
468 return childNodes;
469 }
470
471
472 /**
473 * @param state
474 * @param elKeychoice
475 * @return
476 */
477 private Feature handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
478 Feature feature = null;
479 Attribute charAttr = elKeychoice.getAttribute("char");
480 if (charAttr != null){
481 String charStr = charAttr.getValue();
482 feature = getFeature(charStr, state);
483 elKeychoice.removeAttribute("char");
484 }
485 return feature;
486 }
487
488
489 private FeatureNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, Feature feature) {
490 FeatureNode node = FeatureNode.NewInstance();
491 node.setFeature(feature);
492
493 //text
494 String text = handleLeadText(elLead, node);
495
496 //num
497 handleLeadNum(elLead, text);
498
499 //goto
500 handleLeadGoto(state, key, elLead, taxon, node);
501
502 //others
503 verifyNoAttribute(elLead);
504
505 return node;
506 }
507
508
509 /**
510 * @param elLead
511 * @param node
512 * @return
513 */
514 private String handleLeadText(Element elLead, FeatureNode node) {
515 String text = elLead.getAttributeValue("text").trim();
516 if (StringUtils.isBlank(text)){
517 logger.warn("Empty text in lead");
518 }
519 elLead.removeAttribute("text");
520 node.addQuestion(Representation.NewInstance(text, null, null, Language.DEFAULT()));
521 return text;
522 }
523
524
525 /**
526 * @param state
527 * @param key
528 * @param elLead
529 * @param taxon
530 * @param node
531 */
532 private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, FeatureNode node) {
533 Attribute gotoAttr = elLead.getAttribute("goto");
534 if (gotoAttr != null){
535 String strGoto = gotoAttr.getValue().trim();
536 //create key
537 UnmatchedLeadsKey gotoKey = null;
538 if (isInternalNode(strGoto)){
539 gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
540 }else{
541 String taxonKey = makeTaxonKey(strGoto, taxon);
542 gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
543 }
544 //
545 UnmatchedLeads openKeys = state.getUnmatchedLeads();
546 openKeys.addKey(gotoKey, node);
547 if (gotoKey.isInnerLead()){
548 Set<FeatureNode> existingNodes = openKeys.getNodes(gotoKey);
549 for (FeatureNode existingNode : existingNodes){
550 node.addChild(existingNode);
551 }
552 }
553 //remove attribute (need for consistency check)
554 elLead.removeAttribute("goto");
555 }else{
556 logger.warn("lead has no goto attribute");
557 }
558 }
559
560
561 /**
562 * @param elLead
563 * @param text
564 */
565 private void handleLeadNum(Element elLead, String text) {
566 Attribute numAttr = elLead.getAttribute("num");
567 if (numAttr != null){
568 //TODO num
569 String num = numAttr.getValue();
570 elLead.removeAttribute("num");
571 }else{
572 logger.info("Keychoice has no num attribute: " + text);
573 }
574 }
575
576
577 private String makeTaxonKey(String strGoto, Taxon taxon) {
578 String result = "";
579 if (strGoto == null){
580 return "";
581 }
582 String strGenusName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getGenusOrUninomial();
583 strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", ""); //replace all brackets
584 strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
585
586 strGoto = strGoto.trim();
587 String[] split = strGoto.split("\\s");
588 for (int i = 0; i<split.length; i++){
589 String single = split[i];
590 if (isGenusAbbrev(single, strGenusName)){
591 split[i] = strGenusName;
592 }
593 // if (isInfraSpecificMarker(single)){
594 // String strSpeciesName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getSpecificEpithet();
595 // split[i] = strGenusName + " " + strSpeciesName + " ";
596 // }
597 result = (result + " " + split[i]).trim();
598 }
599 return result;
600 }
601
602
603 private boolean isInfraSpecificMarker(String single) {
604 try {
605 if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
606 return true;
607 }
608 } catch (UnknownCdmTypeException e) {
609 return false;
610 }
611 return false;
612 }
613
614
615 private boolean isGenusAbbrev(String single, String strGenusName) {
616 if (! single.matches("[A-Z]\\.?")) {
617 return false;
618 }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
619 return false;
620 }else{
621 return single.charAt(0) == strGenusName.charAt(0);
622 }
623 }
624
625
626 private boolean isInternalNode(String strGoto) {
627 return CdmUtils.isNumeric(strGoto);
628 }
629
630
631 private void makeKeyNotes(Element keyElement, PolytomousKey key) {
632 Element elNotes = keyElement.getChild("notes");
633 if (elNotes != null){
634 keyElement.removeContent(elNotes);
635 String notes = elNotes.getTextNormalize();
636 if (StringUtils.isNotBlank(notes)){
637 key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
638 }
639 }
640 }
641
642
643 private String makeKeyTitle(Element keyElement) {
644 String title = "- no title - ";
645 Attribute titleAttr = keyElement.getAttribute("title");
646 keyElement.removeAttribute(titleAttr);
647 if (titleAttr == null){
648 Element elTitle = keyElement.getChild("keytitle");
649 keyElement.removeContent(elTitle);
650 if (elTitle != null){
651 title = elTitle.getTextNormalize();
652 }
653 }else{
654 title = titleAttr.getValue();
655 }
656 return title;
657 }
658
659
660 /**
661 * @param state
662 * @param element
663 * @param taxon
664 */
665 private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
666 Feature chromosomeFeature = getFeature("chromosomes", state);
667 verifyNoAttribute(element);
668 verifyNoChildren(element);
669 String value = element.getTextNormalize();
670 value = replaceStart(value, "Chromosomes");
671 String chromosomesPart = getChromosomesPart(value);
672 String references = value.replace(chromosomesPart, "").trim();
673 chromosomesPart = chromosomesPart.replace(":", "").trim();
674 return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);
675 }
676
677
678 /**
679 * @param ref
680 * @param string
681 * @return
682 */
683 private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
684 String[] splits = refAll.split(splitter);
685 for (String strRef: splits){
686 ReferenceBase ref = ReferenceFactory.newGeneric();
687 ref.setTitleCache(strRef, true);
688 String refDetail = parseReferenceYearAndDetail(ref);
689 sourcable.addSource(null, null, ref, refDetail);
690 }
691
692
693 //TODO use regex instead
694 /* String detailResult = null;
695 String titleToParse = ref.getTitleCache();
696 String reReference = "^\\.{1,}";
697 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
698 String reYear = "\\([1-2]{1}[0-9]{3}\\)";
699 String reYearPeriod = reYear + "(-" + reYear + ")+";
700 String reDetail = "\\.{1,10}$";
701 */
702 }
703
704
705 /**
706 * @param value
707 * @return
708 */
709 private String getChromosomesPart(String str) {
710 Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
711 Matcher matcher = pattern.matcher(str);
712 if (matcher.find()){
713 return matcher.group(0);
714 }else{
715 logger.warn("Chromosomes could not be parsed: " + str);
716 }
717 return str;
718 }
719
720
721 /**
722 * @param state
723 * @param element
724 * @param taxon
725 */
726 private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
727 TextData result = null;
728 verifyNoChildren(element, true);
729 //verifyNoAttribute(element);
730 List<Attribute> attributes = element.getAttributes();
731 for (Attribute attribute : attributes){
732 if (! attribute.getName().equalsIgnoreCase("class")){
733 logger.warn("Char has unhandled attribute " + attribute.getName());
734 }else{
735 String classValue = attribute.getValue();
736 result = handleDescriptiveElement(state, element, taxon, classValue);
737 }
738 }
739 //if no class attribute exists, handle as note
740 if (attributes.isEmpty()){
741 result = handleDescriptiveElement(state, element, taxon, "Note");
742 }
743
744 //Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
745 //taxon.addAnnotation(annotation);
746 return result; //annotation;
747 }
748
749
750 /**
751 * @param state
752 * @param element
753 * @param taxon
754 * @param result
755 * @param attribute
756 * @return
757 */
758 private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {
759 TextData result = null;
760 Feature feature = getFeature(classValue, state);
761 if (feature == null){
762 logger.warn("Unhandled feature: " + classValue);
763 }else{
764 String value = element.getValue();
765 value = replaceStart(value, "Notes");
766 value = replaceStart(value, "Note");
767 result = addDescriptionElement(state, taxon, value, feature, null);
768 }
769 return result;
770 }
771
772
773 private void removeBr(Element element) {
774 element.removeChildren("Br");
775 element.removeChildren("br");
776 element.removeChildren("BR");
777 }
778
779
780 /**
781 * @param state
782 * @param element
783 * @param taxon
784 */
785 private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
786 verifyNoAttribute(element);
787 verifyNoChildren(element, true);
788 String value = element.getTextNormalize();
789 value = replaceStart(value, "Uses");
790 Feature feature = Feature.USES();
791 return addDescriptionElement(state, taxon, value, feature, null);
792
793 }
794
795
796 /**
797 * @param state
798 * @param element
799 * @param taxon
800 * @param unhandledDescriptionChildren
801 */
802 private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
803 verifyNoAttribute(element);
804 verifyNoChildren(element, true);
805 String value = element.getTextNormalize();
806 value = replaceStart(value, "Distribution");
807 Feature feature = Feature.DISTRIBUTION();
808 //distribution parsing almost impossible as there is lots of freetext in the distribution tag
809 return addDescriptionElement(state, taxon, value, feature, null);
810 }
811
812
813 /**
814 * @param state
815 * @param element
816 * @param taxon
817 * @param unhandledDescriptionChildren
818 */
819 private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
820 verifyNoAttribute(elEcology);
821 verifyNoChildren(elEcology, true);
822 String value = elEcology.getTextNormalize();
823 Feature feature = Feature.ECOLOGY();
824 if (value.startsWith("Habitat & Ecology")){
825 feature = getFeature("Habitat & Ecology", state);
826 value = replaceStart(value, "Habitat & Ecology");
827 }else if (value.startsWith("Habitat")){
828 value = replaceStart(value, "Habitat");
829 feature = getFeature("Habitat", state);
830 }
831 return addDescriptionElement(state, taxon, value, feature, null);
832 }
833
834
835
836 /**
837 * @param value
838 * @param replacementString
839 */
840 private String replaceStart(String value, String replacementString) {
841 if (value.startsWith(replacementString) ){
842 value = value.substring(replacementString.length()).trim();
843 }
844 while (value.startsWith("-") || value.startsWith("\96") ){
845 value = value.substring("-".length()).trim();
846 }
847 return value;
848 }
849
850
851 /**
852 * @param value
853 * @param replacementString
854 */
855 protected String removeTrailing(String value, String replacementString) {
856 if (value == null){
857 return null;
858 }
859 if (value.endsWith(replacementString) ){
860 value = value.substring(0, value.length() - replacementString.length()).trim();
861 }
862 return value;
863 }
864
865 /**
866 * @param state
867 * @param element
868 * @param taxon
869 * @param unhandledNomeclatureChildren
870 */
871 private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
872 verifyNoAttribute(elNomenclature);
873
874 List<Element> elements = elNomenclature.getChildren();
875 for (Element element : elements){
876 if (element.getName().equals("homotypes")){
877 handleHomotypes(state, element, taxon);
878 }else if (element.getName().equals("notes")){
879 handleNomenclatureNotes(state, element, taxon);
880 }else{
881 unhandledChildren.add(element.getName());
882 }
883 }
884
885 }
886
887
888
889 private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
890 verifyNoAttribute(elNotes);
891 verifyNoChildren(elNotes);
892 String notesText = elNotes.getTextNormalize();
893 Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
894 taxon.addAnnotation(annotation);
895 }
896
897
898
899 private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
900 /**
901 * @param state
902 * @param element
903 * @param taxon
904 */
905 private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
906 verifyNoAttribute(elHomotypes);
907
908 List<Element> elements = elHomotypes.getChildren();
909 HomotypicalGroup homotypicalGroup = null;
910 for (Element element : elements){
911 if (element.getName().equals("nom")){
912 homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
913 }else{
914 unhandledHomotypeChildren.add(element.getName());
915 }
916 }
917
918 }
919
920 private static Set<String> unhandledNomChildren = new HashSet<String>();
921
922 /**
923 * @param state
924 * @param element
925 * @param taxon
926 */
927 private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
928 List<Attribute> attributes = elNom.getAttributes();
929
930 boolean taxonBaseClassType = false;
931 for (Attribute attribute : attributes){
932 if (! attribute.getName().equalsIgnoreCase("class")){
933 logger.warn("Nom has unhandled attribute " + attribute.getName());
934 }else{
935 String classValue = attribute.getValue();
936 if (classValue.equalsIgnoreCase("acceptedname")){
937 homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
938 taxonBaseClassType = true;
939 }else if (classValue.equalsIgnoreCase("synonym")){
940 homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
941 taxonBaseClassType = true;
942 }else if (classValue.equalsIgnoreCase("typeref")){
943 handleTypeRef(state, elNom, taxon, homotypicalGroup);
944 }else{
945 logger.warn("Unhandled class value for nom: " + classValue);
946 }
947
948 }
949 }
950
951 List<Element> elements = elNom.getChildren();
952 for (Element element : elements){
953 if (element.getName().equals("name") || element.getName().equals("homonym") ){
954 if (taxonBaseClassType == false){
955 logger.warn("Name or homonym tag not allowed in non taxon nom tag");
956 }
957 }else{
958 unhandledNomChildren.add(element.getName());
959 }
960 }
961
962 return homotypicalGroup;
963
964 }
965
966 /**
967 * @param state
968 * @param elNom
969 * @param taxon
970 * @param homotypicalGroup
971 */
972 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
973 verifyNoChildren(elNom);
974 String typeRef = elNom.getTextNormalize();
975 typeRef = removeStartingTypeRefMinus(typeRef);
976
977 String[] split = typeRef.split(":");
978 if (split.length < 2){
979 logger.warn("typeRef has no ':' : " + typeRef);
980 }else if (split.length > 2){
981 logger.warn("typeRef has more than 1 ':' : " + typeRef);
982 }else{
983 StringBuffer typeType = new StringBuffer(split[0]);
984 String typeText = split[1].trim();
985 TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
986
987 //Name Type Desitnations
988 if (typeDesignation instanceof NameTypeDesignation){
989 makeNameTypeDesignations(typeType, typeText, typeDesignation);
990 }
991 //SpecimenTypeDesignations
992 else if (typeDesignation instanceof SpecimenTypeDesignation){
993 makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
994 }else{
995 logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
996 }
997 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
998 name.addTypeDesignation(typeDesignation, true);
999 }
1000 }
1001 }
1002
1003
1004 /**
1005 * @param typeRef
1006 * @return
1007 */
1008 protected String removeStartingTypeRefMinus(String typeRef) {
1009 typeRef = replaceStart(typeRef, "-");
1010 typeRef = replaceStart(typeRef, "\97");
1011 typeRef = replaceStart(typeRef, "\u002d");
1012 typeRef = replaceStart(typeRef, "\u2013");
1013 typeRef = replaceStart(typeRef, "--");
1014 return typeRef;
1015 }
1016
1017 /**
1018 * @param typeType
1019 * @param typeText
1020 * @param typeDesignation
1021 */
1022 private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1023 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1024 //do nothing
1025 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1026 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1027 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1028 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1029 }else{
1030 logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1031 }
1032 //clean
1033 typeText = cleanNameType(typeText);
1034 //create name
1035 BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICBN, Rank.SPECIES());
1036 ((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1037 //TODO wie können NameTypes den Namen zugeordnet werden? - wird aber vom Portal via NameCache matching gemacht
1038 }
1039
1040
1041 private String cleanNameType(String typeText) {
1042 String result;
1043 String[] split = typeText.split("\\[.*\\].?");
1044 result = split[0];
1045 return result;
1046 }
1047
1048
1049 /**
1050 * @param typeType
1051 * @param typeText
1052 * @param typeDesignation
1053 */
1054 protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1055 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1056 //do nothing
1057 }else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1058 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1059 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1060 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1061 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1062 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1063 }else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1064 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1065 }else{
1066 logger.warn("Unhandled type string: " + typeType);
1067 }
1068 Specimen specimen = Specimen.NewInstance();
1069 if (typeText.length() > 255){
1070 specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1071 }else{
1072 specimen.setTitleCache(typeText, true);
1073 }
1074 specimen.addDefinition(typeText, Language.ENGLISH());
1075 ((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1076 }
1077
1078 private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1079 TypeDesignationBase result;
1080 ReferenceBase ref = parseTypeDesignationReference(typeType);
1081 if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1082 if (typeType.indexOf(" species")>-1 ){
1083 result = NameTypeDesignation.NewInstance();
1084 int start = typeType.indexOf(" species");
1085 typeType.replace(start, start + " species".length(), "");
1086 }else {
1087 result = NameTypeDesignation.NewInstance();
1088 int start = typeType.indexOf(" genus");
1089 typeType.replace(start, start + " genus".length(), "");
1090 }
1091 }else{
1092 result = SpecimenTypeDesignation.NewInstance();
1093 }
1094 result.setCitation(ref);
1095 return result;
1096 }
1097
1098
1099 private ReferenceBase parseTypeDesignationReference(StringBuffer typeType) {
1100 ReferenceBase result = null;
1101 String reBracketReference = "\\(.*\\)";
1102 Pattern patBracketReference = Pattern.compile(reBracketReference);
1103 Matcher matcher = patBracketReference.matcher(typeType);
1104 if (matcher.find()){
1105 String refString = matcher.group();
1106 int start = typeType.indexOf(refString);
1107 typeType.replace(start, start + refString.length(), "");
1108 refString = refString.replace("(", "").replace(")", "").trim();
1109 ReferenceBase ref = ReferenceFactory.newGeneric();
1110 ref.setTitleCache(refString, true);
1111 result = ref;
1112 }
1113 return result;
1114 }
1115
1116
1117 /**
1118 * @param state
1119 * @param elNom
1120 * @param taxon
1121 */
1122 //body/taxon/
1123 private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1124 NonViralName name = makeName(taxon, homotypicalGroup, isSynonym);
1125 String num = null;
1126
1127 boolean hasGenusInfo = false;
1128 TeamOrPersonBase lastTeam = null;
1129
1130 //genus
1131 List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1132 if (elGenus.size() > 0){
1133 hasGenusInfo = true;
1134 }else{
1135 logger.debug ("No Synonym Genus");
1136 }
1137 //infra rank -> needed to handle authors correctly
1138 List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1139 Rank infraRank = null;
1140 infraRank = handleInfRank(name, elInfraRank, infraRank);
1141
1142 //get left over elements
1143 List<Element> elements = elNom.getChildren();
1144 elements.removeAll(elInfraRank);
1145
1146 for (Element element : elements){
1147 if (element.getName().equals("name")){
1148 String classValue = element.getAttributeValue("class");
1149 String value = element.getValue().trim();
1150 if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1151 name.setGenusOrUninomial(value);
1152 }else if (classValue.equalsIgnoreCase("family") ){
1153 name.setGenusOrUninomial(value);
1154 name.setRank(Rank.FAMILY());
1155 }else if (classValue.equalsIgnoreCase("subgenus")){
1156 //name.setInfraGenericEpithet(value);
1157 name.setNameCache(value.replace(":", "").trim());
1158 name.setRank(Rank.SUBGENUS());
1159 }else if (classValue.equalsIgnoreCase("epithet") ){
1160 if (hasGenusInfo == true){
1161 name.setSpecificEpithet(value);
1162 }else{
1163 handleInfraspecificEpithet(element, classValue, name);
1164 }
1165 }else if (classValue.equalsIgnoreCase("author")){
1166 handleNameAuthors(element, name);
1167 }else if (classValue.equalsIgnoreCase("paraut")){
1168 handleBasionymAuthor(state, element, name, false);
1169 }else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1170 handleInfrAuthor(state, element, name, true);
1171 }else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1172 handleBasionymAuthor(state, element, name, true);
1173 }else if (classValue.equalsIgnoreCase("infrepi")){
1174 handleInfrEpi(name, infraRank, value);
1175 }else if (classValue.equalsIgnoreCase("pub")){
1176 lastTeam = handleNomenclaturalReference(name, value);
1177 }else if (classValue.equalsIgnoreCase("usage")){
1178 lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1179 }else if (classValue.equalsIgnoreCase("note")){
1180 handleNameNote(name, value);
1181 }else if (classValue.equalsIgnoreCase("num")){
1182 if (num != null){
1183 logger.warn("Duplicate num: " + value);
1184 }else{
1185 num = value;
1186 }
1187 if (isSynonym == true){
1188 logger.warn("Synonym should not have a num");
1189 }
1190 }else if (classValue.equalsIgnoreCase("typification")){
1191 logger.warn("Typification should not be a nom class");
1192 }else{
1193 logger.warn("Unhandled name class: " + classValue);
1194 }
1195 }else if(element.getName().equals("homonym")){
1196 handleHomonym(state, element, name);
1197 }else{
1198 // child element is not "name"
1199 unhandledNomChildren.add(element.getName());
1200 }
1201 }
1202
1203 //handle key
1204 if (! isSynonym){
1205 String taxonString = name.getNameCache();
1206 //try to find matching lead nodes
1207 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1208 Set<FeatureNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1209 //same without using the num
1210 if (num != null){
1211 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1212 handleMatchingNodes(state, taxon, noNumLeadsKey);
1213 }
1214 if (matchingNodes.isEmpty() && num != null){
1215 logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1216 }
1217 }
1218
1219 //test nom element has no text
1220 if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("\97", "").replace("\u002d","").replace("\u2013", ""))){
1221 String strElNom = elNom.getTextNormalize();
1222 if ("?".equals(strElNom)){
1223 handleQuestionMark(name, taxon);
1224 }
1225 // Character c = strElNom.charAt(0);
1226 //System.out.println(CharUtils.unicodeEscaped(c));
1227 logger.warn("Nom tag has text: " + strElNom);
1228 }
1229
1230 return name.getHomotypicalGroup();
1231 }
1232
1233
1234 private void handleQuestionMark(NonViralName name, Taxon taxon) {
1235 int count = name.getTaxonBases().size();
1236 if (count != 1){
1237 logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1238 }else{
1239 TaxonBase taxonBase = (TaxonBase)name.getTaxonBases().iterator().next();
1240 taxonBase.setDoubtful(true);
1241 }
1242 }
1243
1244
1245 //merge with handleNomTaxon
1246 private void handleHomonym(EfloraImportState state, Element elHomonym, NonViralName upperName) {
1247 verifyNoAttribute(elHomonym);
1248
1249 //hommonym name
1250 BotanicalName homonymName = BotanicalName.NewInstance(upperName.getRank());
1251 homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1252 homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1253 homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1254 homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1255
1256 for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1257 String classValue = elName.getAttributeValue("class");
1258 String value = elName.getValue().trim();
1259 if (classValue.equalsIgnoreCase("genus") ){
1260 homonymName.setGenusOrUninomial(value);
1261 }else if (classValue.equalsIgnoreCase("epithet") ){
1262 homonymName.setSpecificEpithet(value);
1263 }else if (classValue.equalsIgnoreCase("author")){
1264 handleNameAuthors(elName, homonymName);
1265 }else if (classValue.equalsIgnoreCase("paraut")){
1266 handleBasionymAuthor(state, elName, homonymName, true);
1267 }else if (classValue.equalsIgnoreCase("pub")){
1268 handleNomenclaturalReference(homonymName, value);
1269 }else if (classValue.equalsIgnoreCase("note")){
1270 handleNameNote(homonymName, value);
1271 }else{
1272 logger.warn("Unhandled class value: " + classValue);
1273 }
1274 }
1275 //TODO verify other information
1276
1277
1278 //rel
1279 boolean homonymIsLater = false;
1280 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1281 if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1282 TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1283 TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1284 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1285 }else{
1286 if (upperName.getNomenclaturalReference() == null){
1287 logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1288 }
1289 if (homonymName.getNomenclaturalReference() == null){
1290 logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1291 }
1292 }
1293 if (homonymIsLater){
1294 homonymName.addRelationshipToName(upperName, relType, null);
1295 }else{
1296 upperName.addRelationshipToName(homonymName, relType, null);
1297 }
1298
1299 }
1300
1301
1302 /**
1303 * @param state
1304 * @param taxon
1305 * @param leadsKey
1306 * @return
1307 */
1308 private Set<FeatureNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1309 Set<FeatureNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1310 for (FeatureNode matchingNode : matchingNodes){
1311 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1312 matchingNode.setTaxon(taxon);
1313 state.getFeatureNodesToSave().add(matchingNode);
1314 }
1315 return matchingNodes;
1316 }
1317
1318
1319 private void handleNameNote(NonViralName name, String value) {
1320 logger.warn("Name note: " + value + ". Available in portal?");
1321 Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1322 name.addAnnotation(annotation);
1323 }
1324
1325
1326 /**
1327 * @param taxon
1328 * @param name
1329 * @param value
1330 */
1331 protected TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1332 ReferenceBase ref = ReferenceFactory.newGeneric();
1333 referenceTitle = removeStartingSymbols(referenceTitle, ref);
1334
1335 ref.setTitleCache(referenceTitle, true);
1336 String microReference = parseReferenceYearAndDetail(ref);
1337 TeamOrPersonBase team = getReferenceAuthor(ref);
1338 parseReferenceType(ref);
1339 if (team == null){
1340 team = lastTeam;
1341 }
1342 ref.setAuthorTeam(team);
1343
1344 TaxonDescription description = getDescription(taxon);
1345 TextData textData = TextData.NewInstance(Feature.CITATION());
1346 textData.addSource(null, null, ref, microReference, name, null);
1347 description.addElement(textData);
1348 return team;
1349 }
1350
1351
1352 /**
1353 * @param referenceTitle
1354 * @param ref
1355 * @return
1356 */
1357 private String removeStartingSymbols(String referenceTitle, ReferenceBase ref) {
1358 if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1359 referenceTitle = referenceTitle.substring(1).trim();
1360 ref.setTitleCache(referenceTitle);
1361 }
1362 return referenceTitle;
1363 }
1364
1365
1366 private void parseReferenceType(ReferenceBase ref) {
1367 String title = ref.getTitle();
1368 if (title == null){
1369 return;
1370 }
1371 title = title.trim();
1372 //no in reference
1373 if (! title.startsWith("in ")){
1374 ref.setType(ReferenceType.Book);
1375 return;
1376 }
1377
1378 title = title.substring(3);
1379 //in reference
1380 //no ,
1381 if (title.indexOf(",") == -1){
1382 ref.setType(ReferenceType.Article);
1383 IJournal journal = ReferenceFactory.newJournal();
1384 journal.setTitle(title);
1385 ref.setTitle(null);
1386 ref.setInJournal(journal);
1387 //return;
1388 }else{
1389 //,-references
1390 ref.setType(ReferenceType.BookSection);
1391 String[] split = (title).split(",\\s*[A-Z]");
1392 if (split.length <= 1){
1393 logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1394 }
1395 IBook book = ReferenceFactory.newBook();
1396 Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1397 try {
1398 title = title.substring(split[0].length() + 1).trim();
1399 } catch (Exception e) {
1400 logger.error("ERROR occurred when trying to split title: " + title + "; split[0]: + " + split[0]);
1401 }
1402 book.setTitle(title);
1403 book.setAuthorTeam(bookTeam);
1404 book.setDatePublished(ref.getDatePublished());
1405 ref.setTitle(null);
1406 ref.setInBook(book);
1407 }
1408 }
1409
1410
1411 protected Team getReferenceAuthor (ReferenceBase ref) {
1412 boolean isCache = false;
1413 String referenceTitle = ref.getTitle();
1414 if (referenceTitle == null){
1415 isCache = true;
1416 referenceTitle = ref.getTitleCache();
1417 }
1418 //in references
1419 String[] split = (" " + referenceTitle).split(" in ");
1420 if (split.length > 1){
1421 if (StringUtils.isNotBlank(split[0])){
1422 //' in ' is within the reference string, take the preceding string as the team
1423 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1424 if (! isCache){
1425 ref.setTitle("in " + split[1]);
1426 }
1427 return team;
1428 }else{
1429 //string starts with in therefore no author is given
1430 return null;
1431 }
1432 }
1433 //no ,-reference
1434 split = referenceTitle.split(",");
1435 if (split.length < 2){
1436 //no author is given
1437 return null;
1438 }
1439
1440 //,-references
1441 split = (referenceTitle).split(",\\s*[A-Z]");
1442 if (split.length > 1){
1443 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1444 if (! isCache){
1445 ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1446 }
1447 return team;
1448 }else{
1449 logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1450 return null;
1451 }
1452 }
1453
1454
1455 /**
1456 * Replaced by <homonym> tag but still in use for exceptions
1457 * @param detail
1458 * @param name
1459 * @return
1460 */
1461 protected String parseHomonym(String detail, NonViralName name) {
1462 String result;
1463 if (detail == null){
1464 return detail;
1465 }
1466
1467
1468 //non RE
1469 String reNon = "(\\s|,)non\\s";
1470 Pattern patReference = Pattern.compile(reNon);
1471 Matcher matcher = patReference.matcher(detail);
1472 if (matcher.find()){
1473 int start = matcher.start();
1474 int end = matcher.end();
1475
1476 if (detail != null){
1477 logger.warn("Unhandled non part: " + detail.substring(start));
1478 return detail;
1479 }
1480
1481 result = detail.substring(0, start);
1482
1483 //homonym string
1484 String homonymString = detail.substring(end);
1485
1486 //hommonym name
1487 BotanicalName homonymName = BotanicalName.NewInstance(name.getRank());
1488 homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1489 homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1490 homonymName.setSpecificEpithet(name.getSpecificEpithet());
1491 homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1492 ReferenceBase homonymNomRef = ReferenceFactory.newGeneric();
1493 homonymNomRef.setTitleCache(homonymString);
1494 String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1495 homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1496 String authorTitle = homonymNomRef.getTitleCache();
1497 Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1498 homonymNomRef.setAuthorTeam(team);
1499 homonymNomRef.setTitle("");
1500 homonymNomRef.setProtectedTitleCache(false);
1501
1502 //rel
1503 boolean homonymIsLater = false;
1504 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1505 TimePeriod homonymYear = homonymNomRef.getDatePublished();
1506 if (name.getNomenclaturalReference() != null){
1507 TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1508 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1509 }else{
1510 logger.warn("Classification name has no nomenclatural reference");
1511 }
1512 if (homonymIsLater){
1513 homonymName.addRelationshipToName(name, relType, null);
1514 }else{
1515 name.addRelationshipToName(homonymName, relType, null);
1516 }
1517
1518 }else{
1519 return detail;
1520 }
1521 return result;
1522 }
1523
1524
1525 /**
1526 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1527 * @param name
1528 * @param value
1529 */
1530 protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
1531 ReferenceBase nomRef = ReferenceFactory.newGeneric();
1532 nomRef.setTitleCache(value, true);
1533 parseNomStatus(nomRef, name);
1534 String microReference = parseReferenceYearAndDetail(nomRef);
1535 name.setNomenclaturalReference(nomRef);
1536 microReference = parseHomonym(microReference, name);
1537 name.setNomenclaturalMicroReference(microReference);
1538 TeamOrPersonBase team = (TeamOrPersonBase)name.getCombinationAuthorTeam();
1539 if (team == null){
1540 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1541 }else{
1542 nomRef.setAuthorTeam(team);
1543 }
1544 return team;
1545 }
1546
1547 private void handleInfrAuthor(EfloraImportState state, Element elAuthor, NonViralName name, boolean overwrite) {
1548 String strAuthor = elAuthor.getValue().trim();
1549 if (strAuthor.endsWith(",")){
1550 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1551 }
1552 TeamOrPersonBase[] team = getTeam(strAuthor);
1553 if (name.getCombinationAuthorTeam() != null && overwrite == false){
1554 logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1555 }else{
1556 name.setCombinationAuthorTeam(team[0]);
1557 name.setExCombinationAuthorTeam(team[1]);
1558 }
1559
1560
1561 }
1562
1563
1564 /**
1565 * Sets the names rank according to the infrank value
1566 * @param name
1567 * @param elements
1568 * @param elInfraRank
1569 * @param infraRank
1570 * @return
1571 */
1572 private Rank handleInfRank(NonViralName name, List<Element> elInfraRank, Rank infraRank) {
1573 if (elInfraRank.size() == 1){
1574 String strRank = elInfraRank.get(0).getTextNormalize();
1575 try {
1576 infraRank = Rank.getRankByNameOrAbbreviation(strRank);
1577 } catch (UnknownCdmTypeException e) {
1578 try{
1579 infraRank = Rank.getRankByNameOrAbbreviation(strRank + ".");
1580 } catch (UnknownCdmTypeException e2) {
1581 logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1582 }
1583 }
1584 }else if (elInfraRank.size() > 1){
1585 logger.warn ("There is more than 1 infrank");
1586 }
1587 if (infraRank != null){
1588 name.setRank(infraRank);
1589 }
1590 return infraRank;
1591 }
1592
1593
1594 private void handleInfrEpi(NonViralName name, Rank infraRank, String value) {
1595 if (infraRank != null && infraRank.isInfraSpecific()){
1596 name.setInfraSpecificEpithet(value);
1597 if (CdmUtils.isCapital(value)){
1598 logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1599 }
1600 }else if (infraRank != null && infraRank.isInfraGeneric()){
1601 name.setInfraGenericEpithet(value);
1602 if (! CdmUtils.isCapital(value)){
1603 logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1604 }
1605 }else{
1606 logger.warn("Infrepi could not be handled: " + value);
1607 }
1608 }
1609
1610
1611
1612 /**
1613 * Returns the (empty) with the correct homotypical group depending on the taxon status
1614 * @param taxon
1615 * @param homotypicalGroup
1616 * @param isSynonym
1617 * @return
1618 */
1619 private NonViralName makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1620 NonViralName name;
1621 if (isSynonym){
1622 name = BotanicalName.NewInstance(Rank.SPECIES(), homotypicalGroup);
1623 SynonymRelationshipType synonymType = SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF();
1624 if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1625 synonymType = SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF();
1626 }
1627 taxon.addSynonymName(name, synonymType);
1628 }else{
1629 name = (NonViralName)taxon.getName();
1630 }
1631 return name;
1632 }
1633
1634
1635 /**
1636 * @param element
1637 * @param taxon
1638 */
1639 private void handleInfraspecificEpithet(Element element, String attrValue, NonViralName name) {
1640 String value = element.getTextNormalize();
1641 if (value.indexOf("subsp.") != -1){
1642 //TODO genus and species epi
1643 String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1644 name.setInfraSpecificEpithet(infrEpi);
1645 name.setRank(Rank.SUBSPECIES());
1646 }else if (value.indexOf("var.") != -1){
1647 //TODO genus and species epi
1648 String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1649 name.setInfraSpecificEpithet(infrEpi);
1650 name.setRank(Rank.VARIETY());
1651 }else{
1652 logger.warn("Unhandled infraspecific type: " + value);
1653 }
1654 }
1655
1656
1657 /**
1658 * @param state
1659 * @param element
1660 * @param name
1661 */
1662 private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, NonViralName name, boolean overwrite) {
1663 String strAuthor = elBasionymAuthor.getValue().trim();
1664 Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1665 if (reBasionymAuthor.matcher(strAuthor).matches()){
1666 strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1667 }else{
1668 logger.warn("Brackets are missing for original combination author " + strAuthor);
1669 }
1670 TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1671 if (name.getBasionymAuthorTeam() != null && overwrite == false){
1672 logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1673 }else{
1674 name.setBasionymAuthorTeam(basionymTeam[0]);
1675 name.setExBasionymAuthorTeam(basionymTeam[1]);
1676
1677 }
1678 }
1679
1680 private Map<String, UUID> teamMap = new HashMap<String, UUID>();
1681 /**
1682 * @param elAuthors
1683 * @param name
1684 * @param elNom
1685 */
1686 private void handleNameAuthors(Element elAuthor, NonViralName name) {
1687 if (name.getCombinationAuthorTeam() != null){
1688 logger.warn("Name already has a combination author. Name: " + name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1689 }
1690 String strAuthor = elAuthor.getValue().trim();
1691 if (strAuthor.endsWith(",")){
1692 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1693 }
1694 if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
1695 logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
1696 }
1697 TeamOrPersonBase[] team = getTeam(strAuthor);
1698 name.setCombinationAuthorTeam(team[0]);
1699 name.setExCombinationAuthorTeam(team[1]);
1700 }
1701
1702
1703 /**
1704 * @param strAuthor
1705 * @return
1706 */
1707 private TeamOrPersonBase[] getTeam(String strAuthor) {
1708 TeamOrPersonBase[] result = new TeamOrPersonBase[2];
1709 String[] split = strAuthor.split(" ex ");
1710 String strBaseAuthor = null;
1711 String strExAuthor = null;
1712
1713 if (split.length == 2){
1714 strBaseAuthor = split[1];
1715 strExAuthor = split[0];
1716 }else if (split.length == 1){
1717 strBaseAuthor = split[0];
1718 }else{
1719 logger.warn("Could not parse (ex) author: " + strAuthor);
1720 }
1721 result[0] = getUuidTeam(strBaseAuthor);
1722 if (result[0] == null){
1723 result[0] = parseSingleTeam(strBaseAuthor);
1724 teamMap.put(strBaseAuthor, result[0].getUuid());
1725 }
1726 if (strExAuthor != null){
1727 result[1] = getUuidTeam(strExAuthor);
1728 if (result[1] == null){
1729 result[1] = Team.NewInstance();
1730 result[1].setTitleCache(strExAuthor, true);
1731 teamMap.put(strExAuthor, result[1].getUuid());
1732 }
1733
1734 }
1735 return result;
1736 }
1737
1738
1739 protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
1740 TeamOrPersonBase result;
1741 String[] split = strBaseAuthor.split("&");
1742 if (split.length > 1){
1743 result = Team.NewInstance();
1744 for (String personString : split){
1745 Person person = makePerson(personString);
1746 ((Team)result).addTeamMember(person);
1747 }
1748 }else{
1749 result = makePerson(strBaseAuthor.trim());
1750 }
1751 return result;
1752 }
1753
1754
1755 /**
1756 * @param personString
1757 * @return
1758 */
1759 private Person makePerson(String personString) {
1760 personString = personString.trim();
1761 Person person = Person.NewTitledInstance(personString);
1762 person.setNomenclaturalTitle(personString);
1763 return person;
1764 }
1765
1766
1767 /**
1768 * @param result
1769 * @param strBaseAuthor
1770 */
1771 private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
1772 UUID uuidTeam = teamMap.get(strBaseAuthor);
1773 return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
1774 }
1775
1776
1777 private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1778 verifyNoAttribute(elDescription);
1779
1780 List<Element> elements = elDescription.getChildren();
1781 for (Element element : elements){
1782 if (element.getName().equalsIgnoreCase("char")){
1783 handleChar(state, element, taxon);
1784 }else{
1785 logger.warn("Unhandled description child: " + element.getName());
1786 }
1787 }
1788
1789 }
1790
1791
1792 /**
1793 * @param state
1794 * @param element
1795 * @param taxon
1796 */
1797 private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1798 List<Attribute> attributes = element.getAttributes();
1799 for (Attribute attribute : attributes){
1800 if (! attribute.getName().equalsIgnoreCase("class")){
1801 logger.warn("Char has unhandled attribute " + attribute.getName());
1802 }else{
1803 String classValue = attribute.getValue();
1804 Feature feature = getFeature(classValue, state);
1805 if (feature == null){
1806 logger.warn("Unhandled feature: " + classValue);
1807 }else{
1808 String value = element.getValue();
1809 addDescriptionElement(state, taxon, value, feature, null);
1810 }
1811
1812 }
1813 }
1814
1815 List<Element> elements = element.getChildren();
1816 if (! elements.isEmpty()){
1817 logger.warn("Char has unhandled children");
1818 }
1819 }
1820
1821
1822 /**
1823 * @param taxon
1824 * @return
1825 */
1826 protected TaxonDescription getDescription(Taxon taxon) {
1827 for (TaxonDescription description : taxon.getDescriptions()){
1828 if (! description.isImageGallery()){
1829 return description;
1830 }
1831 }
1832 TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1833 return newDescription;
1834 }
1835
1836
1837 /**
1838 * @param classValue
1839 * @param state
1840 * @return
1841 * @throws UndefinedTransformerMethodException
1842 */
1843 private Feature getFeature(String classValue, EfloraImportState state) {
1844 UUID uuid;
1845 try {
1846 uuid = state.getTransformer().getFeatureUuid(classValue);
1847 if (uuid == null){
1848 logger.info("Uuid is null for " + classValue);
1849 }
1850 String featureText = StringUtils.capitalize(classValue);
1851 Feature feature = getFeature(state, uuid, featureText, featureText, classValue);
1852 if (feature == null){
1853 throw new NullPointerException(classValue + " not recognized as a feature");
1854 }
1855 return feature;
1856 } catch (Exception e) {
1857 logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1858 return Feature.UNKNOWN();
1859 }
1860 }
1861
1862
1863 /**
1864 * @param state
1865 * @param element
1866 * @param taxon
1867 * @param unhandledTitleClassess
1868 */
1869 private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1870 // attributes
1871 List<Attribute> attributes = element.getAttributes();
1872 for (Attribute attribute : attributes){
1873 if (! attribute.getName().equalsIgnoreCase("class") ){
1874 if (! attribute.getName().equalsIgnoreCase("num")){
1875 logger.warn("Title has unhandled attribute " + attribute.getName());
1876 }else{
1877 //TODO num attribute in taxon
1878 }
1879 }else{
1880 String classValue = attribute.getValue();
1881 try {
1882 Rank rank;
1883 try {
1884 rank = Rank.getRankByNameOrAbbreviation(classValue);
1885 } catch (Exception e) {
1886 //TODO nc
1887 rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICBN, false);
1888 }
1889 taxon.getName().setRank(rank);
1890 if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1891 handleGenus(element.getValue(), taxon.getName());
1892 }else if (rank.equals(Rank.SUBGENUS())){
1893 handleSubGenus(element.getValue(), taxon.getName());
1894 }else if (rank.equals(Rank.SECTION_BOTANY())){
1895 handleSection(element.getValue(), taxon.getName());
1896 }else if (rank.equals(Rank.SPECIES())){
1897 handleSpecies(element.getValue(), taxon.getName());
1898 }else if (rank.equals(Rank.SUBSPECIES())){
1899 handleSubSpecies(element.getValue(), taxon.getName());
1900 }else if (rank.equals(Rank.VARIETY())){
1901 handleVariety(element.getValue(), taxon.getName());
1902 }else{
1903 logger.warn("Unhandled rank: " + rank.getLabel());
1904 }
1905 } catch (UnknownCdmTypeException e) {
1906 logger.warn("Unknown rank " + classValue);
1907 unhandledTitleClassess.add(classValue);
1908 }
1909 }
1910 }
1911 List<Element> elements = element.getChildren();
1912 if (! elements.isEmpty()){
1913 logger.warn("Title has unexpected children");
1914 }
1915 UUID uuidTitle = EfloraTransformer.uuidTitle;
1916 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1917 taxon.addExtension(element.getTextNormalize(), titleExtension);
1918
1919 }
1920
1921
1922 /**
1923 * @param value
1924 * @param taxonNameBase
1925 */
1926 private void handleSubGenus(String value, TaxonNameBase taxonNameBase) {
1927 String name = value.replace("Subgenus", "").trim();
1928 ((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1929 }
1930
1931 /**
1932 * @param value
1933 * @param taxonNameBase
1934 */
1935 private void handleSection(String value, TaxonNameBase taxonNameBase) {
1936 String name = value.replace("Section", "").trim();
1937 ((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1938 }
1939
1940 /**
1941 * @param value
1942 * @param taxonNameBase
1943 */
1944 private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
1945 //do nothing
1946 }
1947
1948 /**
1949 * @param value
1950 * @param taxonNameBase
1951 */
1952 private void handleVariety(String value, TaxonNameBase taxonNameBase) {
1953 //do nothing
1954 }
1955
1956 /**
1957 * @param value
1958 * @param taxonNameBase
1959 */
1960 private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
1961 //do nothing
1962 }
1963
1964
1965 private Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1966
1967 /**
1968 * @param value
1969 * @param taxonNameBase
1970 */
1971 protected void handleGenus(String value, TaxonNameBase taxonName) {
1972 Matcher matcher = rexGenusAuthor.matcher(value);
1973 if (matcher.find()){
1974 String author = matcher.group();
1975 // String genus = value.replace(author, "");
1976 author = author.substring(1, author.length() - 1);
1977 Team team = Team.NewInstance();
1978 team.setTitleCache(author, true);
1979 Credit credit = Credit.NewInstance(team, null);
1980 taxonName.addCredit(credit);
1981 // NonViralName nvn = (NonViralName)taxonName;
1982 // nvn.setCombinationAuthorTeam(team);
1983 // nvn.setGenusOrUninomial(genus);
1984 }else{
1985 logger.info("No Author match for " + value);
1986 }
1987 }
1988
1989
1990 /**
1991 * @param taxon
1992 * @param lastTaxon
1993 */
1994 private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
1995
1996 TaxonomicTree tree = getTree(state);
1997 if (lastTaxon == null){
1998 tree.addChildTaxon(taxon, null, null, null);
1999 return;
2000 }
2001 Rank thisRank = taxon.getName().getRank();
2002 Rank lastRank = lastTaxon.getName().getRank();
2003 if (lastTaxon.getTaxonNodes().size() > 0){
2004 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2005 if (thisRank.isLower(lastRank ) ){
2006 lastNode.addChildTaxon(taxon, null, null, null);
2007 fillMissingEpithetsForTaxa(lastTaxon, taxon);
2008 }else if (thisRank.equals(lastRank)){
2009 TaxonNode parent = lastNode.getParent();
2010 if (parent != null){
2011 parent.addChildTaxon(taxon, null, null, null);
2012 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2013 }else{
2014 tree.addChildTaxon(taxon, null, null, null);
2015 }
2016 }else if (thisRank.isHigher(lastRank)){
2017 handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2018 // TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2019 // parentNode.addChildTaxon(taxon, null, null, null);
2020 }
2021 }else{
2022 logger.warn("Last taxon has no node");
2023 }
2024 }
2025
2026
2027
2028 /**
2029 * @param state
2030 * @return
2031 */
2032 private TaxonomicTree getTree(EfloraImportState state) {
2033 TaxonomicTree result = state.getTree(null);
2034 if (result == null){
2035 UUID uuid = state.getConfig().getTaxonomicTreeUuid();
2036 if (uuid == null){
2037 logger.warn("No classification uuid is defined");
2038 result = getNewClassification(state);
2039 }else{
2040 result = getTaxonTreeService().getTaxonomicTreeByUuid(uuid);
2041 if (result == null){
2042 result = getNewClassification(state);
2043 result.setUuid(uuid);
2044 }
2045 }
2046 state.putTree(null, result);
2047 }
2048 return result;
2049 }
2050
2051
2052 private TaxonomicTree getNewClassification(EfloraImportState state) {
2053 TaxonomicTree result;
2054 result = TaxonomicTree.NewInstance(state.getConfig().getClassificationTitle());
2055 state.putTree(null, result);
2056 return result;
2057 }
2058
2059
2060 /**
2061 * @param state
2062 * @param taxon
2063 * @param value
2064 * @param feature
2065 * @return
2066 */
2067 private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {
2068 TextData textData = TextData.NewInstance(feature);
2069 Language textLanguage = getDefaultLanguage(state);
2070 textData.putText(value, textLanguage);
2071 TaxonDescription description = getDescription(taxon);
2072 description.addElement(textData);
2073 if (references != null){
2074 makeOriginalSourceReferences(textData, ";", references);
2075 }
2076 return textData;
2077 }
2078
2079 private Language getDefaultLanguage(EfloraImportState state) {
2080 UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2081 if (defaultLanguageUuid != null){
2082 Language result = state.getDefaultLanguage();
2083 if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2084 result = (Language)getTermService().find(defaultLanguageUuid);
2085 state.setDefaultLanguage(result);
2086 if (result == null){
2087 logger.warn("Default language for " + defaultLanguageUuid + " does not exist.");
2088 }
2089 }
2090 return result;
2091 }else{
2092 return Language.DEFAULT();
2093 }
2094 }
2095
2096
2097 /**
2098 * @param elNomenclature
2099 */
2100 private void verifyNoAttribute(Element element) {
2101 List<Attribute> attributes = element.getAttributes();
2102 if (! attributes.isEmpty()){
2103 logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2104 }
2105 }
2106
2107 /**
2108 * @param elNomenclature
2109 */
2110 protected void verifyNoChildren(Element element) {
2111 verifyNoChildren(element, false);
2112 }
2113
2114 /**
2115 * @param elNomenclature
2116 */
2117 private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2118 List<Element> children = element.getChildren();
2119 if (! children.isEmpty()){
2120 if (ignoreLineBreak == true){
2121 for (Element child : children){
2122 if (! child.getName().equalsIgnoreCase("BR")){
2123 logger.warn(element.getName() + " has unhandled child: " + child.getName());
2124 }
2125 }
2126 }else{
2127 logger.warn(element.getName() + " has unhandled children");
2128 }
2129 }
2130 }
2131
2132
2133
2134 /**
2135 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2136 * exists it is added to the name and the nom. status part of the references title cache is
2137 * removed. Requires protected title cache.
2138 * @param ref
2139 * @param nonViralName
2140 */
2141 protected void parseNomStatus(ReferenceBase ref, NonViralName nonViralName) {
2142 String titleToParse = ref.getTitleCache();
2143
2144 String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName);
2145 if (! noStatusTitle.equals(titleToParse)){
2146 ref.setTitleCache(noStatusTitle, true);
2147 }
2148 }
2149
2150
2151 /**
2152 * Extracts the date published part and returns micro reference
2153 * @param ref
2154 * @return
2155 */
2156 private String parseReferenceYearAndDetail(ReferenceBase ref){
2157 String detailResult = null;
2158 String titleToParse = ref.getTitleCache();
2159 titleToParse = removeStartingSymbols(titleToParse, ref);
2160 String reReference = "^\\.{1,}";
2161 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2162 String oneMonth = "(Feb.|Dec.|March|June|July)";
2163 String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2164 String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2165
2166 String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2167 String reDetail = "\\.{1,10}$";
2168
2169 //pattern for the whole string
2170 Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2171 Matcher matcher = patReference.matcher(titleToParse);
2172 if (matcher.find()){
2173 int start = matcher.start();
2174 int end = matcher.end();
2175
2176 //title and other information precedes the year part
2177 String title = titleToParse.substring(0, start).trim();
2178 //detail follows the year part
2179 String detail = titleToParse.substring(end).trim();
2180
2181 //time period
2182 String strPeriod = matcher.group().trim();
2183 strPeriod = strPeriod.substring(1, strPeriod.length()-1); //remove brackets
2184 Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2185 matcher = patStartMonth.matcher(strPeriod);
2186 strPeriod = strPeriod.replace(" ", "");
2187 Integer startMonth = null;
2188 if (matcher.find()){
2189 end = matcher.end();
2190 strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2191 startMonth = getMonth(strPeriod.substring(0, end));
2192 }
2193
2194 TimePeriod datePublished = TimePeriod.parseString(strPeriod);
2195 if (startMonth != null){
2196 datePublished.setStartMonth(startMonth);
2197 }
2198 ref.setDatePublished(datePublished);
2199 ref.setTitle(title);
2200 detailResult = CdmUtils.removeTrailingDot(detail);
2201 if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",") ){
2202 detailResult = detailResult.substring(0, detailResult.length() -1);
2203 }
2204 ref.setProtectedTitleCache(false);
2205 }else{
2206 logger.warn("Could not parse reference: " + titleToParse);
2207 }
2208 return detailResult;
2209
2210 }
2211
2212
2213
2214 private Integer getMonth(String month) {
2215 if (month.startsWith("Jan")){
2216 return 1;
2217 }else if (month.startsWith("Feb")){
2218 return 2;
2219 }else if (month.startsWith("Mar")){
2220 return 3;
2221 }else if (month.startsWith("Apr")){
2222 return 4;
2223 }else if (month.startsWith("May")){
2224 return 5;
2225 }else if (month.startsWith("Jun")){
2226 return 6;
2227 }else if (month.startsWith("Jul")){
2228 return 7;
2229 }else if (month.startsWith("Aug")){
2230 return 8;
2231 }else if (month.startsWith("Sep")){
2232 return 9;
2233 }else if (month.startsWith("Oct")){
2234 return 10;
2235 }else if (month.startsWith("Nov")){
2236 return 11;
2237 }else if (month.startsWith("Dec")){
2238 return 12;
2239 }else{
2240 logger.warn("Month not yet supported: " + month);
2241 return null;
2242 }
2243 }
2244
2245
2246 /* (non-Javadoc)
2247 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2248 */
2249 protected boolean isIgnore(EfloraImportState state){
2250 return ! state.getConfig().isDoTaxa();
2251 }
2252
2253 }