Eflora base classes
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / eflora / EfloraTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.eflora;
11
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21
22 import org.apache.commons.lang.CharUtils;
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.log4j.Logger;
25 import org.jdom.Attribute;
26 import org.jdom.Element;
27 import org.springframework.stereotype.Component;
28 import org.springframework.transaction.TransactionStatus;
29
30 import eu.etaxonomy.cdm.common.CdmUtils;
31 import eu.etaxonomy.cdm.common.ResultWrapper;
32 import eu.etaxonomy.cdm.common.XmlHelp;
33 import eu.etaxonomy.cdm.io.common.ICdmIO;
34 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35 import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36 import eu.etaxonomy.cdm.model.agent.Team;
37 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
38 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
39 import eu.etaxonomy.cdm.model.common.Annotation;
40 import eu.etaxonomy.cdm.model.common.AnnotationType;
41 import eu.etaxonomy.cdm.model.common.CdmBase;
42 import eu.etaxonomy.cdm.model.common.Credit;
43 import eu.etaxonomy.cdm.model.common.ExtensionType;
44 import eu.etaxonomy.cdm.model.common.ISourceable;
45 import eu.etaxonomy.cdm.model.common.Language;
46 import eu.etaxonomy.cdm.model.common.Marker;
47 import eu.etaxonomy.cdm.model.common.MarkerType;
48 import eu.etaxonomy.cdm.model.common.Representation;
49 import eu.etaxonomy.cdm.model.common.TimePeriod;
50 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
51 import eu.etaxonomy.cdm.model.description.Feature;
52 import eu.etaxonomy.cdm.model.description.FeatureNode;
53 import eu.etaxonomy.cdm.model.description.PolytomousKey;
54 import eu.etaxonomy.cdm.model.description.TaxonDescription;
55 import eu.etaxonomy.cdm.model.description.TextData;
56 import eu.etaxonomy.cdm.model.name.BotanicalName;
57 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
58 import eu.etaxonomy.cdm.model.name.NameRelationshipType;
59 import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
60 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
61 import eu.etaxonomy.cdm.model.name.NonViralName;
62 import eu.etaxonomy.cdm.model.name.Rank;
63 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
64 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
65 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
66 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
67 import eu.etaxonomy.cdm.model.occurrence.Specimen;
68 import eu.etaxonomy.cdm.model.reference.IBook;
69 import eu.etaxonomy.cdm.model.reference.IJournal;
70 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
71 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
72 import eu.etaxonomy.cdm.model.reference.ReferenceType;
73 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
74 import eu.etaxonomy.cdm.model.taxon.Taxon;
75 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
76 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
77 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
78 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
79 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
80
81
82 /**
83 * @author a.mueller
84 *
85 */
86 @Component
87 public class EfloraTaxonImport extends EfloraImportBase implements ICdmIO<EfloraImportState> {
/** log4j logger shared by all instances of this import class. */
private static final Logger logger = Logger.getLogger(EfloraTaxonImport.class);

/** Progress interval: doInvoke writes a "Taxa handled" log line each time this many taxa were processed. */
private static int modCount = 30000;
/** Name parser; makeNameTypeDesignations uses it to parse the name of a name type. */
private NonViralNameParserImpl parser = new NonViralNameParserImpl();
92
93 public EfloraTaxonImport(){
94 super();
95 }
96
97
98 @Override
99 public boolean doCheck(EfloraImportState state){
100 boolean result = true;
101 return result;
102 }
103
//TODO make part of state, but state is renewed when invoking the import a second time
/**
 * Leads of polytomous keys whose goto target has not been resolved yet.
 * Created lazily in doInvoke and handed to the import state there; as it is
 * an instance field it is reused when doInvoke is called again on this instance.
 */
private UnmatchedLeads unmatchedLeads;
106
107 @Override
108 public boolean doInvoke(EfloraImportState state){
109 logger.info("start make Taxa ...");
110
111 //FIXME reset state
112 state.putTree(null, null);
113 // UnmatchedLeads unmatchedLeads = state.getOpenKeys();
114 if (unmatchedLeads == null){
115 unmatchedLeads = UnmatchedLeads.NewInstance();
116 }
117 state.setUnmatchedLeads(unmatchedLeads);
118
119 TransactionStatus tx = startTransaction();
120 unmatchedLeads.saveToSession(getFeatureTreeService());
121
122
123 //TODO generally do not store the reference object in the config
124 ReferenceBase sourceReference = state.getConfig().getSourceReference();
125 getReferenceService().saveOrUpdate(sourceReference);
126
127 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
128 ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
129
130 Element elbody= getBodyElement(state.getConfig());
131 List<Element> elTaxonList = elbody.getChildren();
132
133 int i = 0;
134
135 Set<String> unhandledTitleClassess = new HashSet<String>();
136 Set<String> unhandledNomeclatureChildren = new HashSet<String>();
137 Set<String> unhandledDescriptionChildren = new HashSet<String>();
138
139 Taxon lastTaxon = getLastTaxon(state);
140
141 //for each taxon
142 for (Element elTaxon : elTaxonList){
143 try {
144 if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
145 if (! elTaxon.getName().equalsIgnoreCase("taxon")){
146 logger.warn("body has element other than 'taxon'");
147 }
148
149 BotanicalName botanicalName = BotanicalName.NewInstance(Rank.SPECIES());
150 Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
151
152 handleTaxonAttributes(elTaxon, taxon, state);
153
154
155 List<Element> children = elTaxon.getChildren();
156 handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren, unhandledDescriptionChildren, taxon, children);
157 handleTaxonRelation(state, taxon, lastTaxon);
158 lastTaxon = taxon;
159 taxaToSave.add(taxon);
160 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
161
162 } catch (Exception e) {
163 logger.warn("Exception occurred in Sapindacea taxon import: " + e);
164 e.printStackTrace();
165 }
166
167 }
168
169 System.out.println(state.getUnmatchedLeads().toString());
170 logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
171
172 logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
173 logger.info("Children for description are: " + unhandledDescriptionChildren);
174 logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
175 logger.info("Children for nom are: " + unhandledNomChildren);
176
177
178 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
179 logger.info(i + " taxa handled. Saving ...");
180 getTaxonService().saveOrUpdate(taxaToSave);
181 getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
182 state.getFeatureNodesToSave().clear();
183 commitTransaction(tx);
184
185 logger.info("end makeTaxa ...");
186 logger.info("start makeKey ...");
187 // invokeDoKey(state);
188 logger.info("end makeKey ...");
189
190 return success.getValue();
191 }
192
193
194 private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
195 List<Attribute> attrList = elTaxon.getAttributes();
196 for (Attribute attr : attrList){
197 String attrName = attr.getName();
198 String attrValue = attr.getValue();
199 if ("class".equals(attrName)){
200 if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES") ){
201 taxon.setDoubtful(true);
202 }else{
203 MarkerType markerType = getMarkerType(state, attrValue);
204 if (markerType == null){
205 logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
206 }else{
207 taxon.addMarker(Marker.NewInstance(markerType, true));
208 }
209 }
210 }else if ("num".equals(attrName)){
211 logger.warn("num not yet supported");
212 }else{
213 logger.warn("Attribute " + attrName + " not yet supported for element taxon");
214 }
215 }
216
217 }
218
219
220 private Taxon getLastTaxon(EfloraImportState state) {
221 if (state.getConfig().getLastTaxonUuid() == null){
222 return null;
223 }else{
224 return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
225 }
226 }
227
228
229 // private void invokeDoKey(SapindaceaeImportState state) {
230 // TransactionStatus tx = startTransaction();
231 //
232 // Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
233 // ITaxonService taxonService = getTaxonService();
234 // ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
235 //
236 // Element elbody= getBodyElement(state.getConfig());
237 // List<Element> elTaxonList = elbody.getChildren();
238 //
239 // int i = 0;
240 //
241 // //for each taxon
242 // for (Element elTaxon : elTaxonList){
243 // if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
244 // if (! elTaxon.getName().equalsIgnoreCase("taxon")){
245 // continue;
246 // }
247 //
248 // List<Element> children = elTaxon.getChildren("key");
249 // for (Element element : children){
250 // handleKeys(state, element, null);
251 // }
252 // nodesToSave.add(taxon);
253 //
254 // }
255 //
256 // }
257
258
259 // body/taxon/*
260 private void handleTaxonElement(EfloraImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
261 AnnotatableEntity lastEntity = null;
262 for (Element element : children){
263 String elName = element.getName();
264
265 if (elName.equalsIgnoreCase("title")){
266 handleTitle(state, element, taxon, unhandledTitleClassess);
267 lastEntity = null;
268 }else if(elName.equalsIgnoreCase("nomenclature")){
269 handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
270 lastEntity = null;
271 }else if(elName.equalsIgnoreCase("description")){
272 handleDescription(state, element, taxon, unhandledDescriptionChildren);
273 lastEntity = null;
274 }else if(elName.equalsIgnoreCase("habitatecology")){
275 lastEntity = handleEcology(state, element, taxon);
276 }else if(elName.equalsIgnoreCase("distribution")){
277 lastEntity = handleDistribution(state, element, taxon);
278 }else if(elName.equalsIgnoreCase("uses")){
279 lastEntity = handleUses(state, element, taxon);
280 }else if(elName.equalsIgnoreCase("notes")){
281 lastEntity = handleTaxonNotes(state, element, taxon);
282 }else if(elName.equalsIgnoreCase("chromosomes")){
283 lastEntity = handleChromosomes(state, element, taxon);
284 }else if(elName.equalsIgnoreCase("key")){
285 lastEntity = handleKeys(state, element, taxon);
286 }else if(elName.equalsIgnoreCase("references")){
287 handleReferences(state, element, taxon, lastEntity);
288 lastEntity = null;
289 }else if(elName.equalsIgnoreCase("taxon")){
290 logger.warn("A taxon should not be part of a taxon");
291 }else if(elName.equalsIgnoreCase("homotypes")){
292 logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
293 }else{
294 logger.warn("Unexpected child for taxon: " + elName);
295 }
296 }
297 }
298
299
300 private void handleReferences(EfloraImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
301 verifyNoAttribute(elReferences);
302 verifyNoChildren(elReferences, true);
303 String refString = elReferences.getTextNormalize();
304 // refString = replaceStart(replaceStart(refString, "References:"), "Sources:");
305 if (lastEntity == null){
306 logger.warn("No last entity defined: " + refString);
307 return;
308 }
309
310 Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
311 lastEntity.addAnnotation(annotation);
312
313 // ReferenceBase ref = ReferenceFactory.newGeneric();
314 // ref.setTitleCache(refString, true);
315 // if (lastEntity instanceof DescriptionElementBase){
316 // DescriptionElementSource source = DescriptionElementSource.NewInstance(ref, null);
317 // CdmBase.deproxy(lastEntity, DescriptionElementBase.class).addSource(source);
318 // }else if (lastEntity instanceof IdentifiableEntity){
319 // IdentifiableSource source = IdentifiableSource.NewInstance(ref, null);
320 // CdmBase.deproxy(lastEntity, IdentifiableEntity.class).addSource(source);
321 // }
322 // else{
323 // logger.warn("lastEntity type not supported: " + lastEntity.getClass().getName());
324 // }
325
326 logger.info("References need to be moved to their parent");
327
328 }
329
330
331 private PolytomousKey handleKeys(EfloraImportState state, Element elKey, Taxon taxon) {
332 UnmatchedLeads openKeys = state.getUnmatchedLeads();
333
334 //title
335 String title = makeKeyTitle(elKey);
336
337 //key
338 PolytomousKey key = PolytomousKey.NewTitledInstance(title);
339
340 //TODO add covered taxa etc.
341 verifyNoAttribute(elKey);
342
343 //notes
344 makeKeyNotes(elKey, key);
345
346 //keycouplets
347 List<Element> keychoices = new ArrayList<Element>();
348 keychoices.addAll(elKey.getChildren("keycouplet"));
349 keychoices.addAll(elKey.getChildren("keychoice"));
350
351
352 for (Element elKeychoice : keychoices){
353 handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
354 elKey.removeContent(elKeychoice);
355 }
356
357 //
358 verifyNoChildren(elKey);
359 logger.info("Unmatched leads after key handling:" + openKeys.toString());
360
361
362 if (state.getConfig().isDoPrintKeys()){
363 key.print(System.err);
364 }
365 getFeatureTreeService().save(key);
366 return key;
367 }
368
369
370 /**
371 * @param state
372 * @param elKey
373 * @param openKeys
374 * @param key
375 * @param elKeychoice
376 * @param taxon
377 */
378 private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
379
380 //char Attribute
381 Feature feature = handleKeychoiceChar(state, elKeychoice);
382
383 //lead
384 List<FeatureNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, feature);
385
386 //num -> match with unmatched leads
387 handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
388
389 //others
390 verifyNoAttribute(elKeychoice);
391 }
392
393
394 /**
395 * @param openKeys
396 * @param key
397 * @param elKeychoice
398 * @param childNodes
399 */
400 private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<FeatureNode> childNodes) {
401 Attribute numAttr = elKeychoice.getAttribute("num");
402 String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
403 UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
404 Set<FeatureNode> matchingNodes = openKeys.getNodes(okk);
405 for (FeatureNode matchingNode : matchingNodes){
406 for (FeatureNode childNode : childNodes){
407 matchingNode.addChild(childNode);
408 }
409 openKeys.removeNode(okk, matchingNode);
410 }
411 if (matchingNodes.isEmpty()){
412 for (FeatureNode childNode : childNodes){
413 key.getRoot().addChild(childNode);
414 }
415 }
416
417 elKeychoice.removeAttribute("num");
418 }
419
420
421 /**
422 * @param state
423 * @param key
424 * @param elKeychoice
425 * @param taxon
426 * @param feature
427 * @return
428 */
429 private List<FeatureNode> handleKeychoiceLeads( EfloraImportState state, PolytomousKey key, Element elKeychoice, Taxon taxon, Feature feature) {
430 List<FeatureNode> childNodes = new ArrayList<FeatureNode>();
431 List<Element> leads = elKeychoice.getChildren("lead");
432 for(Element elLead : leads){
433 FeatureNode childNode = handleLead(state, key, elLead, taxon, feature);
434 childNodes.add(childNode);
435 }
436 return childNodes;
437 }
438
439
440 /**
441 * @param state
442 * @param elKeychoice
443 * @return
444 */
445 private Feature handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
446 Feature feature = null;
447 Attribute charAttr = elKeychoice.getAttribute("char");
448 if (charAttr != null){
449 String charStr = charAttr.getValue();
450 feature = getFeature(charStr, state);
451 elKeychoice.removeAttribute("char");
452 }
453 return feature;
454 }
455
456
457 private FeatureNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, Feature feature) {
458 FeatureNode node = FeatureNode.NewInstance();
459 node.setFeature(feature);
460
461 //text
462 String text = handleLeadText(elLead, node);
463
464 //num
465 handleLeadNum(elLead, text);
466
467 //goto
468 handleLeadGoto(state, key, elLead, taxon, node);
469
470 //others
471 verifyNoAttribute(elLead);
472
473 return node;
474 }
475
476
477 /**
478 * @param elLead
479 * @param node
480 * @return
481 */
482 private String handleLeadText(Element elLead, FeatureNode node) {
483 String text = elLead.getAttributeValue("text").trim();
484 if (StringUtils.isBlank(text)){
485 logger.warn("Empty text in lead");
486 }
487 elLead.removeAttribute("text");
488 node.addQuestion(Representation.NewInstance(text, null, null, Language.DEFAULT()));
489 return text;
490 }
491
492
493 /**
494 * @param state
495 * @param key
496 * @param elLead
497 * @param taxon
498 * @param node
499 */
500 private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, FeatureNode node) {
501 Attribute gotoAttr = elLead.getAttribute("goto");
502 if (gotoAttr != null){
503 String strGoto = gotoAttr.getValue().trim();
504 //create key
505 UnmatchedLeadsKey gotoKey = null;
506 if (isInternalNode(strGoto)){
507 gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
508 }else{
509 String taxonKey = makeTaxonKey(strGoto, taxon);
510 gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
511 }
512 //
513 UnmatchedLeads openKeys = state.getUnmatchedLeads();
514 openKeys.addKey(gotoKey, node);
515 if (gotoKey.isInnerLead()){
516 Set<FeatureNode> existingNodes = openKeys.getNodes(gotoKey);
517 for (FeatureNode existingNode : existingNodes){
518 node.addChild(existingNode);
519 }
520 }
521 //remove attribute (need for consistency check)
522 elLead.removeAttribute("goto");
523 }else{
524 logger.warn("lead has no goto attribute");
525 }
526 }
527
528
529 /**
530 * @param elLead
531 * @param text
532 */
533 private void handleLeadNum(Element elLead, String text) {
534 Attribute numAttr = elLead.getAttribute("num");
535 if (numAttr != null){
536 //TODO num
537 String num = numAttr.getValue();
538 elLead.removeAttribute("num");
539 }else{
540 logger.info("Keychoice has no num attribute: " + text);
541 }
542 }
543
544
545 private String makeTaxonKey(String strGoto, Taxon taxon) {
546 String result = "";
547 if (strGoto == null){
548 return "";
549 }
550 String strGenusName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getGenusOrUninomial();
551 strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", ""); //replace all brackets
552 strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
553
554 strGoto = strGoto.trim();
555 String[] split = strGoto.split("\\s");
556 for (int i = 0; i<split.length; i++){
557 String single = split[i];
558 if (isGenusAbbrev(single, strGenusName)){
559 split[i] = strGenusName;
560 }
561 // if (isInfraSpecificMarker(single)){
562 // String strSpeciesName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getSpecificEpithet();
563 // split[i] = strGenusName + " " + strSpeciesName + " ";
564 // }
565 result = (result + " " + split[i]).trim();
566 }
567 return result;
568 }
569
570
571 private boolean isInfraSpecificMarker(String single) {
572 try {
573 if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
574 return true;
575 }
576 } catch (UnknownCdmTypeException e) {
577 return false;
578 }
579 return false;
580 }
581
582
583 private boolean isGenusAbbrev(String single, String strGenusName) {
584 if (! single.matches("[A-Z]\\.?")) {
585 return false;
586 }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
587 return false;
588 }else{
589 return single.charAt(0) == strGenusName.charAt(0);
590 }
591 }
592
593
594 private boolean isInternalNode(String strGoto) {
595 return CdmUtils.isNumeric(strGoto);
596 }
597
598
599 private void makeKeyNotes(Element keyElement, PolytomousKey key) {
600 Element elNotes = keyElement.getChild("notes");
601 if (elNotes != null){
602 keyElement.removeContent(elNotes);
603 String notes = elNotes.getTextNormalize();
604 if (StringUtils.isNotBlank(notes)){
605 key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
606 }
607 }
608 }
609
610
611 private String makeKeyTitle(Element keyElement) {
612 String title = "- no title - ";
613 Attribute titleAttr = keyElement.getAttribute("title");
614 keyElement.removeAttribute(titleAttr);
615 if (titleAttr == null){
616 Element elTitle = keyElement.getChild("keytitle");
617 keyElement.removeContent(elTitle);
618 if (elTitle != null){
619 title = elTitle.getTextNormalize();
620 }
621 }else{
622 title = titleAttr.getValue();
623 }
624 return title;
625 }
626
627
628 /**
629 * @param state
630 * @param element
631 * @param taxon
632 */
633 private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
634 Feature chromosomeFeature = getFeature("chromosomes", state);
635 verifyNoAttribute(element);
636 verifyNoChildren(element);
637 String value = element.getTextNormalize();
638 value = replaceStart(value, "Chromosomes");
639 String chromosomesPart = getChromosomesPart(value);
640 String references = value.replace(chromosomesPart, "").trim();
641 chromosomesPart = chromosomesPart.replace(":", "").trim();
642 return addDescriptionElement(taxon, chromosomesPart, chromosomeFeature, references);
643 }
644
645
646 /**
647 * @param ref
648 * @param string
649 * @return
650 */
651 private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
652 String[] splits = refAll.split(splitter);
653 for (String strRef: splits){
654 ReferenceBase ref = ReferenceFactory.newGeneric();
655 ref.setTitleCache(strRef, true);
656 String refDetail = parseReferenceYearAndDetail(ref);
657 sourcable.addSource(null, null, ref, refDetail);
658 }
659
660
661 //TODO use regex instead
662 /* String detailResult = null;
663 String titleToParse = ref.getTitleCache();
664 String reReference = "^\\.{1,}";
665 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
666 String reYear = "\\([1-2]{1}[0-9]{3}\\)";
667 String reYearPeriod = reYear + "(-" + reYear + ")+";
668 String reDetail = "\\.{1,10}$";
669 */
670 }
671
672
673 /**
674 * @param value
675 * @return
676 */
677 private String getChromosomesPart(String str) {
678 Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
679 Matcher matcher = pattern.matcher(str);
680 if (matcher.find()){
681 return matcher.group(0);
682 }else{
683 logger.warn("Chromosomes could not be parsed: " + str);
684 }
685 return str;
686 }
687
688
689 /**
690 * @param state
691 * @param element
692 * @param taxon
693 */
694 private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
695 TextData result = null;
696 verifyNoChildren(element, true);
697 //verifyNoAttribute(element);
698 List<Attribute> attributes = element.getAttributes();
699 for (Attribute attribute : attributes){
700 if (! attribute.getName().equalsIgnoreCase("class")){
701 logger.warn("Char has unhandled attribute " + attribute.getName());
702 }else{
703 String classValue = attribute.getValue();
704 result = handleDescriptiveElement(state, element, taxon, classValue);
705 }
706 }
707 //if no class attribute exists, handle as note
708 if (attributes.isEmpty()){
709 result = handleDescriptiveElement(state, element, taxon, "Note");
710 }
711
712 //Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
713 //taxon.addAnnotation(annotation);
714 return result; //annotation;
715 }
716
717
718 /**
719 * @param state
720 * @param element
721 * @param taxon
722 * @param result
723 * @param attribute
724 * @return
725 */
726 private TextData handleDescriptiveElement(EfloraImportState state,
727 Element element, Taxon taxon, String classValue) {
728 TextData result = null;
729 Feature feature = getFeature(classValue, state);
730 if (feature == null){
731 logger.warn("Unhandled feature: " + classValue);
732 }else{
733 String value = element.getValue();
734 value = replaceStart(value, "Notes");
735 value = replaceStart(value, "Note");
736 result = addDescriptionElement(taxon, value, feature, null);
737 }
738 return result;
739 }
740
741
742 private void removeBr(Element element) {
743 element.removeChildren("Br");
744 element.removeChildren("br");
745 element.removeChildren("BR");
746 }
747
748
749 /**
750 * @param state
751 * @param element
752 * @param taxon
753 */
754 private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
755 verifyNoAttribute(element);
756 verifyNoChildren(element, true);
757 String value = element.getTextNormalize();
758 value = replaceStart(value, "Uses");
759 Feature feature = Feature.USES();
760 return addDescriptionElement(taxon, value, feature, null);
761
762 }
763
764
765 /**
766 * @param state
767 * @param element
768 * @param taxon
769 * @param unhandledDescriptionChildren
770 */
771 private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
772 verifyNoAttribute(element);
773 verifyNoChildren(element, true);
774 String value = element.getTextNormalize();
775 value = replaceStart(value, "Distribution");
776 Feature feature = Feature.DISTRIBUTION();
777 //distribution parsing almost impossible as there is lots of freetext in the distribution tag
778 return addDescriptionElement(taxon, value, feature, null);
779 }
780
781
782 /**
783 * @param state
784 * @param element
785 * @param taxon
786 * @param unhandledDescriptionChildren
787 */
788 private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
789 verifyNoAttribute(elEcology);
790 verifyNoChildren(elEcology, true);
791 String value = elEcology.getTextNormalize();
792 Feature feature = Feature.ECOLOGY();
793 if (value.startsWith("Habitat & Ecology")){
794 feature = getFeature("Habitat & Ecology", state);
795 value = replaceStart(value, "Habitat & Ecology");
796 }else if (value.startsWith("Habitat")){
797 value = replaceStart(value, "Habitat");
798 feature = getFeature("Habitat", state);
799 }
800 return addDescriptionElement(taxon, value, feature, null);
801 }
802
803
804
805 /**
806 * @param value
807 * @param replacementString
808 */
809 private String replaceStart(String value, String replacementString) {
810 if (value.startsWith(replacementString) ){
811 value = value.substring(replacementString.length()).trim();
812 }
813 if (value.startsWith("-") ){
814 value = value.substring("-".length()).trim();
815 }
816 return value;
817 }
818
819
820
821 /**
822 * @param state
823 * @param element
824 * @param taxon
825 * @param unhandledNomeclatureChildren
826 */
827 private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
828 verifyNoAttribute(elNomenclature);
829
830 List<Element> elements = elNomenclature.getChildren();
831 for (Element element : elements){
832 if (element.getName().equals("homotypes")){
833 handleHomotypes(state, element, taxon);
834 }else if (element.getName().equals("notes")){
835 handleNomenclatureNotes(state, element, taxon);
836 }else{
837 unhandledChildren.add(element.getName());
838 }
839 }
840
841 }
842
843
844
845 private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
846 verifyNoAttribute(elNotes);
847 verifyNoChildren(elNotes);
848 String notesText = elNotes.getTextNormalize();
849 Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
850 taxon.addAnnotation(annotation);
851 }
852
853
854
855 private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
856 /**
857 * @param state
858 * @param element
859 * @param taxon
860 */
861 private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
862 verifyNoAttribute(elHomotypes);
863
864 List<Element> elements = elHomotypes.getChildren();
865 HomotypicalGroup homotypicalGroup = null;
866 for (Element element : elements){
867 if (element.getName().equals("nom")){
868 homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
869 }else{
870 unhandledHomotypeChildren.add(element.getName());
871 }
872 }
873
874 }
875
876 private static Set<String> unhandledNomChildren = new HashSet<String>();
877
878 /**
879 * @param state
880 * @param element
881 * @param taxon
882 */
883 private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
884 List<Attribute> attributes = elNom.getAttributes();
885
886 boolean taxonBaseClassType = false;
887 for (Attribute attribute : attributes){
888 if (! attribute.getName().equalsIgnoreCase("class")){
889 logger.warn("Nom has unhandled attribute " + attribute.getName());
890 }else{
891 String classValue = attribute.getValue();
892 if (classValue.equalsIgnoreCase("acceptedname")){
893 homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
894 taxonBaseClassType = true;
895 }else if (classValue.equalsIgnoreCase("synonym")){
896 homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
897 taxonBaseClassType = true;
898 }else if (classValue.equalsIgnoreCase("typeref")){
899 handleTypeRef(state, elNom, taxon, homotypicalGroup);
900 }else{
901 logger.warn("Unhandled class value for nom: " + classValue);
902 }
903
904 }
905 }
906
907 List<Element> elements = elNom.getChildren();
908 for (Element element : elements){
909 if (element.getName().equals("name")){
910 if (taxonBaseClassType == false){
911 logger.warn("Name tag not allowed in non taxon nom tag");
912 }
913 }else{
914 unhandledNomChildren.add(element.getName());
915 }
916 }
917
918 return homotypicalGroup;
919
920 }
921
922 /**
923 * @param state
924 * @param elNom
925 * @param taxon
926 * @param homotypicalGroup
927 */
928 private void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
929 verifyNoChildren(elNom);
930 String typeRef = elNom.getTextNormalize();
931 typeRef = replaceStart(typeRef, "-");
932 typeRef = replaceStart(typeRef, "\97");
933 typeRef = replaceStart(typeRef, "\u002d");
934 typeRef = replaceStart(typeRef, "\u2013");
935
936 String[] split = typeRef.split(":");
937 if (split.length < 2){
938 logger.warn("typeRef has no ':' : " + typeRef);
939 }else if (split.length > 2){
940 logger.warn("typeRef has more than 1 ':' : " + typeRef);
941 }else{
942 StringBuffer typeType = new StringBuffer(split[0]);
943 String typeText = split[1].trim();
944 TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
945
946 //Name Type Desitnations
947 if (typeDesignation instanceof NameTypeDesignation){
948 makeNameTypeDesignations(typeType, typeText, typeDesignation);
949 }
950 //SpecimenTypeDesignations
951 else if (typeDesignation instanceof SpecimenTypeDesignation){
952 makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
953 }else{
954 logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
955 }
956 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
957 name.addTypeDesignation(typeDesignation, true);
958 }
959 }
960 }
961
962 /**
963 * @param typeType
964 * @param typeText
965 * @param typeDesignation
966 */
967 private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
968 if (typeType.toString().trim().equalsIgnoreCase("Type")){
969 //do nothing
970 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
971 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
972 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
973 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
974 }else{
975 logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
976 }
977 //clean
978 typeText = cleanNameType(typeText);
979 //create name
980 BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICBN, Rank.SPECIES());
981 ((NameTypeDesignation) typeDesignation).setTypeName(nameType);
982 //TODO wie können NameTypes den Namen zugeordnet werden? - wird aber vom Portal via NameCache matching gemacht
983 }
984
985
986 private String cleanNameType(String typeText) {
987 String result;
988 String[] split = typeText.split("\\[.*\\].?");
989 result = split[0];
990 return result;
991 }
992
993
994 /**
995 * @param typeType
996 * @param typeText
997 * @param typeDesignation
998 */
999 private void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1000 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1001 //do nothing
1002 }else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1003 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1004 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1005 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1006 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1007 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1008 }else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1009 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1010 }else{
1011 logger.warn("Unhandled type string: " + typeType);
1012 }
1013 Specimen specimen = Specimen.NewInstance();
1014 if (typeText.length() > 255){
1015 specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1016 }else{
1017 specimen.setTitleCache(typeText, true);
1018 }
1019 specimen.addDefinition(typeText, Language.ENGLISH());
1020 ((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1021 }
1022
1023 private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1024 TypeDesignationBase result;
1025 ReferenceBase ref = parseTypeDesignationReference(typeType);
1026 if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1027 if (typeType.indexOf(" species")>-1 ){
1028 result = NameTypeDesignation.NewInstance();
1029 int start = typeType.indexOf(" species");
1030 typeType.replace(start, start + " species".length(), "");
1031 }else {
1032 result = NameTypeDesignation.NewInstance();
1033 int start = typeType.indexOf(" genus");
1034 typeType.replace(start, start + " genus".length(), "");
1035 }
1036 }else{
1037 result = SpecimenTypeDesignation.NewInstance();
1038 }
1039 result.setCitation(ref);
1040 return result;
1041 }
1042
1043
1044 private ReferenceBase parseTypeDesignationReference(StringBuffer typeType) {
1045 ReferenceBase result = null;
1046 String reBracketReference = "\\(.*\\)";
1047 Pattern patBracketReference = Pattern.compile(reBracketReference);
1048 Matcher matcher = patBracketReference.matcher(typeType);
1049 if (matcher.find()){
1050 String refString = matcher.group();
1051 int start = typeType.indexOf(refString);
1052 typeType.replace(start, start + refString.length(), "");
1053 refString = refString.replace("(", "").replace(")", "").trim();
1054 ReferenceBase ref = ReferenceFactory.newGeneric();
1055 ref.setTitleCache(refString, true);
1056 result = ref;
1057 }
1058 return result;
1059 }
1060
1061
1062 /**
1063 * @param state
1064 * @param elNom
1065 * @param taxon
1066 */
1067 //body/taxon/
1068 private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1069 NonViralName name = makeName(taxon, homotypicalGroup, isSynonym);
1070 String num = null;
1071
1072 boolean hasGenusInfo = false;
1073 TeamOrPersonBase lastTeam = null;
1074
1075 //genus
1076 List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1077 if (elGenus.size() > 0){
1078 hasGenusInfo = true;
1079 }else{
1080 logger.debug ("No Synonym Genus");
1081 }
1082 //infra rank -> needed to handle authors correctly
1083 List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1084 Rank infraRank = null;
1085 infraRank = handleInfRank(name, elInfraRank, infraRank);
1086
1087 //get left over elements
1088 List<Element> elements = elNom.getChildren();
1089 elements.removeAll(elInfraRank);
1090
1091 for (Element element : elements){
1092 if (element.getName().equals("name")){
1093 String classValue = element.getAttributeValue("class");
1094 String value = element.getValue().trim();
1095 if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1096 name.setGenusOrUninomial(value);
1097 }else if (classValue.equalsIgnoreCase("family") ){
1098 name.setGenusOrUninomial(value);
1099 name.setRank(Rank.FAMILY());
1100 }else if (classValue.equalsIgnoreCase("subgenus")){
1101 //name.setInfraGenericEpithet(value);
1102 name.setNameCache(value.replace(":", "").trim());
1103 name.setRank(Rank.SUBGENUS());
1104 }else if (classValue.equalsIgnoreCase("epithet") ){
1105 if (hasGenusInfo == true){
1106 name.setSpecificEpithet(value);
1107 }else{
1108 handleInfraspecificEpithet(element, classValue, name);
1109 }
1110 }else if (classValue.equalsIgnoreCase("author")){
1111 handleNameAuthors(element, name);
1112 }else if (classValue.equalsIgnoreCase("paraut")){
1113 handleBasionymAuthor(state, element, name, false);
1114 }else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1115 handleInfrAuthor(state, element, name, true);
1116 }else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1117 handleBasionymAuthor(state, element, name, true);
1118 }else if (classValue.equalsIgnoreCase("infrepi")){
1119 handleInfrEpi(name, infraRank, value);
1120 }else if (classValue.equalsIgnoreCase("pub")){
1121 lastTeam = handleNomenclaturalReference(name, value);
1122 }else if (classValue.equalsIgnoreCase("usage")){
1123 lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1124 }else if (classValue.equalsIgnoreCase("note")){
1125 handleNameNote(name, value);
1126 }else if (classValue.equalsIgnoreCase("num")){
1127 if (num != null){
1128 logger.warn("Duplicate num: " + value);
1129 }else{
1130 num = value;
1131 }
1132 if (isSynonym == true){
1133 logger.warn("Synonym should not have a num");
1134 }
1135 }else if (classValue.equalsIgnoreCase("typification")){
1136 logger.warn("Typification should not be a nom class");
1137 }else{
1138 logger.warn("Unhandled name class: " + classValue);
1139 }
1140 }else if(element.getName().equals("homonym")){
1141 handleHomonym(element, name);
1142 }else{
1143 // child element is not "name"
1144 unhandledNomChildren.add(element.getName());
1145 }
1146 }
1147
1148 //handle key
1149 if (! isSynonym){
1150 String taxonString = name.getNameCache();
1151 //try to find matching lead nodes
1152 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1153 Set<FeatureNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1154 //same without using the num
1155 if (num != null){
1156 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1157 handleMatchingNodes(state, taxon, noNumLeadsKey);
1158 }
1159 if (matchingNodes.isEmpty() && num != null){
1160 logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1161 }
1162 }
1163
1164 //test nom element has no text
1165 if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("\97", "").replace("\u002d","").replace("\u2013", ""))){
1166 String strElNom = elNom.getTextNormalize();
1167 if ("?".equals(strElNom)){
1168 handleQuestionMark(name, taxon);
1169 }
1170 // Character c = strElNom.charAt(0);
1171 //System.out.println(CharUtils.unicodeEscaped(c));
1172 logger.warn("Nom tag has text: " + strElNom);
1173 }
1174
1175 return name.getHomotypicalGroup();
1176 }
1177
1178
1179 private void handleQuestionMark(NonViralName name, Taxon taxon) {
1180 int count = name.getTaxonBases().size();
1181 if (count != 1){
1182 logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1183 }else{
1184 TaxonBase taxonBase = (TaxonBase)name.getTaxonBases().iterator().next();
1185 taxonBase.setDoubtful(true);
1186 }
1187 }
1188
1189
1190 //merge with handleNomTaxon
1191 private void handleHomonym(Element elHomonym, NonViralName upperName) {
1192 verifyNoAttribute(elHomonym);
1193
1194 //hommonym name
1195 BotanicalName homonymName = BotanicalName.NewInstance(upperName.getRank());
1196 homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1197 homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1198 homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1199 homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1200
1201 for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1202 String classValue = elName.getAttributeValue("class");
1203 String value = elName.getValue().trim();
1204 if (classValue.equalsIgnoreCase("genus") ){
1205 homonymName.setGenusOrUninomial(value);
1206 }else if (classValue.equalsIgnoreCase("epithet") ){
1207 homonymName.setSpecificEpithet(value);
1208 }else if (classValue.equalsIgnoreCase("author")){
1209 handleNameAuthors(elName, homonymName);
1210 }else if (classValue.equalsIgnoreCase("pub")){
1211 handleNomenclaturalReference(homonymName, value);
1212 }else if (classValue.equalsIgnoreCase("note")){
1213 handleNameNote(homonymName, value);
1214 }else{
1215 logger.warn("Unhandled class value: " + classValue);
1216 }
1217 }
1218 //TODO verify other information
1219
1220
1221 //rel
1222 boolean homonymIsLater = false;
1223 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1224 if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1225 TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1226 TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1227 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1228 }else{
1229 logger.warn("Classification name has no nomenclatural reference");
1230 }
1231 if (homonymIsLater){
1232 homonymName.addRelationshipToName(upperName, relType, null);
1233 }else{
1234 upperName.addRelationshipToName(homonymName, relType, null);
1235 }
1236
1237 }
1238
1239
1240 /**
1241 * @param state
1242 * @param taxon
1243 * @param leadsKey
1244 * @return
1245 */
1246 private Set<FeatureNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1247 Set<FeatureNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1248 for (FeatureNode matchingNode : matchingNodes){
1249 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1250 matchingNode.setTaxon(taxon);
1251 state.getFeatureNodesToSave().add(matchingNode);
1252 }
1253 return matchingNodes;
1254 }
1255
1256
1257 private void handleNameNote(NonViralName name, String value) {
1258 logger.warn("Name note: " + value + ". Available in portal?");
1259 Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1260 name.addAnnotation(annotation);
1261 }
1262
1263
1264 /**
1265 * @param taxon
1266 * @param name
1267 * @param value
1268 */
1269 private TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1270 ReferenceBase ref = ReferenceFactory.newGeneric();
1271 referenceTitle = removeStartingSymbols(referenceTitle, ref);
1272
1273 ref.setTitleCache(referenceTitle, true);
1274 String microReference = parseReferenceYearAndDetail(ref);
1275 TeamOrPersonBase team = getReferenceAuthor(ref);
1276 parseReferenceType(ref);
1277 if (team == null){
1278 team = lastTeam;
1279 }
1280 ref.setAuthorTeam(team);
1281
1282 TaxonDescription description = getDescription(taxon);
1283 TextData textData = TextData.NewInstance(Feature.CITATION());
1284 textData.addSource(null, null, ref, microReference, name, null);
1285 description.addElement(textData);
1286 return team;
1287 }
1288
1289
1290 /**
1291 * @param referenceTitle
1292 * @param ref
1293 * @return
1294 */
1295 private String removeStartingSymbols(String referenceTitle, ReferenceBase ref) {
1296 if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1297 referenceTitle = referenceTitle.substring(1).trim();
1298 ref.setTitleCache(referenceTitle);
1299 }
1300 return referenceTitle;
1301 }
1302
1303
1304 private void parseReferenceType(ReferenceBase ref) {
1305 String title = ref.getTitle();
1306 if (title == null){
1307 return;
1308 }
1309 title = title.trim();
1310 //no in reference
1311 if (! title.startsWith("in ")){
1312 ref.setType(ReferenceType.Book);
1313 return;
1314 }
1315
1316 title = title.substring(3);
1317 //in reference
1318 //no ,
1319 if (title.indexOf(",") == -1){
1320 ref.setType(ReferenceType.Article);
1321 IJournal journal = ReferenceFactory.newJournal();
1322 journal.setTitle(title);
1323 ref.setTitle(null);
1324 ref.setInJournal(journal);
1325 //return;
1326 }else{
1327 //,-references
1328 ref.setType(ReferenceType.BookSection);
1329 String[] split = (title).split(",\\s*[A-Z]");
1330 if (split.length <= 1){
1331 logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1332 }
1333 IBook book = ReferenceFactory.newBook();
1334 Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1335 try {
1336 title = title.substring(split[0].length() + 1).trim();
1337 } catch (Exception e) {
1338 logger.error("ERROR occurred when trying to split title: " + title + "; split[0]: + " + split[0]);
1339 }
1340 book.setTitle(title);
1341 book.setAuthorTeam(bookTeam);
1342 book.setDatePublished(ref.getDatePublished());
1343 ref.setTitle(null);
1344 ref.setInBook(book);
1345 }
1346 }
1347
1348
1349 private Team getReferenceAuthor (ReferenceBase ref) {
1350 boolean isCache = false;
1351 String referenceTitle = ref.getTitle();
1352 if (referenceTitle == null){
1353 isCache = true;
1354 referenceTitle = ref.getTitleCache();
1355 }
1356 //in references
1357 String[] split = (" " + referenceTitle).split(" in ");
1358 if (split.length > 1){
1359 if (StringUtils.isNotBlank(split[0])){
1360 //' in ' is within the reference string, take the preceding string as the team
1361 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1362 if (! isCache){
1363 ref.setTitle("in " + split[1]);
1364 }
1365 return team;
1366 }else{
1367 //string starts with in therefore no author is given
1368 return null;
1369 }
1370 }
1371 //no ,-reference
1372 split = referenceTitle.split(",");
1373 if (split.length < 2){
1374 //no author is given
1375 return null;
1376 }
1377
1378 //,-references
1379 split = (referenceTitle).split(",\\s*[A-Z]");
1380 if (split.length > 1){
1381 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1382 if (! isCache){
1383 ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1384 }
1385 return team;
1386 }else{
1387 logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1388 return null;
1389 }
1390 }
1391
1392
1393 /**
1394 * Replaced by <homonym> tag but still in use for exceptions
1395 * @param detail
1396 * @param name
1397 * @return
1398 */
1399 private String parseHomonym(String detail, NonViralName name) {
1400 String result;
1401 if (detail == null){
1402 return detail;
1403 }
1404
1405
1406 //non RE
1407 String reNon = "(\\s|,)non\\s";
1408 Pattern patReference = Pattern.compile(reNon);
1409 Matcher matcher = patReference.matcher(detail);
1410 if (matcher.find()){
1411 int start = matcher.start();
1412 int end = matcher.end();
1413
1414 if (detail != null){
1415 logger.warn("Unhandled non part: " + detail.substring(start));
1416 return detail;
1417 }
1418
1419 result = detail.substring(0, start);
1420
1421 //homonym string
1422 String homonymString = detail.substring(end);
1423
1424 //hommonym name
1425 BotanicalName homonymName = BotanicalName.NewInstance(name.getRank());
1426 homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1427 homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1428 homonymName.setSpecificEpithet(name.getSpecificEpithet());
1429 homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1430 ReferenceBase homonymNomRef = ReferenceFactory.newGeneric();
1431 homonymNomRef.setTitleCache(homonymString);
1432 String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1433 homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1434 String authorTitle = homonymNomRef.getTitleCache();
1435 Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1436 homonymNomRef.setAuthorTeam(team);
1437 homonymNomRef.setTitle("");
1438 homonymNomRef.setProtectedTitleCache(false);
1439
1440 //rel
1441 boolean homonymIsLater = false;
1442 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1443 TimePeriod homonymYear = homonymNomRef.getDatePublished();
1444 if (name.getNomenclaturalReference() != null){
1445 TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1446 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1447 }else{
1448 logger.warn("Classification name has no nomenclatural reference");
1449 }
1450 if (homonymIsLater){
1451 homonymName.addRelationshipToName(name, relType, null);
1452 }else{
1453 name.addRelationshipToName(homonymName, relType, null);
1454 }
1455
1456 }else{
1457 return detail;
1458 }
1459 return result;
1460 }
1461
1462
1463 /**
1464 * @param name
1465 * @param value
1466 */
1467 private TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
1468 ReferenceBase nomRef = ReferenceFactory.newGeneric();
1469 nomRef.setTitleCache(value, true);
1470 parseNomStatus(nomRef, name);
1471 String microReference = parseReferenceYearAndDetail(nomRef);
1472 name.setNomenclaturalReference(nomRef);
1473 microReference = parseHomonym(microReference, name);
1474 name.setNomenclaturalMicroReference(microReference);
1475 TeamOrPersonBase team = (TeamOrPersonBase)name.getCombinationAuthorTeam();
1476 if (team == null){
1477 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1478 }else{
1479 nomRef.setAuthorTeam(team);
1480 }
1481 return team;
1482 }
1483
1484 private void handleInfrAuthor(EfloraImportState state, Element elAuthor, NonViralName name, boolean overwrite) {
1485 String strAuthor = elAuthor.getValue().trim();
1486 if (strAuthor.endsWith(",")){
1487 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1488 }
1489 Team[] team = getTeam(strAuthor);
1490 if (name.getCombinationAuthorTeam() != null && overwrite == false){
1491 logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1492 }else{
1493 name.setCombinationAuthorTeam(team[0]);
1494 name.setExCombinationAuthorTeam(team[1]);
1495 }
1496
1497
1498 }
1499
1500
1501 /**
1502 * Sets the names rank according to the infrank value
1503 * @param name
1504 * @param elements
1505 * @param elInfraRank
1506 * @param infraRank
1507 * @return
1508 */
1509 private Rank handleInfRank(NonViralName name, List<Element> elInfraRank, Rank infraRank) {
1510 if (elInfraRank.size() == 1){
1511 String strRank = elInfraRank.get(0).getTextNormalize();
1512 try {
1513 infraRank = Rank.getRankByNameOrAbbreviation(strRank);
1514 } catch (UnknownCdmTypeException e) {
1515 try{
1516 infraRank = Rank.getRankByNameOrAbbreviation(strRank + ".");
1517 } catch (UnknownCdmTypeException e2) {
1518 logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1519 }
1520 }
1521 }else if (elInfraRank.size() > 1){
1522 logger.warn ("There is more than 1 infrank");
1523 }
1524 if (infraRank != null){
1525 name.setRank(infraRank);
1526 }
1527 return infraRank;
1528 }
1529
1530
1531 private void handleInfrEpi(NonViralName name, Rank infraRank, String value) {
1532 if (infraRank != null && infraRank.isInfraSpecific()){
1533 name.setInfraSpecificEpithet(value);
1534 if (CdmUtils.isCapital(value)){
1535 logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1536 }
1537 }else if (infraRank != null && infraRank.isInfraGeneric()){
1538 name.setInfraGenericEpithet(value);
1539 if (! CdmUtils.isCapital(value)){
1540 logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1541 }
1542 }else{
1543 logger.warn("Infrepi could not be handled: " + value);
1544 }
1545 }
1546
1547
1548
1549 /**
1550 * Returns the (empty) with the correct homotypical group depending on the taxon status
1551 * @param taxon
1552 * @param homotypicalGroup
1553 * @param isSynonym
1554 * @return
1555 */
1556 private NonViralName makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1557 NonViralName name;
1558 if (isSynonym){
1559 name = BotanicalName.NewInstance(Rank.SPECIES(), homotypicalGroup);
1560 SynonymRelationshipType synonymType = SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF();
1561 if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1562 synonymType = SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF();
1563 }
1564 taxon.addSynonymName(name, synonymType);
1565 }else{
1566 name = (NonViralName)taxon.getName();
1567 }
1568 return name;
1569 }
1570
1571
1572 /**
1573 * @param element
1574 * @param taxon
1575 */
1576 private void handleInfraspecificEpithet(Element element, String attrValue, NonViralName name) {
1577 String value = element.getTextNormalize();
1578 if (value.indexOf("subsp.") != -1){
1579 //TODO genus and species epi
1580 String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1581 name.setInfraSpecificEpithet(infrEpi);
1582 name.setRank(Rank.SUBSPECIES());
1583 }else if (value.indexOf("var.") != -1){
1584 //TODO genus and species epi
1585 String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1586 name.setInfraSpecificEpithet(infrEpi);
1587 name.setRank(Rank.VARIETY());
1588 }else{
1589 logger.warn("Unhandled infraspecific type: " + value);
1590 }
1591 }
1592
1593
1594 /**
1595 * @param state
1596 * @param element
1597 * @param name
1598 */
1599 private void handleBasionymAuthor(EfloraImportState state, Element element, NonViralName name, boolean overwrite) {
1600 String strAuthor = element.getValue().trim();
1601 Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1602 if (reBasionymAuthor.matcher(strAuthor).matches()){
1603 strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1604 }else{
1605 logger.warn("Brackets are missing for original combination author " + strAuthor);
1606 }
1607 Team[] basionymTeam = getTeam(strAuthor);
1608 if (name.getBasionymAuthorTeam() != null && overwrite == false){
1609 logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1610 }else{
1611 name.setBasionymAuthorTeam(basionymTeam[0]);
1612 name.setExBasionymAuthorTeam(basionymTeam[1]);
1613
1614 }
1615 }
1616
1617 private Map<String, UUID> teamMap = new HashMap<String, UUID>();
1618 /**
1619 * @param elAuthors
1620 * @param name
1621 * @param elNom
1622 */
1623 private void handleNameAuthors(Element elAuthor, NonViralName name) {
1624 if (name.getCombinationAuthorTeam() != null){
1625 logger.warn("Name already has a combination author. Name: " + name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1626 }
1627 String strAuthor = elAuthor.getValue().trim();
1628 if (strAuthor.endsWith(",")){
1629 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1630 }
1631 Team[] team = getTeam(strAuthor);
1632 name.setCombinationAuthorTeam(team[0]);
1633 name.setExCombinationAuthorTeam(team[1]);
1634 }
1635
1636
1637 /**
1638 * @param strAuthor
1639 * @return
1640 */
1641 private Team[] getTeam(String strAuthor) {
1642 Team[] result = new Team[2];
1643 String[] split = strAuthor.split(" ex ");
1644 String strBaseAuthor = null;
1645 String strExAuthor = null;
1646
1647 if (split.length == 2){
1648 strBaseAuthor = split[1];
1649 strExAuthor = split[0];
1650 }else if (split.length == 1){
1651 strBaseAuthor = split[0];
1652 }else{
1653 logger.warn("Could not parse (ex) author: " + strAuthor);
1654 }
1655
1656 result[0] = getUuidTeam(strBaseAuthor);
1657 if (result[0] == null){
1658 result[0] = Team.NewInstance();
1659 result[0].setTitleCache(strBaseAuthor, true);
1660 teamMap.put(strBaseAuthor, result[0].getUuid());
1661 }
1662 if (strExAuthor != null){
1663 result[1] = getUuidTeam(strExAuthor);
1664 if (result[1] == null){
1665 result[1] = Team.NewInstance();
1666 result[1].setTitleCache(strExAuthor, true);
1667 teamMap.put(strExAuthor, result[1].getUuid());
1668 }
1669
1670 }
1671 return result;
1672 }
1673
1674
1675 /**
1676 * @param result
1677 * @param strBaseAuthor
1678 */
1679 private Team getUuidTeam(String strBaseAuthor) {
1680 UUID uuidTeam = teamMap.get(strBaseAuthor);
1681 return CdmBase.deproxy(getAgentService().find(uuidTeam), Team.class);
1682 }
1683
1684
1685 private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1686 verifyNoAttribute(elDescription);
1687
1688 List<Element> elements = elDescription.getChildren();
1689 for (Element element : elements){
1690 if (element.getName().equalsIgnoreCase("char")){
1691 handleChar(state, element, taxon);
1692 }else{
1693 logger.warn("Unhandled description child: " + element.getName());
1694 }
1695 }
1696
1697 }
1698
1699
1700 /**
1701 * @param state
1702 * @param element
1703 * @param taxon
1704 */
1705 private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1706 List<Attribute> attributes = element.getAttributes();
1707 for (Attribute attribute : attributes){
1708 if (! attribute.getName().equalsIgnoreCase("class")){
1709 logger.warn("Char has unhandled attribute " + attribute.getName());
1710 }else{
1711 String classValue = attribute.getValue();
1712 Feature feature = getFeature(classValue, state);
1713 if (feature == null){
1714 logger.warn("Unhandled feature: " + classValue);
1715 }else{
1716 String value = element.getValue();
1717 addDescriptionElement(taxon, value, feature, null);
1718 }
1719
1720 }
1721 }
1722
1723 List<Element> elements = element.getChildren();
1724 if (! elements.isEmpty()){
1725 logger.warn("Char has unhandled children");
1726 }
1727 }
1728
1729
1730 /**
1731 * @param taxon
1732 * @return
1733 */
1734 private TaxonDescription getDescription(Taxon taxon) {
1735 for (TaxonDescription description : taxon.getDescriptions()){
1736 if (! description.isImageGallery()){
1737 return description;
1738 }
1739 }
1740 TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1741 return newDescription;
1742 }
1743
1744
1745 /**
1746 * @param classValue
1747 * @param state
1748 * @return
1749 * @throws UndefinedTransformerMethodException
1750 */
1751 private Feature getFeature(String classValue, EfloraImportState state) {
1752 UUID uuid;
1753 try {
1754 uuid = state.getTransformer().getFeatureUuid(classValue);
1755 if (uuid == null){
1756 logger.info("Uuid is null for " + classValue);
1757 }
1758 String featureText = StringUtils.capitalize(classValue);
1759 Feature feature = getFeature(state, uuid, featureText, featureText, classValue);
1760 if (feature == null){
1761 throw new NullPointerException(classValue + " not recognized as a feature");
1762 }
1763 return feature;
1764 } catch (Exception e) {
1765 logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1766 return Feature.UNKNOWN();
1767 }
1768 }
1769
1770
1771 /**
1772 * @param state
1773 * @param element
1774 * @param taxon
1775 * @param unhandledTitleClassess
1776 */
1777 private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1778 // attributes
1779 List<Attribute> attributes = element.getAttributes();
1780 for (Attribute attribute : attributes){
1781 if (! attribute.getName().equalsIgnoreCase("class") ){
1782 if (! attribute.getName().equalsIgnoreCase("num")){
1783 logger.warn("Title has unhandled attribute " + attribute.getName());
1784 }else{
1785 //TODO num attribute in taxon
1786 }
1787 }else{
1788 String classValue = attribute.getValue();
1789 try {
1790 Rank rank;
1791 try {
1792 rank = Rank.getRankByNameOrAbbreviation(classValue);
1793 } catch (Exception e) {
1794 //TODO nc
1795 rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICBN, false);
1796 }
1797 taxon.getName().setRank(rank);
1798 if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1799 handleGenus(element.getValue(), taxon.getName());
1800 }else if (rank.equals(Rank.SUBGENUS())){
1801 handleSubGenus(element.getValue(), taxon.getName());
1802 }else if (rank.equals(Rank.SECTION_BOTANY())){
1803 handleSection(element.getValue(), taxon.getName());
1804 }else if (rank.equals(Rank.SPECIES())){
1805 handleSpecies(element.getValue(), taxon.getName());
1806 }else if (rank.equals(Rank.SUBSPECIES())){
1807 handleSubSpecies(element.getValue(), taxon.getName());
1808 }else if (rank.equals(Rank.VARIETY())){
1809 handleVariety(element.getValue(), taxon.getName());
1810 }else{
1811 logger.warn("Unhandled rank: " + rank.getLabel());
1812 }
1813 } catch (UnknownCdmTypeException e) {
1814 logger.warn("Unknown rank " + classValue);
1815 unhandledTitleClassess.add(classValue);
1816 }
1817 }
1818 }
1819 List<Element> elements = element.getChildren();
1820 if (! elements.isEmpty()){
1821 logger.warn("Title has unexpected children");
1822 }
1823 UUID uuidTitle = EfloraTransformer.uuidTitle;
1824 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1825 taxon.addExtension(element.getTextNormalize(), titleExtension);
1826
1827 }
1828
1829
1830 /**
1831 * @param value
1832 * @param taxonNameBase
1833 */
1834 private void handleSubGenus(String value, TaxonNameBase taxonNameBase) {
1835 String name = value.replace("Subgenus", "").trim();
1836 ((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1837 }
1838
1839 /**
1840 * @param value
1841 * @param taxonNameBase
1842 */
1843 private void handleSection(String value, TaxonNameBase taxonNameBase) {
1844 String name = value.replace("Section", "").trim();
1845 ((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1846 }
1847
1848 /**
1849 * @param value
1850 * @param taxonNameBase
1851 */
1852 private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
1853 //do nothing
1854 }
1855
1856 /**
1857 * @param value
1858 * @param taxonNameBase
1859 */
1860 private void handleVariety(String value, TaxonNameBase taxonNameBase) {
1861 //do nothing
1862 }
1863
1864 /**
1865 * @param value
1866 * @param taxonNameBase
1867 */
1868 private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
1869 //do nothing
1870 }
1871
1872
1873 private Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1874
1875 /**
1876 * @param value
1877 * @param taxonNameBase
1878 */
1879 private void handleGenus(String value, TaxonNameBase taxonName) {
1880 Matcher matcher = rexGenusAuthor.matcher(value);
1881 if (matcher.find()){
1882 String author = matcher.group();
1883 // String genus = value.replace(author, "");
1884 author = author.substring(1, author.length() - 1);
1885 Team team = Team.NewInstance();
1886 team.setTitleCache(author, true);
1887 Credit credit = Credit.NewInstance(team, null);
1888 taxonName.addCredit(credit);
1889 // NonViralName nvn = (NonViralName)taxonName;
1890 // nvn.setCombinationAuthorTeam(team);
1891 // nvn.setGenusOrUninomial(genus);
1892 }else{
1893 logger.info("No Author match for " + value);
1894 }
1895 }
1896
1897
1898 /**
1899 * @param taxon
1900 * @param lastTaxon
1901 */
1902 private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
1903
1904 TaxonomicTree tree = getTree(state);
1905 if (lastTaxon == null){
1906 tree.addChildTaxon(taxon, null, null, null);
1907 return;
1908 }
1909 Rank thisRank = taxon.getName().getRank();
1910 Rank lastRank = lastTaxon.getName().getRank();
1911 if (lastTaxon.getTaxonNodes().size() > 0){
1912 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
1913 if (thisRank.isLower(lastRank ) ){
1914 lastNode.addChildTaxon(taxon, null, null, null);
1915 fillMissingEpithetsForTaxa(lastTaxon, taxon);
1916 }else if (thisRank.equals(lastRank)){
1917 TaxonNode parent = lastNode.getParent();
1918 if (parent != null){
1919 parent.addChildTaxon(taxon, null, null, null);
1920 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
1921 }else{
1922 tree.addChildTaxon(taxon, null, null, null);
1923 }
1924 }else if (thisRank.isHigher(lastRank)){
1925 handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
1926 // TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
1927 // parentNode.addChildTaxon(taxon, null, null, null);
1928 }
1929 }else{
1930 logger.warn("Last taxon has no node");
1931 }
1932 }
1933
1934
1935
1936 /**
1937 * @param state
1938 * @return
1939 */
1940 private TaxonomicTree getTree(EfloraImportState state) {
1941 TaxonomicTree result = state.getTree(null);
1942 if (result == null){
1943 UUID uuid = state.getConfig().getTaxonomicTreeUuid();
1944 if (uuid == null){
1945 logger.warn("No classification uuid is defined");
1946 result = getNewClassification(state);
1947 }else{
1948 result = getTaxonTreeService().getTaxonomicTreeByUuid(uuid);
1949 if (result == null){
1950 result = getNewClassification(state);
1951 result.setUuid(uuid);
1952 }
1953 }
1954 state.putTree(null, result);
1955 }
1956 return result;
1957 }
1958
1959
1960 private TaxonomicTree getNewClassification(EfloraImportState state) {
1961 TaxonomicTree result;
1962 result = TaxonomicTree.NewInstance(state.getConfig().getClassificationTitle());
1963 state.putTree(null, result);
1964 return result;
1965 }
1966
1967
1968
1969 /**
1970 * @param taxon
1971 * @param value
1972 * @param feature
1973 * @return
1974 */
1975 private TextData addDescriptionElement(Taxon taxon, String value, Feature feature, String references) {
1976 TextData textData = TextData.NewInstance(feature);
1977 textData.putText(value, Language.ENGLISH());
1978 TaxonDescription description = getDescription(taxon);
1979 description.addElement(textData);
1980 if (references != null){
1981 makeOriginalSourceReferences(textData, ";", references);
1982 }
1983 return textData;
1984 }
1985
1986 /**
1987 * @param elNomenclature
1988 */
1989 private void verifyNoAttribute(Element element) {
1990 List<Attribute> attributes = element.getAttributes();
1991 if (! attributes.isEmpty()){
1992 logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
1993 }
1994 }
1995
1996 /**
1997 * @param elNomenclature
1998 */
1999 private void verifyNoChildren(Element element) {
2000 verifyNoChildren(element, false);
2001 }
2002
2003 /**
2004 * @param elNomenclature
2005 */
2006 private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2007 List<Element> children = element.getChildren();
2008 if (! children.isEmpty()){
2009 if (ignoreLineBreak == true){
2010 for (Element child : children){
2011 if (! child.getName().equalsIgnoreCase("BR")){
2012 logger.warn(element.getName() + " has unhandled child: " + child.getName());
2013 }
2014 }
2015 }else{
2016 logger.warn(element.getName() + " has unhandled children");
2017 }
2018 }
2019 }
2020
2021
2022
2023 private void parseNomStatus(ReferenceBase ref, NonViralName nonViralName) {
2024 String titleToParse = ref.getTitleCache();
2025
2026
2027 String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName);
2028 if (! noStatusTitle.equals(titleToParse)){
2029 ref.setTitleCache(noStatusTitle, true);
2030 }
2031 }
2032
2033
2034 /**
2035 * Extracts the date published part and returns micro reference
2036 * @param ref
2037 * @return
2038 */
2039 private String parseReferenceYearAndDetail(ReferenceBase ref){
2040 String detailResult = null;
2041 String titleToParse = ref.getTitleCache();
2042 titleToParse = removeStartingSymbols(titleToParse, ref);
2043 String reReference = "^\\.{1,}";
2044 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2045 String oneMonth = "(Feb.|Dec.|March|June|July)";
2046 String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2047 String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2048
2049 String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2050 String reDetail = "\\.{1,10}$";
2051
2052 //pattern for the whole string
2053 Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2054 Matcher matcher = patReference.matcher(titleToParse);
2055 if (matcher.find()){
2056 int start = matcher.start();
2057 int end = matcher.end();
2058
2059 //title and other information precedes the year part
2060 String title = titleToParse.substring(0, start).trim();
2061 //detail follows the year part
2062 String detail = titleToParse.substring(end).trim();
2063
2064 //time period
2065 String strPeriod = matcher.group().trim();
2066 strPeriod = strPeriod.substring(1, strPeriod.length()-1); //remove brackets
2067 Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2068 matcher = patStartMonth.matcher(strPeriod);
2069 strPeriod = strPeriod.replace(" ", "");
2070 Integer startMonth = null;
2071 if (matcher.find()){
2072 end = matcher.end();
2073 strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2074 startMonth = getMonth(strPeriod.substring(0, end));
2075 }
2076
2077 TimePeriod datePublished = TimePeriod.parseString(strPeriod);
2078 if (startMonth != null){
2079 datePublished.setStartMonth(startMonth);
2080 }
2081 ref.setDatePublished(datePublished);
2082 ref.setTitle(title);
2083 detailResult = CdmUtils.removeTrailingDot(detail);
2084 if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",") ){
2085 detailResult = detailResult.substring(0, detailResult.length() -1);
2086 }
2087 ref.setProtectedTitleCache(false);
2088 }else{
2089 logger.warn("Could not parse reference: " + titleToParse);
2090 }
2091 return detailResult;
2092
2093 }
2094
2095
2096
2097 private Integer getMonth(String month) {
2098 if (month.startsWith("Jan")){
2099 return 1;
2100 }else if (month.startsWith("Feb")){
2101 return 2;
2102 }else if (month.startsWith("Mar")){
2103 return 3;
2104 }else if (month.startsWith("Apr")){
2105 return 4;
2106 }else if (month.startsWith("May")){
2107 return 5;
2108 }else if (month.startsWith("Jun")){
2109 return 6;
2110 }else if (month.startsWith("Jul")){
2111 return 7;
2112 }else if (month.startsWith("Aug")){
2113 return 8;
2114 }else if (month.startsWith("Sep")){
2115 return 9;
2116 }else if (month.startsWith("Oct")){
2117 return 10;
2118 }else if (month.startsWith("Nov")){
2119 return 11;
2120 }else if (month.startsWith("Dec")){
2121 return 12;
2122 }else{
2123 logger.warn("Month not yet supported: " + month);
2124 return null;
2125 }
2126 }
2127
2128
2129 /* (non-Javadoc)
2130 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2131 */
2132 protected boolean isIgnore(EfloraImportState state){
2133 return ! state.getConfig().isDoTaxa();
2134 }
2135
2136 }