2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.io
.eflora
;
11 import java
.util
.ArrayList
;
12 import java
.util
.HashMap
;
13 import java
.util
.HashSet
;
14 import java
.util
.List
;
17 import java
.util
.UUID
;
18 import java
.util
.regex
.Matcher
;
19 import java
.util
.regex
.Pattern
;
21 import org
.apache
.commons
.lang
.CharUtils
;
22 import org
.apache
.commons
.lang
.StringUtils
;
23 import org
.apache
.logging
.log4j
.LogManager
;
24 import org
.apache
.logging
.log4j
.Logger
;
25 import org
.jdom
.Attribute
;
26 import org
.jdom
.Element
;
27 import org
.springframework
.stereotype
.Component
;
28 import org
.springframework
.transaction
.TransactionStatus
;
30 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
31 import eu
.etaxonomy
.cdm
.common
.ResultWrapper
;
32 import eu
.etaxonomy
.cdm
.common
.XmlHelp
;
33 import eu
.etaxonomy
.cdm
.io
.common
.ICdmIO
;
34 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
35 import eu
.etaxonomy
.cdm
.io
.eflora
.UnmatchedLeads
.UnmatchedLeadsKey
;
36 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
37 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
38 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
39 import eu
.etaxonomy
.cdm
.model
.common
.AnnotatableEntity
;
40 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
41 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
42 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
43 import eu
.etaxonomy
.cdm
.model
.common
.Credit
;
44 import eu
.etaxonomy
.cdm
.model
.common
.ExtensionType
;
45 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
46 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
47 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
48 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
49 import eu
.etaxonomy
.cdm
.model
.common
.VerbatimTimePeriod
;
50 import eu
.etaxonomy
.cdm
.model
.description
.CommonTaxonName
;
51 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
52 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
53 import eu
.etaxonomy
.cdm
.model
.description
.KeyStatement
;
54 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
55 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKeyNode
;
56 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
57 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
58 import eu
.etaxonomy
.cdm
.model
.name
.HomotypicalGroup
;
59 import eu
.etaxonomy
.cdm
.model
.name
.IBotanicalName
;
60 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
61 import eu
.etaxonomy
.cdm
.model
.name
.NameRelationshipType
;
62 import eu
.etaxonomy
.cdm
.model
.name
.NameTypeDesignation
;
63 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
64 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
65 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignation
;
66 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignationStatus
;
67 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
68 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameFactory
;
69 import eu
.etaxonomy
.cdm
.model
.name
.TypeDesignationBase
;
70 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
71 import eu
.etaxonomy
.cdm
.model
.reference
.IBook
;
72 import eu
.etaxonomy
.cdm
.model
.reference
.IJournal
;
73 import eu
.etaxonomy
.cdm
.model
.reference
.ISourceable
;
74 import eu
.etaxonomy
.cdm
.model
.reference
.OriginalSourceType
;
75 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
76 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
77 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceType
;
78 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
79 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymType
;
80 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
81 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
82 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
83 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
84 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
85 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
91 public class EfloraTaxonImport
extends EfloraImportBase
implements ICdmIO
<EfloraImportState
> {
93 private static final long serialVersionUID
= -333673708310331342L;
94 private static Logger logger
= LogManager
.getLogger();
96 private static int modCount
= 30000;
97 private final NonViralNameParserImpl parser
= new NonViralNameParserImpl();
99 public EfloraTaxonImport(){
104 public boolean doCheck(EfloraImportState state
){
105 boolean result
= true;
//TODO make part of state, but state is renewed when invoking the import a second time
// Created on first use in doInvoke and then handed to the state on every invocation.
private UnmatchedLeads unmatchedLeads;
113 public void doInvoke(EfloraImportState state
){
114 logger
.info("start make Taxa ...");
117 state
.putTree(null, null);
118 // UnmatchedLeads unmatchedLeads = state.getOpenKeys();
119 if (unmatchedLeads
== null){
120 unmatchedLeads
= UnmatchedLeads
.NewInstance();
122 state
.setUnmatchedLeads(unmatchedLeads
);
124 TransactionStatus tx
= startTransaction();
125 unmatchedLeads
.saveToSession(getPolytomousKeyNodeService());
128 //TODO generally do not store the reference object in the config
129 Reference sourceReference
= state
.getConfig().getSourceReference();
130 getReferenceService().saveOrUpdate(sourceReference
);
132 Set
<TaxonBase
> taxaToSave
= new HashSet
<>();
133 ResultWrapper
<Boolean
> success
= ResultWrapper
.NewInstance(true);
135 Element elbody
= getBodyElement(state
.getConfig());
136 List
<Element
> elTaxonList
= elbody
.getChildren();
140 Set
<String
> unhandledTitleClassess
= new HashSet
<String
>();
141 Set
<String
> unhandledNomeclatureChildren
= new HashSet
<String
>();
142 Set
<String
> unhandledDescriptionChildren
= new HashSet
<String
>();
144 Taxon lastTaxon
= getLastTaxon(state
);
147 for (Element elTaxon
: elTaxonList
){
149 if ((i
++ % modCount
) == 0 && i
> 1){ logger
.info("Taxa handled: " + (i
-1));}
150 if (! elTaxon
.getName().equalsIgnoreCase("taxon")){
151 logger
.warn("body has element other than 'taxon'");
154 IBotanicalName botanicalName
= TaxonNameFactory
.NewBotanicalInstance(Rank
.SPECIES());
155 Taxon taxon
= Taxon
.NewInstance(botanicalName
, state
.getConfig().getSourceReference());
157 handleTaxonAttributes(elTaxon
, taxon
, state
);
160 List
<Element
> children
= elTaxon
.getChildren();
161 handleTaxonElement(state
, unhandledTitleClassess
, unhandledNomeclatureChildren
, unhandledDescriptionChildren
, taxon
, children
);
162 handleTaxonRelation(state
, taxon
, lastTaxon
);
164 taxaToSave
.add(taxon
);
165 state
.getConfig().setLastTaxonUuid(lastTaxon
.getUuid());
167 } catch (Exception e
) {
168 logger
.warn("Exception occurred in Sapindacea taxon import: " + e
);
174 System
.out
.println(state
.getUnmatchedLeads().toString());
175 logger
.warn("There are taxa with attributes 'excluded' and 'dubious'");
177 logger
.info("Children for nomenclature are: " + unhandledNomeclatureChildren
);
178 logger
.info("Children for description are: " + unhandledDescriptionChildren
);
179 logger
.info("Children for homotypes are: " + unhandledHomotypeChildren
);
180 logger
.info("Children for nom are: " + unhandledNomChildren
);
183 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
184 logger
.info(i
+ " taxa handled. Saving ...");
185 getTaxonService().saveOrUpdate(taxaToSave
);
186 getTermNodeService().saveOrUpdate(state
.getFeatureNodesToSave());
187 state
.getFeatureNodesToSave().clear();
188 commitTransaction(tx
);
190 logger
.info("end makeTaxa ...");
191 logger
.info("start makeKey ...");
192 // invokeDoKey(state);
193 logger
.info("end makeKey ...");
195 if (! success
.getValue()){
196 state
.setUnsuccessfull();
202 private void handleTaxonAttributes(Element elTaxon
, Taxon taxon
, EfloraImportState state
) {
203 List
<Attribute
> attrList
= elTaxon
.getAttributes();
204 for (Attribute attr
: attrList
){
205 String attrName
= attr
.getName();
206 String attrValue
= attr
.getValue();
207 if ("class".equals(attrName
)){
208 if (attrValue
.equalsIgnoreCase("dubious") || attrValue
.equalsIgnoreCase("DUBIOUS GENUS") || attrValue
.equalsIgnoreCase("DOUBTFUL SPECIES") ){
209 taxon
.setDoubtful(true);
211 MarkerType markerType
= getMarkerType(state
, attrValue
);
212 if (markerType
== null){
213 logger
.warn("Class attribute value for taxon not yet supported: " + attrValue
);
215 taxon
.addMarker(Marker
.NewInstance(markerType
, true));
218 }else if ("num".equals(attrName
)){
219 logger
.warn("num not yet supported");
221 logger
.warn("Attribute " + attrName
+ " not yet supported for element taxon");
228 private Taxon
getLastTaxon(EfloraImportState state
) {
229 if (state
.getConfig().getLastTaxonUuid() == null){
232 return (Taxon
)getTaxonService().find(state
.getConfig().getLastTaxonUuid());
237 // private void invokeDoKey(SapindaceaeImportState state) {
238 // TransactionStatus tx = startTransaction();
240 // Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
241 // ITaxonService taxonService = getTaxonService();
242 // ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
244 // Element elbody= getBodyElement(state.getConfig());
245 // List<Element> elTaxonList = elbody.getChildren();
250 // for (Element elTaxon : elTaxonList){
251 // if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
252 // if (! elTaxon.getName().equalsIgnoreCase("taxon")){
256 // List<Element> children = elTaxon.getChildren("key");
257 // for (Element element : children){
258 // handleKeys(state, element, null);
260 // nodesToSave.add(taxon);
268 private void handleTaxonElement(EfloraImportState state
, Set
<String
> unhandledTitleClassess
, Set
<String
> unhandledNomeclatureChildren
, Set
<String
> unhandledDescriptionChildren
, Taxon taxon
, List
<Element
> children
) {
269 AnnotatableEntity lastEntity
= null;
270 for (Element element
: children
){
271 String elName
= element
.getName();
273 if (elName
.equalsIgnoreCase("title")){
274 handleTitle(state
, element
, taxon
, unhandledTitleClassess
);
276 }else if(elName
.equalsIgnoreCase("nomenclature")){
277 handleNomenclature(state
, element
, taxon
, unhandledNomeclatureChildren
);
279 }else if(elName
.equalsIgnoreCase("description")){
280 handleDescription(state
, element
, taxon
, unhandledDescriptionChildren
);
282 }else if(elName
.equalsIgnoreCase("habitatecology")){
283 lastEntity
= handleEcology(state
, element
, taxon
);
284 }else if(elName
.equalsIgnoreCase("distribution")){
285 lastEntity
= handleDistribution(state
, element
, taxon
);
286 }else if(elName
.equalsIgnoreCase("uses")){
287 lastEntity
= handleUses(state
, element
, taxon
);
288 }else if(elName
.equalsIgnoreCase("notes")){
289 lastEntity
= handleTaxonNotes(state
, element
, taxon
);
290 }else if(elName
.equalsIgnoreCase("chromosomes")){
291 lastEntity
= handleChromosomes(state
, element
, taxon
);
292 }else if(elName
.equalsIgnoreCase("vernacularnames")){
293 handleVernaculars(state
, element
, taxon
);
294 }else if(elName
.equalsIgnoreCase("key")){
295 lastEntity
= handleKeys(state
, element
, taxon
);
296 }else if(elName
.equalsIgnoreCase("references")){
297 handleReferences(state
, element
, taxon
, lastEntity
);
299 }else if(elName
.equalsIgnoreCase("taxon")){
300 logger
.warn("A taxon should not be part of a taxon");
301 }else if(elName
.equalsIgnoreCase("homotypes")){
302 logger
.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
304 logger
.warn("Unexpected child for taxon: " + elName
);
310 private void handleVernaculars(EfloraImportState state
, Element elVernacular
, Taxon taxon
) {
311 verifyNoAttribute(elVernacular
);
312 verifyNoChildren(elVernacular
, false);
313 String value
= elVernacular
.getTextNormalize();
314 Feature feature
= Feature
.COMMON_NAME();
315 value
= replaceStart(value
, "Noms vernaculaires");
316 String
[] dialects
= value
.split(";");
317 for (String singleDialect
: dialects
){
318 handleSingleDialect(taxon
, singleDialect
, feature
, state
);
324 private void handleSingleDialect(Taxon taxon
, String singleDialect
, Feature feature
, EfloraImportState state
) {
325 singleDialect
= singleDialect
.trim();
326 TaxonDescription description
= getDescription(taxon
);
327 String reDialect
= "\\(dial\\.\\s.*\\)";
328 // String reDialect = "\\(.*\\)";
329 Pattern patDialect
= Pattern
.compile(reDialect
);
330 Matcher matcher
= patDialect
.matcher(singleDialect
);
332 String dialect
= singleDialect
.substring(matcher
.start(), matcher
.end());
333 dialect
= dialect
.replace("(dial. ", "").replace(")", "");
335 Language language
= null;
337 language
= this.getLanguage(state
, state
.getTransformer().getLanguageUuid(dialect
), dialect
, dialect
, dialect
);
338 } catch (UndefinedTransformerMethodException e
) {
339 logger
.error(e
.getMessage());
342 String commonNames
= singleDialect
.substring(0, matcher
.start());
343 String
[] splitNames
= commonNames
.split(",");
344 for (String commonNameString
: splitNames
){
345 commonNameString
= commonNameString
.trim();
346 CommonTaxonName commonName
= CommonTaxonName
.NewInstance(commonNameString
, language
);
347 description
.addElement(commonName
);
350 logger
.warn("No dialect match: " + singleDialect
);
355 private void handleReferences(EfloraImportState state
, Element elReferences
, Taxon taxon
, AnnotatableEntity lastEntity
) {
356 verifyNoAttribute(elReferences
);
357 verifyNoChildren(elReferences
, true);
358 String refString
= elReferences
.getTextNormalize();
359 if (lastEntity
== null){
360 logger
.warn("No last entity defined: " + refString
);
364 Annotation annotation
= Annotation
.NewInstance(refString
, AnnotationType
.EDITORIAL(), Language
.DEFAULT());
365 lastEntity
.addAnnotation(annotation
);
369 private PolytomousKey
handleKeys(EfloraImportState state
, Element elKey
, Taxon taxon
) {
370 UnmatchedLeads openKeys
= state
.getUnmatchedLeads();
373 String title
= makeKeyTitle(elKey
);
376 PolytomousKey key
= PolytomousKey
.NewTitledInstance(title
);
378 //TODO add covered taxa etc.
379 verifyNoAttribute(elKey
);
382 makeKeyNotes(elKey
, key
);
385 List
<Element
> keychoices
= new ArrayList
<Element
>();
386 keychoices
.addAll(elKey
.getChildren("keycouplet"));
387 keychoices
.addAll(elKey
.getChildren("keychoice"));
390 for (Element elKeychoice
: keychoices
){
391 handleKeyChoices(state
, openKeys
, key
, elKeychoice
, taxon
);
392 elKey
.removeContent(elKeychoice
);
396 verifyNoChildren(elKey
);
397 logger
.info("Unmatched leads after key handling:" + openKeys
.toString());
400 if (state
.getConfig().isDoPrintKeys()){
401 key
.print(System
.err
);
403 getPolytomousKeyService().save(key
);
416 private void handleKeyChoices(EfloraImportState state
, UnmatchedLeads openKeys
, PolytomousKey key
, Element elKeychoice
, Taxon taxon
) {
419 //TODO it's still unclear if char is a feature and needs to be a new attribute
420 //or if it is handled as question. Therefore both cases are handled but feature
421 //is finally not yet set
422 KeyStatement question
= handleKeychoiceChar(state
, elKeychoice
);
423 Feature feature
= handleKeychoiceCharAsFeature(state
, elKeychoice
);
426 List
<PolytomousKeyNode
> childNodes
= handleKeychoiceLeads(state
, key
, elKeychoice
, taxon
, question
, feature
);
428 //num -> match with unmatched leads
429 handleKeychoiceNum(openKeys
, key
, elKeychoice
, childNodes
);
432 verifyNoAttribute(elKeychoice
);
442 private void handleKeychoiceNum(UnmatchedLeads openKeys
, PolytomousKey key
, Element elKeychoice
, List
<PolytomousKeyNode
> childNodes
) {
443 Attribute numAttr
= elKeychoice
.getAttribute("num");
444 String num
= CdmUtils
.removeTrailingDots(numAttr
== null?
"":numAttr
.getValue());
445 UnmatchedLeadsKey okk
= UnmatchedLeadsKey
.NewInstance(key
, num
);
446 Set
<PolytomousKeyNode
> matchingNodes
= openKeys
.getNodes(okk
);
447 for (PolytomousKeyNode matchingNode
: matchingNodes
){
448 for (PolytomousKeyNode childNode
: childNodes
){
449 matchingNode
.addChild(childNode
);
451 openKeys
.removeNode(okk
, matchingNode
);
453 if (matchingNodes
.isEmpty()){
454 for (PolytomousKeyNode childNode
: childNodes
){
455 key
.getRoot().addChild(childNode
);
459 elKeychoice
.removeAttribute("num");
471 private List
<PolytomousKeyNode
> handleKeychoiceLeads( EfloraImportState state
, PolytomousKey key
, Element elKeychoice
, Taxon taxon
, KeyStatement question
, Feature feature
) {
472 List
<PolytomousKeyNode
> childNodes
= new ArrayList
<PolytomousKeyNode
>();
473 List
<Element
> leads
= elKeychoice
.getChildren("lead");
474 for(Element elLead
: leads
){
475 PolytomousKeyNode childNode
= handleLead(state
, key
, elLead
, taxon
, question
, feature
);
476 childNodes
.add(childNode
);
487 private KeyStatement
handleKeychoiceChar(EfloraImportState state
, Element elKeychoice
) {
488 KeyStatement statement
= null;
489 Attribute charAttr
= elKeychoice
.getAttribute("char");
490 if (charAttr
!= null){
491 String charStr
= charAttr
.getValue();
492 if (StringUtils
.isNotBlank(charStr
)){
493 statement
= KeyStatement
.NewInstance(charStr
);
495 elKeychoice
.removeAttribute("char");
505 private Feature
handleKeychoiceCharAsFeature(EfloraImportState state
, Element elKeychoice
) {
506 Feature feature
= null;
507 Attribute charAttr
= elKeychoice
.getAttribute("char");
508 if (charAttr
!= null){
509 String charStr
= charAttr
.getValue();
510 feature
= getFeature(charStr
, state
);
511 elKeychoice
.removeAttribute("char");
517 private PolytomousKeyNode
handleLead(EfloraImportState state
, PolytomousKey key
, Element elLead
, Taxon taxon
, KeyStatement question
, Feature feature
) {
518 PolytomousKeyNode node
= PolytomousKeyNode
.NewInstance();
519 //TODO the char attribute in the keychoice is more a feature than a question
520 //needs to be discussed on model side
521 node
.setQuestion(question
);
522 // node.setFeature(feature);
525 String text
= handleLeadText(elLead
, node
);
528 handleLeadNum(elLead
, text
);
531 handleLeadGoto(state
, key
, elLead
, taxon
, node
);
534 verifyNoAttribute(elLead
);
545 private String
handleLeadText(Element elLead
, PolytomousKeyNode node
) {
546 String text
= elLead
.getAttributeValue("text").trim();
547 if (StringUtils
.isBlank(text
)){
548 logger
.warn("Empty text in lead");
550 elLead
.removeAttribute("text");
551 KeyStatement statement
= KeyStatement
.NewInstance(text
);
552 node
.setStatement(statement
);
564 private void handleLeadGoto(EfloraImportState state
, PolytomousKey key
, Element elLead
, Taxon taxon
, PolytomousKeyNode node
) {
565 Attribute gotoAttr
= elLead
.getAttribute("goto");
566 if (gotoAttr
!= null){
567 String strGoto
= gotoAttr
.getValue().trim();
569 UnmatchedLeadsKey gotoKey
= null;
570 if (isInternalNode(strGoto
)){
571 gotoKey
= UnmatchedLeadsKey
.NewInstance(key
, strGoto
);
573 String taxonKey
= makeTaxonKey(strGoto
, taxon
);
574 gotoKey
= UnmatchedLeadsKey
.NewInstance(taxonKey
);
577 UnmatchedLeads openKeys
= state
.getUnmatchedLeads();
578 if (gotoKey
.isInnerLead()){
579 Set
<PolytomousKeyNode
> existingNodes
= openKeys
.getNodes(gotoKey
);
580 for (PolytomousKeyNode existingNode
: existingNodes
){
581 node
.addChild(existingNode
);
584 openKeys
.addKey(gotoKey
, node
);
585 //remove attribute (need for consistency check)
586 elLead
.removeAttribute("goto");
588 logger
.warn("lead has no goto attribute");
597 private void handleLeadNum(Element elLead
, String text
) {
598 Attribute numAttr
= elLead
.getAttribute("num");
599 if (numAttr
!= null){
601 String num
= numAttr
.getValue();
602 elLead
.removeAttribute("num");
604 logger
.info("Keychoice has no num attribute: " + text
);
609 private String
makeTaxonKey(String strGoto
, Taxon taxon
) {
611 if (strGoto
== null){
614 String strGenusName
= taxon
.getName().getGenusOrUninomial();
615 strGoto
= strGoto
.replaceAll("\\([^\\(\\)]*\\)", ""); //replace all brackets
616 strGoto
= strGoto
.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
618 strGoto
= strGoto
.trim();
619 String
[] split
= strGoto
.split("\\s");
620 for (int i
= 0; i
<split
.length
; i
++){
621 String single
= split
[i
];
622 if (isGenusAbbrev(single
, strGenusName
)){
623 split
[i
] = strGenusName
;
625 // if (isInfraSpecificMarker(single)){
626 // String strSpeciesName = taxon.getName().getSpecificEpithet();
627 // split[i] = strGenusName + " " + strSpeciesName + " ";
629 result
= (result
+ " " + split
[i
]).trim();
635 private boolean isInfraSpecificMarker(String single
) {
637 if (Rank
.getRankByIdInVoc(single
).isInfraSpecific()){
640 } catch (UnknownCdmTypeException e
) {
647 private boolean isGenusAbbrev(String single
, String strGenusName
) {
648 if (! single
.matches("[A-Z]\\.?")) {
650 }else if (single
.length() == 0 || strGenusName
== null || strGenusName
.length() == 0){
653 return single
.charAt(0) == strGenusName
.charAt(0);
658 private boolean isInternalNode(String strGoto
) {
659 return CdmUtils
.isNumeric(strGoto
);
663 private void makeKeyNotes(Element keyElement
, PolytomousKey key
) {
664 Element elNotes
= keyElement
.getChild("notes");
665 if (elNotes
!= null){
666 keyElement
.removeContent(elNotes
);
667 String notes
= elNotes
.getTextNormalize();
668 if (StringUtils
.isNotBlank(notes
)){
669 key
.addAnnotation(Annotation
.NewInstance(notes
, AnnotationType
.EDITORIAL(), Language
.DEFAULT()));
675 private String
makeKeyTitle(Element keyElement
) {
676 String title
= "- no title - ";
677 Attribute titleAttr
= keyElement
.getAttribute("title");
678 keyElement
.removeAttribute(titleAttr
);
679 if (titleAttr
== null){
680 Element elTitle
= keyElement
.getChild("keytitle");
681 keyElement
.removeContent(elTitle
);
682 if (elTitle
!= null){
683 title
= elTitle
.getTextNormalize();
686 title
= titleAttr
.getValue();
697 private TextData
handleChromosomes(EfloraImportState state
, Element element
, Taxon taxon
) {
698 Feature chromosomeFeature
= getFeature("chromosomes", state
);
699 verifyNoAttribute(element
);
700 verifyNoChildren(element
);
701 String value
= element
.getTextNormalize();
702 value
= replaceStart(value
, "Chromosomes");
703 String chromosomesPart
= getChromosomesPart(value
);
704 String references
= value
.replace(chromosomesPart
, "").trim();
705 chromosomesPart
= chromosomesPart
.replace(":", "").trim();
706 return addDescriptionElement(state
, taxon
, chromosomesPart
, chromosomeFeature
, references
);
715 private void makeOriginalSourceReferences(ISourceable sourcable
, String splitter
, String refAll
) {
716 String
[] splits
= refAll
.split(splitter
);
717 for (String strRef
: splits
){
718 Reference ref
= ReferenceFactory
.newGeneric();
719 ref
.setTitleCache(strRef
, true);
720 String refDetail
= parseReferenceYearAndDetail(ref
);
721 sourcable
.addSource(OriginalSourceType
.PrimaryTaxonomicSource
, null, null, ref
, refDetail
);
725 //TODO use regex instead
726 /* String detailResult = null;
727 String titleToParse = ref.getTitleCache();
728 String reReference = "^\\.{1,}";
729 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
730 String reYear = "\\([1-2]{1}[0-9]{3}\\)";
731 String reYearPeriod = reYear + "(-" + reYear + ")+";
732 String reDetail = "\\.{1,10}$";
741 private String
getChromosomesPart(String str
) {
742 Pattern pattern
= Pattern
.compile("2n\\s*=\\s*\\d{1,2}:");
743 Matcher matcher
= pattern
.matcher(str
);
745 return matcher
.group(0);
747 logger
.warn("Chromosomes could not be parsed: " + str
);
758 private TextData
handleTaxonNotes(EfloraImportState state
, Element element
, Taxon taxon
) {
759 TextData result
= null;
760 verifyNoChildren(element
, true);
761 //verifyNoAttribute(element);
762 List
<Attribute
> attributes
= element
.getAttributes();
763 for (Attribute attribute
: attributes
){
764 if (! attribute
.getName().equalsIgnoreCase("class")){
765 logger
.warn("Char has unhandled attribute " + attribute
.getName());
767 String classValue
= attribute
.getValue();
768 result
= handleDescriptiveElement(state
, element
, taxon
, classValue
);
771 //if no class attribute exists, handle as note
772 if (attributes
.isEmpty()){
773 result
= handleDescriptiveElement(state
, element
, taxon
, "Note");
776 //Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
777 //taxon.addAnnotation(annotation);
778 return result
; //annotation;
790 private TextData
handleDescriptiveElement(EfloraImportState state
, Element element
, Taxon taxon
, String classValue
) {
791 TextData result
= null;
792 Feature feature
= getFeature(classValue
, state
);
793 if (feature
== null){
794 logger
.warn("Unhandled feature: " + classValue
);
796 String value
= element
.getValue();
797 value
= replaceStart(value
, "Notes");
798 value
= replaceStart(value
, "Note");
799 result
= addDescriptionElement(state
, taxon
, value
, feature
, null);
805 private void removeBr(Element element
) {
806 element
.removeChildren("Br");
807 element
.removeChildren("br");
808 element
.removeChildren("BR");
817 private TextData
handleUses(EfloraImportState state
, Element element
, Taxon taxon
) {
818 verifyNoAttribute(element
);
819 verifyNoChildren(element
, true);
820 String value
= element
.getTextNormalize();
821 value
= replaceStart(value
, "Uses");
822 Feature feature
= Feature
.USES();
823 return addDescriptionElement(state
, taxon
, value
, feature
, null);
832 * @param unhandledDescriptionChildren
834 private DescriptionElementBase
handleDistribution(EfloraImportState state
, Element element
, Taxon taxon
) {
835 verifyNoAttribute(element
);
836 verifyNoChildren(element
, true);
837 String value
= element
.getTextNormalize();
838 value
= replaceStart(value
, "Distribution");
839 Feature feature
= Feature
.DISTRIBUTION();
840 //distribution parsing almost impossible as there is lots of freetext in the distribution tag
841 return addDescriptionElement(state
, taxon
, value
, feature
, null);
849 * @param unhandledDescriptionChildren
851 private TextData
handleEcology(EfloraImportState state
, Element elEcology
, Taxon taxon
) {
852 verifyNoAttribute(elEcology
);
853 verifyNoChildren(elEcology
, true);
854 String value
= elEcology
.getTextNormalize();
855 Feature feature
= Feature
.ECOLOGY();
856 if (value
.startsWith("Habitat & Ecology")){
857 feature
= getFeature("Habitat & Ecology", state
);
858 value
= replaceStart(value
, "Habitat & Ecology");
859 }else if (value
.startsWith("Habitat")){
860 value
= replaceStart(value
, "Habitat");
861 feature
= getFeature("Habitat", state
);
863 return addDescriptionElement(state
, taxon
, value
, feature
, null);
870 * @param replacementString
872 private String
replaceStart(String value
, String replacementString
) {
873 if (value
.startsWith(replacementString
) ){
874 value
= value
.substring(replacementString
.length()).trim();
876 while (value
.startsWith("-") || value
.startsWith("–") ){
877 value
= value
.substring("-".length()).trim();
885 * @param replacementString
887 protected String
removeTrailing(String value
, String replacementString
) {
891 if (value
.endsWith(replacementString
) ){
892 value
= value
.substring(0, value
.length() - replacementString
.length()).trim();
901 * @param unhandledNomeclatureChildren
903 private void handleNomenclature(EfloraImportState state
, Element elNomenclature
, Taxon taxon
, Set
<String
> unhandledChildren
) {
904 verifyNoAttribute(elNomenclature
);
906 List
<Element
> elements
= elNomenclature
.getChildren();
907 for (Element element
: elements
){
908 if (element
.getName().equals("homotypes")){
909 handleHomotypes(state
, element
, taxon
);
910 }else if (element
.getName().equals("notes")){
911 handleNomenclatureNotes(state
, element
, taxon
);
913 unhandledChildren
.add(element
.getName());
921 private void handleNomenclatureNotes(EfloraImportState state
, Element elNotes
, Taxon taxon
) {
922 verifyNoAttribute(elNotes
);
923 verifyNoChildren(elNotes
);
924 String notesText
= elNotes
.getTextNormalize();
925 Annotation annotation
= Annotation
.NewInstance(notesText
, AnnotationType
.EDITORIAL(), Language
.DEFAULT());
926 taxon
.addAnnotation(annotation
);
931 private static Set
<String
> unhandledHomotypeChildren
= new HashSet
<String
>();
937 private void handleHomotypes(EfloraImportState state
, Element elHomotypes
, Taxon taxon
) {
938 verifyNoAttribute(elHomotypes
);
940 List
<Element
> elements
= elHomotypes
.getChildren();
941 HomotypicalGroup homotypicalGroup
= null;
942 for (Element element
: elements
){
943 if (element
.getName().equals("nom")){
944 homotypicalGroup
= handleNom(state
, element
, taxon
, homotypicalGroup
);
946 unhandledHomotypeChildren
.add(element
.getName());
952 private static Set
<String
> unhandledNomChildren
= new HashSet
<String
>();
959 private HomotypicalGroup
handleNom(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
) {
960 List
<Attribute
> attributes
= elNom
.getAttributes();
962 boolean taxonBaseClassType
= false;
963 for (Attribute attribute
: attributes
){
964 if (! attribute
.getName().equalsIgnoreCase("class")){
965 logger
.warn("Nom has unhandled attribute " + attribute
.getName());
967 String classValue
= attribute
.getValue();
968 if (classValue
.equalsIgnoreCase("acceptedname")){
969 homotypicalGroup
= handleNomTaxon(state
, elNom
, taxon
,homotypicalGroup
, false);
970 taxonBaseClassType
= true;
971 }else if (classValue
.equalsIgnoreCase("synonym")){
972 homotypicalGroup
= handleNomTaxon(state
, elNom
, taxon
, homotypicalGroup
, true);
973 taxonBaseClassType
= true;
974 }else if (classValue
.equalsIgnoreCase("typeref")){
975 handleTypeRef(state
, elNom
, taxon
, homotypicalGroup
);
977 logger
.warn("Unhandled class value for nom: " + classValue
);
983 List
<Element
> elements
= elNom
.getChildren();
984 for (Element element
: elements
){
985 if (element
.getName().equals("name") || element
.getName().equals("homonym") ){
986 if (taxonBaseClassType
== false){
987 logger
.warn("Name or homonym tag not allowed in non taxon nom tag");
990 unhandledNomChildren
.add(element
.getName());
994 return homotypicalGroup
;
1002 * @param homotypicalGroup
1004 protected void handleTypeRef(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
) {
1005 verifyNoChildren(elNom
);
1006 String typeRef
= elNom
.getTextNormalize();
1007 typeRef
= removeStartingTypeRefMinus(typeRef
);
1009 String
[] split
= typeRef
.split(":");
1010 if (split
.length
< 2){
1011 logger
.warn("typeRef has no ':' : " + typeRef
);
1012 }else if (split
.length
> 2){
1013 logger
.warn("typeRef has more than 1 ':' : " + typeRef
);
1015 StringBuffer typeType
= new StringBuffer(split
[0]);
1016 String typeText
= split
[1].trim();
1017 TypeDesignationBase typeDesignation
= getTypeDesignationAndReference(typeType
);
1019 //Name Type Desitnations
1020 if (typeDesignation
instanceof NameTypeDesignation
){
1021 makeNameTypeDesignations(typeType
, typeText
, typeDesignation
);
1023 //SpecimenTypeDesignations
1024 else if (typeDesignation
instanceof SpecimenTypeDesignation
){
1025 makeSpecimenTypeDesignation(typeType
, typeText
, typeDesignation
);
1027 logger
.error("Unhandled type designation class" + typeDesignation
.getClass().getName());
1029 for (TaxonName name
: homotypicalGroup
.getTypifiedNames()){
1030 name
.addTypeDesignation(typeDesignation
, true);
1040 protected String
removeStartingTypeRefMinus(String typeRef
) {
1041 typeRef
= replaceStart(typeRef
, "-");
1042 typeRef
= replaceStart(typeRef
, "—");
1043 typeRef
= replaceStart(typeRef
, "\u002d");
1044 typeRef
= replaceStart(typeRef
, "\u2013");
1045 typeRef
= replaceStart(typeRef
, "--");
1052 * @param typeDesignation
1054 private void makeNameTypeDesignations(StringBuffer typeType
, String typeText
, TypeDesignationBase typeDesignation
) {
1055 if (typeType
.toString().trim().equalsIgnoreCase("Type")){
1057 }else if (typeType
.toString().trim().equalsIgnoreCase("Lectotype")){
1058 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.LECTOTYPE());
1059 }else if (typeType
.toString().trim().equalsIgnoreCase("Syntype")){
1060 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.SYNTYPE());
1062 logger
.warn("Unhandled type string: " + typeType
+ "(" + CharUtils
.unicodeEscaped(typeType
.charAt(0)) + ")");
1065 typeText
= cleanNameType(typeText
);
1067 TaxonName nameType
= (TaxonName
)parser
.parseFullName(typeText
, NomenclaturalCode
.ICNAFP
, Rank
.SPECIES());
1068 ((NameTypeDesignation
) typeDesignation
).setTypeName(nameType
);
1069 //TODO wie können NameTypes den Namen zugeordnet werden? - wird aber vom Portal via NameCache matching gemacht
/**
 * Cleans the text of a name type before it is parsed.
 * NOTE(review): only the split is visible in this view; which part of the split
 * result is returned is not shown - confirm.
 *
 * @param typeText the raw type name text
 */
1073 private String
cleanNameType(String typeText
) {
// split around a "[...]" section that may be followed by one more character
1075 String
[] split
= typeText
.split("\\[.*\\].?");
1084 * @param typeDesignation
1086 protected void makeSpecimenTypeDesignation(StringBuffer typeType
, String typeText
, TypeDesignationBase typeDesignation
) {
1087 if (typeType
.toString().trim().equalsIgnoreCase("Type")){
1089 }else if (typeType
.toString().trim().equalsIgnoreCase("Neotype") || typeType
.toString().trim().equalsIgnoreCase("Neotypes")){
1090 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.NEOTYPE());
1091 }else if (typeType
.toString().trim().equalsIgnoreCase("Syntype") || typeType
.toString().trim().equalsIgnoreCase("Syntypes")){
1092 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.SYNTYPE());
1093 }else if (typeType
.toString().trim().equalsIgnoreCase("Lectotype")){
1094 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.LECTOTYPE());
1095 }else if (typeType
.toString().trim().equalsIgnoreCase("Paratype")){
1096 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.PARATYPE());
1098 logger
.warn("Unhandled type string: " + typeType
);
1100 DerivedUnit specimen
= DerivedUnit
.NewPreservedSpecimenInstance();
1101 if (typeText
.length() > 255){
1102 specimen
.setTitleCache(typeText
.substring(0, 252) + "...", true);
1104 specimen
.setTitleCache(typeText
, true);
1106 specimen
.putDefinition(Language
.ENGLISH(), typeText
);
1107 ((SpecimenTypeDesignation
) typeDesignation
).setTypeSpecimen(specimen
);
1110 private TypeDesignationBase
getTypeDesignationAndReference(StringBuffer typeType
) {
1111 TypeDesignationBase result
;
1112 Reference ref
= parseTypeDesignationReference(typeType
);
1113 if (typeType
.indexOf(" species")>-1 || typeType
.indexOf("genus")>-1){
1114 if (typeType
.indexOf(" species")>-1 ){
1115 result
= NameTypeDesignation
.NewInstance();
1116 int start
= typeType
.indexOf(" species");
1117 typeType
.replace(start
, start
+ " species".length(), "");
1119 result
= NameTypeDesignation
.NewInstance();
1120 int start
= typeType
.indexOf(" genus");
1121 typeType
.replace(start
, start
+ " genus".length(), "");
1124 result
= SpecimenTypeDesignation
.NewInstance();
1126 result
.setCitation(ref
);
1131 private Reference
parseTypeDesignationReference(StringBuffer typeType
) {
1132 Reference result
= null;
1133 String reBracketReference
= "\\(.*\\)";
1134 Pattern patBracketReference
= Pattern
.compile(reBracketReference
);
1135 Matcher matcher
= patBracketReference
.matcher(typeType
);
1136 if (matcher
.find()){
1137 String refString
= matcher
.group();
1138 int start
= typeType
.indexOf(refString
);
1139 typeType
.replace(start
, start
+ refString
.length(), "");
1140 refString
= refString
.replace("(", "").replace(")", "").trim();
1141 Reference ref
= ReferenceFactory
.newGeneric();
1142 ref
.setTitleCache(refString
, true);
1155 private HomotypicalGroup
handleNomTaxon(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
, boolean isSynonym
) {
1156 INonViralName nvn
= makeName(taxon
, homotypicalGroup
, isSynonym
);
1157 TaxonName name
= TaxonName
.castAndDeproxy(nvn
);
1160 boolean hasGenusInfo
= false;
1161 TeamOrPersonBase
<?
> lastTeam
= null;
1164 List
<Element
> elGenus
= XmlHelp
.getAttributedChildListWithValue(elNom
, "name", "class", "genus");
1165 if (elGenus
.size() > 0){
1166 hasGenusInfo
= true;
1168 logger
.debug ("No Synonym Genus");
1170 //infra rank -> needed to handle authors correctly
1171 List
<Element
> elInfraRank
= XmlHelp
.getAttributedChildListWithValue(elNom
, "name", "class", "infrank");
1172 Rank infraRank
= null;
1173 infraRank
= handleInfRank(name
, elInfraRank
, infraRank
);
1175 //get left over elements
1176 List
<Element
> elements
= elNom
.getChildren();
1177 elements
.removeAll(elInfraRank
);
1179 for (Element element
: elements
){
1180 if (element
.getName().equals("name")){
1181 String classValue
= element
.getAttributeValue("class");
1182 String value
= element
.getValue().trim();
1183 if (classValue
.equalsIgnoreCase("genus") || classValue
.equalsIgnoreCase("family") ){
1184 name
.setGenusOrUninomial(value
);
1185 }else if (classValue
.equalsIgnoreCase("family") ){
1186 name
.setGenusOrUninomial(value
);
1187 name
.setRank(Rank
.FAMILY());
1188 }else if (classValue
.equalsIgnoreCase("subgenus")){
1189 //name.setInfraGenericEpithet(value);
1190 name
.setNameCache(value
.replace(":", "").trim());
1191 name
.setRank(Rank
.SUBGENUS());
1192 }else if (classValue
.equalsIgnoreCase("epithet") ){
1193 if (hasGenusInfo
== true){
1194 name
.setSpecificEpithet(value
);
1196 handleInfraspecificEpithet(element
, classValue
, name
);
1198 }else if (classValue
.equalsIgnoreCase("author")){
1199 handleNameAuthors(element
, name
);
1200 }else if (classValue
.equalsIgnoreCase("paraut")){
1201 handleBasionymAuthor(state
, element
, name
, false);
1202 }else if (classValue
.equalsIgnoreCase("infrauthor") || classValue
.equalsIgnoreCase("infraut")){
1203 handleInfrAuthor(state
, element
, name
, true);
1204 }else if (classValue
.equalsIgnoreCase("infrapar") || classValue
.equalsIgnoreCase("infrpar") || classValue
.equalsIgnoreCase("parauthor") ){
1205 handleBasionymAuthor(state
, element
, name
, true);
1206 }else if (classValue
.equalsIgnoreCase("infrepi")){
1207 handleInfrEpi(name
, infraRank
, value
);
1208 }else if (classValue
.equalsIgnoreCase("pub")){
1209 lastTeam
= handleNomenclaturalReference(name
, value
);
1210 }else if (classValue
.equalsIgnoreCase("usage")){
1211 lastTeam
= handleNameUsage(taxon
, name
, value
, lastTeam
);
1212 }else if (classValue
.equalsIgnoreCase("note")){
1213 handleNameNote(name
, value
);
1214 }else if (classValue
.equalsIgnoreCase("num")){
1216 logger
.warn("Duplicate num: " + value
);
1220 if (isSynonym
== true){
1221 logger
.warn("Synonym should not have a num");
1223 }else if (classValue
.equalsIgnoreCase("typification")){
1224 logger
.warn("Typification should not be a nom class");
1226 logger
.warn("Unhandled name class: " + classValue
);
1228 }else if(element
.getName().equals("homonym")){
1229 handleHomonym(state
, element
, name
);
1231 // child element is not "name"
1232 unhandledNomChildren
.add(element
.getName());
1238 String taxonString
= name
.getNameCache();
1239 //try to find matching lead nodes
1240 UnmatchedLeadsKey leadsKey
= UnmatchedLeadsKey
.NewInstance(num
, taxonString
);
1241 Set
<PolytomousKeyNode
> matchingNodes
= handleMatchingNodes(state
, taxon
, leadsKey
);
1242 //same without using the num
1244 UnmatchedLeadsKey noNumLeadsKey
= UnmatchedLeadsKey
.NewInstance("", taxonString
);
1245 handleMatchingNodes(state
, taxon
, noNumLeadsKey
);
1247 if (matchingNodes
.isEmpty() && num
!= null){
1248 logger
.warn("Taxon has num but no matching nodes exist: " + num
+ ", Key: " + leadsKey
.toString());
1252 //test nom element has no text
1253 if (StringUtils
.isNotBlank(elNom
.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1254 String strElNom
= elNom
.getTextNormalize();
1255 if ("?".equals(strElNom
)){
1256 handleQuestionMark(name
, taxon
);
1258 // Character c = strElNom.charAt(0);
1259 //System.out.println(CharUtils.unicodeEscaped(c));
1260 logger
.warn("Nom tag has text: " + strElNom
);
1263 return name
.getHomotypicalGroup();
1267 private void handleQuestionMark(INonViralName name
, Taxon taxon
) {
1268 int count
= name
.getTaxonBases().size();
1270 logger
.warn("Name has " + count
+ " taxa. This is not handled for question mark");
1272 TaxonBase taxonBase
= name
.getTaxonBases().iterator().next();
1273 taxonBase
.setDoubtful(true);
1278 //merge with handleNomTaxon
1279 private void handleHomonym(EfloraImportState state
, Element elHomonym
, TaxonName upperName
) {
1280 verifyNoAttribute(elHomonym
);
1283 TaxonName homonymName
= TaxonNameFactory
.NewBotanicalInstance(upperName
.getRank());
1284 homonymName
.setGenusOrUninomial(upperName
.getGenusOrUninomial());
1285 homonymName
.setInfraGenericEpithet(upperName
.getInfraGenericEpithet());
1286 homonymName
.setSpecificEpithet(upperName
.getSpecificEpithet());
1287 homonymName
.setInfraSpecificEpithet(upperName
.getInfraSpecificEpithet());
1289 for (Element elName
: (List
<Element
>)elHomonym
.getChildren("name")){
1290 String classValue
= elName
.getAttributeValue("class");
1291 String value
= elName
.getValue().trim();
1292 if (classValue
.equalsIgnoreCase("genus") ){
1293 homonymName
.setGenusOrUninomial(value
);
1294 }else if (classValue
.equalsIgnoreCase("epithet") ){
1295 homonymName
.setSpecificEpithet(value
);
1296 }else if (classValue
.equalsIgnoreCase("author")){
1297 handleNameAuthors(elName
, homonymName
);
1298 }else if (classValue
.equalsIgnoreCase("paraut")){
1299 handleBasionymAuthor(state
, elName
, homonymName
, true);
1300 }else if (classValue
.equalsIgnoreCase("pub")){
1301 handleNomenclaturalReference(homonymName
, value
);
1302 }else if (classValue
.equalsIgnoreCase("note")){
1303 handleNameNote(homonymName
, value
);
1305 logger
.warn("Unhandled class value: " + classValue
);
1308 //TODO verify other information
1312 boolean homonymIsLater
= false;
1313 NameRelationshipType relType
= NameRelationshipType
.LATER_HOMONYM();
1314 if (upperName
.getNomenclaturalReference() != null && homonymName
.getNomenclaturalReference() != null){
1315 TimePeriod homonymYear
= homonymName
.getNomenclaturalReference().getDatePublished();
1316 TimePeriod nameYear
= upperName
.getNomenclaturalReference().getDatePublished();
1317 homonymIsLater
= homonymYear
.getStart().compareTo(nameYear
.getStart()) > 0;
1319 if (upperName
.getNomenclaturalReference() == null){
1320 logger
.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName
.getTitleCache());
1322 if (homonymName
.getNomenclaturalReference() == null){
1323 logger
.warn("Homonym does not have a nomenclatural reference or year: " + homonymName
.getTitleCache());
1326 if (homonymIsLater
){
1327 homonymName
.addRelationshipToName(upperName
, relType
, null, null);
1329 upperName
.addRelationshipToName(homonymName
, relType
, null, null);
1340 private Set
<PolytomousKeyNode
> handleMatchingNodes(EfloraImportState state
, Taxon taxon
, UnmatchedLeadsKey leadsKey
) {
1341 Set
<PolytomousKeyNode
> matchingNodes
= state
.getUnmatchedLeads().getNodes(leadsKey
);
1342 for (PolytomousKeyNode matchingNode
: matchingNodes
){
1343 state
.getUnmatchedLeads().removeNode(leadsKey
, matchingNode
);
1344 matchingNode
.setTaxon(taxon
);
1345 state
.getPolytomousKeyNodesToSave().add(matchingNode
);
1347 return matchingNodes
;
1351 private void handleNameNote(INonViralName name
, String value
) {
1352 logger
.warn("Name note: " + value
+ ". Available in portal?");
1353 Annotation annotation
= Annotation
.NewInstance(value
, AnnotationType
.EDITORIAL(), Language
.DEFAULT());
1354 name
.addAnnotation(annotation
);
/**
 * Creates a generic reference from a name usage string and adds it as primary taxonomic
 * source to a new citation text data in the taxon's description.
 * NOTE(review): several short lines of this method are not visible in this view (how
 * lastTeam is used, the trailing addSource arguments, the returned value) - confirm
 * before relying on this summary.
 *
 * @param taxon the taxon whose description receives the citation
 * @param name the name the usage belongs to
 * @param referenceTitle the usage text
 * @param lastTeam the team returned for the previously handled reference or usage
 */
1363 protected TeamOrPersonBase
handleNameUsage(Taxon taxon
, TaxonName name
, String referenceTitle
, TeamOrPersonBase lastTeam
) {
1364 Reference ref
= ReferenceFactory
.newGeneric();
1365 referenceTitle
= removeStartingSymbols(referenceTitle
, ref
);
1367 ref
.setTitleCache(referenceTitle
, true);
// the helpers below work on the reference created above
1368 String microReference
= parseReferenceYearAndDetail(ref
);
1369 TeamOrPersonBase
<?
> team
= getReferenceAuthor(ref
);
1370 parseReferenceType(ref
);
1374 ref
.setAuthorship(team
);
// the usage is stored as a citation element of the taxon description
1376 TaxonDescription description
= getDescription(taxon
);
1377 TextData textData
= TextData
.NewInstance(Feature
.CITATION());
1378 textData
.addSource(OriginalSourceType
.PrimaryTaxonomicSource
, null, null, ref
, microReference
,
1380 description
.addElement(textData
);
1386 * @param referenceTitle
1390 private String
removeStartingSymbols(String referenceTitle
, Reference ref
) {
1391 if (referenceTitle
.startsWith(";") || referenceTitle
.startsWith(",") || referenceTitle
.startsWith(":")){
1392 referenceTitle
= referenceTitle
.substring(1).trim();
1393 ref
.setTitleCache(referenceTitle
);
1395 return referenceTitle
;
/**
 * Derives the reference type from the title of the reference. Visible cases: a title
 * that does not start with "in " is typed as book; an "in " title without a comma is
 * typed as article with a new journal; otherwise it is typed as book section with a
 * new book whose author is the part before the first ", " followed by a capital letter.
 * NOTE(review): short lines (null check, early returns, else branches) are not visible
 * in this view - confirm the exact control flow.
 *
 * @param ref the reference to type
 */
1399 private void parseReferenceType(Reference ref
) {
1400 String title
= ref
.getTitle();
1404 title
= title
.trim();
// no "in " prefix
1406 if (! title
.startsWith("in ")){
1407 ref
.setType(ReferenceType
.Book
);
// cut off the leading "in "
1411 title
= title
.substring(3);
// no comma in the remaining title
1414 if (title
.indexOf(",") == -1){
1415 ref
.setType(ReferenceType
.Article
);
1416 IJournal journal
= ReferenceFactory
.newJournal();
1417 journal
.setTitle(title
);
1419 ref
.setInJournal(journal
);
1423 ref
.setType(ReferenceType
.BookSection
);
// split at a comma followed by optional whitespace and a capital letter
1424 String
[] split
= (title
).split(",\\s*[A-Z]");
1425 if (split
.length
<= 1){
1426 logger
.warn("Can not fully decide what reference type. Guess it is a book section: " + title
);
1428 IBook book
= ReferenceFactory
.newBook();
1429 Team bookTeam
= Team
.NewTitledInstance(split
[0].trim(), split
[0].trim());
1431 title
= title
.substring(split
[0].length() + 1).trim();
1432 } catch (Exception e
) {
1433 logger
.error("ERROR occurred when trying to split title: " + title
+ "; split[0]: + " + split
[0]);
1435 book
.setTitle(title
);
1436 book
.setAuthorship(bookTeam
);
1437 book
.setDatePublished(ref
.getDatePublished());
1439 ref
.setInBook(book
);
/**
 * Tries to extract the author team from the title (or, if the title is null, the title
 * cache) of a reference. Visible cases: the text before " in " is used as team and the
 * title is set to "in ..."; otherwise the text before the first comma that is followed
 * by a capital letter is used as team and removed from the title.
 * NOTE(review): the return statements and the use of isCache are not visible in this
 * view - confirm what is returned in each case.
 *
 * @param ref the reference to read and update
 */
1444 protected Team
getReferenceAuthor (Reference ref
) {
1445 boolean isCache
= false;
1446 String referenceTitle
= ref
.getTitle();
1447 if (referenceTitle
== null){
1449 referenceTitle
= ref
.getTitleCache();
// the leading blank lets a title that starts with "in " match " in " as well
1452 String
[] split
= (" " + referenceTitle
).split(" in ");
1453 if (split
.length
> 1){
1454 if (StringUtils
.isNotBlank(split
[0])){
1455 //' in ' is within the reference string, take the preceding string as the team
1456 Team team
= Team
.NewTitledInstance(split
[0].trim(), split
[0].trim());
1458 ref
.setTitle("in " + split
[1]);
1462 //string starts with in therefore no author is given
1467 split
= referenceTitle
.split(",");
1468 if (split
.length
< 2){
1469 //no author is given
// split at a comma followed by optional whitespace and a capital letter
1474 split
= (referenceTitle
).split(",\\s*[A-Z]");
1475 if (split
.length
> 1){
1476 Team team
= Team
.NewTitledInstance(split
[0].trim(), split
[0].trim());
1478 ref
.setTitle(referenceTitle
.substring(split
[0].length()+1).trim());
1482 logger
.warn("Can't decide if a usage has an author: " + referenceTitle
);
/**
 * Replaced by the &lt;homonym&gt; tag but still in use for exceptions.
 * Looks for a " non " (or ",non ") part in the detail string. The visible code builds a
 * homonym name from the text after that part, copying rank and epithets from the given
 * name, and relates both names with a later-homonym relationship according to the
 * publication start dates.
 * NOTE(review): the return statements are not visible in this view; the inner
 * "detail != null" check follows a null check at the top of the method, so the homonym
 * code after it may never run - confirm.
 *
 * @param detail the detail (micro reference) string, may be null
 * @param name the name the detail belongs to
 */
1494 protected String
parseHomonym(String detail
, TaxonName name
) {
1496 if (detail
== null){
// whitespace or comma, then "non", then whitespace
1501 String reNon
= "(\\s|,)non\\s";
1502 Pattern patReference
= Pattern
.compile(reNon
);
1503 Matcher matcher
= patReference
.matcher(detail
);
1504 if (matcher
.find()){
1505 int start
= matcher
.start();
1506 int end
= matcher
.end();
1508 if (detail
!= null){
1509 logger
.warn("Unhandled non part: " + detail
.substring(start
));
// part before the "non" match
1513 result
= detail
.substring(0, start
);
// part after the "non" match describes the homonym
1516 String homonymString
= detail
.substring(end
);
1519 TaxonName homonymName
= TaxonNameFactory
.NewBotanicalInstance(name
.getRank());
1520 homonymName
.setGenusOrUninomial(name
.getGenusOrUninomial());
1521 homonymName
.setInfraGenericEpithet(name
.getInfraGenericEpithet());
1522 homonymName
.setSpecificEpithet(name
.getSpecificEpithet());
1523 homonymName
.setInfraSpecificEpithet(name
.getInfraSpecificEpithet());
1524 Reference homonymNomRef
= ReferenceFactory
.newGeneric();
1525 homonymNomRef
.setTitleCache(homonymString
, true);
1526 String homonymNomRefDetail
= parseReferenceYearAndDetail(homonymNomRef
);
1527 homonymName
.setNomenclaturalMicroReference(homonymNomRefDetail
);
// the title cache is used as the author team title
1528 String authorTitle
= homonymNomRef
.getTitleCache();
1529 Team team
= Team
.NewTitledInstance(authorTitle
, authorTitle
);
1530 homonymNomRef
.setAuthorship(team
);
1531 homonymNomRef
.setTitle("");
1532 homonymNomRef
.setProtectedTitleCache(false);
// relationship direction depends on which name was published later
1535 boolean homonymIsLater
= false;
1536 NameRelationshipType relType
= NameRelationshipType
.LATER_HOMONYM();
1537 TimePeriod homonymYear
= homonymNomRef
.getDatePublished();
1538 if (name
.getNomenclaturalReference() != null){
1539 TimePeriod nameYear
= name
.getNomenclaturalReference().getDatePublished();
1540 homonymIsLater
= homonymYear
.getStart().compareTo(nameYear
.getStart()) > 0;
1542 logger
.warn("Classification name has no nomenclatural reference");
1544 if (homonymIsLater
){
1545 homonymName
.addRelationshipToName(name
, relType
, null, null);
1547 name
.addRelationshipToName(homonymName
, relType
, null, null);
/**
 * Creates a generic nomenclatural reference from the given text, sets it together with
 * the micro reference on the name and uses the name's combination authorship as the
 * reference author.
 * NOTE(review): the returned value and whether setAuthorship is skipped for a missing
 * team are not visible in this view - confirm.
 *
 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
 * @param name the name that receives the nomenclatural reference
 * @param value the reference text
 */
1562 protected TeamOrPersonBase
handleNomenclaturalReference(TaxonName name
, String value
) {
1563 Reference nomRef
= ReferenceFactory
.newGeneric();
1564 nomRef
.setTitleCache(value
, true);
1565 parseNomStatus(nomRef
, name
);
1566 String microReference
= parseReferenceYearAndDetail(nomRef
);
1567 name
.setNomenclaturalReference(nomRef
);
// parseHomonym may shorten the micro reference
1568 microReference
= parseHomonym(microReference
, name
);
1569 name
.setNomenclaturalMicroReference(microReference
);
1570 TeamOrPersonBase
<?
> team
= name
.getCombinationAuthorship();
1572 logger
.warn("Name has nom. ref. but no author team. Name: " + name
.getTitleCache() + ", Nom.Ref.: " + value
);
1574 nomRef
.setAuthorship(team
);
1579 private void handleInfrAuthor(EfloraImportState state
, Element elAuthor
, INonViralName name
, boolean overwrite
) {
1580 String strAuthor
= elAuthor
.getValue().trim();
1581 if (strAuthor
.endsWith(",")){
1582 strAuthor
= strAuthor
.substring(0, strAuthor
.length() -1);
1584 TeamOrPersonBase
[] team
= getTeam(strAuthor
);
1585 if (name
.getCombinationAuthorship() != null && overwrite
== false){
1586 logger
.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1588 name
.setCombinationAuthorship(team
[0]);
1589 name
.setExCombinationAuthorship(team
[1]);
1597 * Sets the names rank according to the infrank value
1600 * @param elInfraRank
1604 private Rank
handleInfRank(INonViralName name
, List
<Element
> elInfraRank
, Rank infraRank
) {
1605 if (elInfraRank
.size() == 1){
1606 String strRank
= elInfraRank
.get(0).getTextNormalize();
1608 infraRank
= Rank
.getRankByLatinNameOrIdInVoc(strRank
);
1609 } catch (UnknownCdmTypeException e
) {
1611 infraRank
= Rank
.getRankByLatinNameOrIdInVoc(strRank
+ ".");
1612 } catch (UnknownCdmTypeException e2
) {
1613 logger
.warn("Unknown infrank " + strRank
+ ". Set infraRank to (null).");
1616 }else if (elInfraRank
.size() > 1){
1617 logger
.warn ("There is more than 1 infrank");
1619 if (infraRank
!= null){
1620 name
.setRank(infraRank
);
1626 private void handleInfrEpi(INonViralName name
, Rank infraRank
, String value
) {
1627 if (infraRank
!= null && infraRank
.isInfraSpecific()){
1628 name
.setInfraSpecificEpithet(value
);
1629 if (CdmUtils
.isCapital(value
)){
1630 logger
.warn("Infraspecific epithet starts with a capital letter: " + value
);
1632 }else if (infraRank
!= null && infraRank
.isInfraGeneric()){
1633 name
.setInfraGenericEpithet(value
);
1634 if (! CdmUtils
.isCapital(value
)){
1635 logger
.warn("Infrageneric epithet does not start with a capital letter: " + value
);
1638 logger
.warn("Infrepi could not be handled: " + value
);
1645 * Returns the (empty) with the correct homotypical group depending on the taxon status
1647 * @param homotypicalGroup
1651 private TaxonName
makeName(Taxon taxon
,HomotypicalGroup homotypicalGroup
, boolean isSynonym
) {
1655 name
= TaxonNameFactory
.NewBotanicalInstance(Rank
.SPECIES(), homotypicalGroup
);
1656 SynonymType synonymType
= SynonymType
.HETEROTYPIC_SYNONYM_OF
;
1657 if (taxon
.getHomotypicGroup().equals(homotypicalGroup
)){
1658 synonymType
= SynonymType
.HOMOTYPIC_SYNONYM_OF
;
1660 taxon
.addSynonymName(name
, synonymType
);
1662 name
= taxon
.getName();
1667 private void handleInfraspecificEpithet(Element element
, String attrValue
, INonViralName name
) {
1668 String value
= element
.getTextNormalize();
1669 if (value
.indexOf("subsp.") != -1){
1670 //TODO genus and species epi
1671 String infrEpi
= value
.substring(value
.indexOf("subsp.") + 6).trim();
1672 name
.setInfraSpecificEpithet(infrEpi
);
1673 name
.setRank(Rank
.SUBSPECIES());
1674 }else if (value
.indexOf("var.") != -1){
1675 //TODO genus and species epi
1676 String infrEpi
= value
.substring(value
.indexOf("var.") + 4).trim();
1677 name
.setInfraSpecificEpithet(infrEpi
);
1678 name
.setRank(Rank
.VARIETY());
1680 logger
.warn("Unhandled infraspecific type: " + value
);
1689 private void handleBasionymAuthor(EfloraImportState state
, Element elBasionymAuthor
, INonViralName name
, boolean overwrite
) {
1690 String strAuthor
= elBasionymAuthor
.getValue().trim();
1691 Pattern reBasionymAuthor
= Pattern
.compile("^\\(.*\\)$");
1692 if (reBasionymAuthor
.matcher(strAuthor
).matches()){
1693 strAuthor
= strAuthor
.substring(1, strAuthor
.length()-1);
1695 logger
.warn("Brackets are missing for original combination author " + strAuthor
);
1697 TeamOrPersonBase
[] basionymTeam
= getTeam(strAuthor
);
1698 if (name
.getBasionymAuthorship() != null && overwrite
== false){
1699 logger
.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1701 name
.setBasionymAuthorship(basionymTeam
[0]);
1702 name
.setExBasionymAuthorship(basionymTeam
[1]);
// Maps an author string to the UUID of the team or person created for it.
1707 private final Map
<String
, UUID
> teamMap
= new HashMap
<String
, UUID
>();
1713 private void handleNameAuthors(Element elAuthor
, INonViralName name
) {
1714 if (name
.getCombinationAuthorship() != null){
1715 logger
.warn("Name already has a combination author. Name: " + name
.getTitleCache() + ", Author: " + elAuthor
.getTextNormalize());
1717 String strAuthor
= elAuthor
.getValue().trim();
1718 if (strAuthor
.endsWith(",")){
1719 strAuthor
= strAuthor
.substring(0, strAuthor
.length() -1);
1721 if (strAuthor
.indexOf("(") > -1 || strAuthor
.indexOf(")") > -1){
1722 logger
.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor
);
1724 TeamOrPersonBase
[] team
= getTeam(strAuthor
);
1725 name
.setCombinationAuthorship(team
[0]);
1726 name
.setExCombinationAuthorship(team
[1]);
1734 private TeamOrPersonBase
[] getTeam(String strAuthor
) {
1735 TeamOrPersonBase
[] result
= new TeamOrPersonBase
[2];
1736 String
[] split
= strAuthor
.split(" ex ");
1737 String strBaseAuthor
= null;
1738 String strExAuthor
= null;
1740 if (split
.length
== 2){
1741 strBaseAuthor
= split
[1];
1742 strExAuthor
= split
[0];
1743 }else if (split
.length
== 1){
1744 strBaseAuthor
= split
[0];
1746 logger
.warn("Could not parse (ex) author: " + strAuthor
);
1748 result
[0] = getUuidTeam(strBaseAuthor
);
1749 if (result
[0] == null){
1750 result
[0] = parseSingleTeam(strBaseAuthor
);
1751 teamMap
.put(strBaseAuthor
, result
[0].getUuid());
1753 if (strExAuthor
!= null){
1754 result
[1] = getUuidTeam(strExAuthor
);
1755 if (result
[1] == null){
1756 result
[1] = Team
.NewInstance();
1757 result
[1].setTitleCache(strExAuthor
, true);
1758 teamMap
.put(strExAuthor
, result
[1].getUuid());
1766 protected TeamOrPersonBase
parseSingleTeam(String strBaseAuthor
) {
1767 TeamOrPersonBase result
;
1768 String
[] split
= strBaseAuthor
.split("&");
1769 if (split
.length
> 1){
1770 result
= Team
.NewInstance();
1771 for (String personString
: split
){
1772 Person person
= makePerson(personString
);
1773 ((Team
)result
).addTeamMember(person
);
1776 result
= makePerson(strBaseAuthor
.trim());
1783 * @param personString
1786 private Person
makePerson(String personString
) {
1787 personString
= personString
.trim();
1788 Person person
= Person
.NewTitledInstance(personString
);
1789 person
.setNomenclaturalTitle(personString
);
1796 * @param strBaseAuthor
1798 private TeamOrPersonBase
getUuidTeam(String strBaseAuthor
) {
1799 UUID uuidTeam
= teamMap
.get(strBaseAuthor
);
1800 return CdmBase
.deproxy(getAgentService().find(uuidTeam
), TeamOrPersonBase
.class);
1804 private void handleDescription(EfloraImportState state
, Element elDescription
, Taxon taxon
, Set
<String
> unhandledChildren
) {
1805 verifyNoAttribute(elDescription
);
1807 List
<Element
> elements
= elDescription
.getChildren();
1808 for (Element element
: elements
){
1809 if (element
.getName().equalsIgnoreCase("char")){
1810 handleChar(state
, element
, taxon
);
1812 logger
.warn("Unhandled description child: " + element
.getName());
/**
 * Handles a char element: the value of its class attribute is resolved to a feature and
 * the element's value is added to the taxon as description element for that feature.
 * Attributes other than "class" and child elements are logged as unhandled.
 * NOTE(review): whether the description element is also added after the
 * "Unhandled feature" warning is not visible in this view - confirm.
 *
 * @param state the import state
 * @param element the char element
 * @param taxon the taxon that receives the description element
 */
1824 private void handleChar(EfloraImportState state
, Element element
, Taxon taxon
) {
1825 List
<Attribute
> attributes
= element
.getAttributes();
1826 for (Attribute attribute
: attributes
){
1827 if (! attribute
.getName().equalsIgnoreCase("class")){
1828 logger
.warn("Char has unhandled attribute " + attribute
.getName());
// the class attribute value names the feature
1830 String classValue
= attribute
.getValue();
1831 Feature feature
= getFeature(classValue
, state
);
1832 if (feature
== null){
1833 logger
.warn("Unhandled feature: " + classValue
);
1835 String value
= element
.getValue();
1836 addDescriptionElement(state
, taxon
, value
, feature
, null);
// child elements are only reported
1842 List
<Element
> elements
= element
.getChildren();
1843 if (! elements
.isEmpty()){
1844 logger
.warn("Char has unhandled children");
1853 protected TaxonDescription
getDescription(Taxon taxon
) {
1854 for (TaxonDescription description
: taxon
.getDescriptions()){
1855 if (! description
.isImageGallery()){
1859 TaxonDescription newDescription
= TaxonDescription
.NewInstance(taxon
);
1860 return newDescription
;
1868 * @throws UndefinedTransformerMethodException
1870 private Feature
getFeature(String classValue
, EfloraImportState state
) {
1873 uuid
= state
.getTransformer().getFeatureUuid(classValue
);
1875 logger
.info("Uuid is null for " + classValue
);
1877 String featureText
= StringUtils
.capitalize(classValue
);
1878 //TODO eFlora feature vocabulary
1879 Feature feature
= getFeature(state
, uuid
, featureText
, featureText
, classValue
, null);
1880 if (feature
== null){
1881 throw new NullPointerException(classValue
+ " not recognized as a feature");
1884 } catch (Exception e
) {
1885 logger
.warn("Could not create feature for " + classValue
+ ": " + e
.getMessage()) ;
1886 return Feature
.UNKNOWN();
1895 * @param unhandledTitleClassess
1897 private void handleTitle(EfloraImportState state
, Element element
, Taxon taxon
, Set
<String
> unhandledTitleClassess
) {
1899 List
<Attribute
> attributes
= element
.getAttributes();
1900 for (Attribute attribute
: attributes
){
1901 if (! attribute
.getName().equalsIgnoreCase("class") ){
1902 if (! attribute
.getName().equalsIgnoreCase("num")){
1903 logger
.warn("Title has unhandled attribute " + attribute
.getName());
1905 //TODO num attribute in taxon
1908 String classValue
= attribute
.getValue();
1912 rank
= Rank
.getRankByLatinNameOrIdInVoc(classValue
);
1913 } catch (Exception e
) {
1915 rank
= Rank
.getRankByEnglishName(classValue
, NomenclaturalCode
.ICNAFP
, false);
1917 taxon
.getName().setRank(rank
);
1918 if (rank
.equals(Rank
.FAMILY()) || rank
.equals(Rank
.GENUS())){
1919 handleGenus(element
.getValue(), taxon
.getName());
1920 }else if (rank
.equals(Rank
.SUBGENUS())){
1921 handleSubGenus(element
.getValue(), taxon
.getName());
1922 }else if (rank
.equals(Rank
.SECTION_BOTANY())){
1923 handleSection(element
.getValue(), taxon
.getName());
1924 }else if (rank
.equals(Rank
.SPECIES())){
1925 handleSpecies(element
.getValue(), taxon
.getName());
1926 }else if (rank
.equals(Rank
.SUBSPECIES())){
1927 handleSubSpecies(element
.getValue(), taxon
.getName());
1928 }else if (rank
.equals(Rank
.VARIETY())){
1929 handleVariety(element
.getValue(), taxon
.getName());
1931 logger
.warn("Unhandled rank: " + rank
.getLabel());
1933 } catch (UnknownCdmTypeException e
) {
1934 logger
.warn("Unknown rank " + classValue
);
1935 unhandledTitleClassess
.add(classValue
);
1939 List
<Element
> elements
= element
.getChildren();
1940 if (! elements
.isEmpty()){
1941 logger
.warn("Title has unexpected children");
1943 UUID uuidTitle
= EfloraTransformer
.uuidTitle
;
1944 ExtensionType titleExtension
= this.getExtensionType(state
, uuidTitle
, "title", "title", "title");
1945 taxon
.addExtension(element
.getTextNormalize(), titleExtension
);
1952 * @param taxonNameBase
1954 private void handleSubGenus(String value
, INonViralName taxonNameBase
) {
1955 String name
= value
.replace("Subgenus", "").trim();
1956 taxonNameBase
.setInfraGenericEpithet(name
);
1961 * @param taxonNameBase
1963 private void handleSection(String value
, INonViralName taxonNameBase
) {
1964 String name
= value
.replace("Section", "").trim();
1965 taxonNameBase
.setInfraGenericEpithet(name
);
/**
 * Handler for the title value of a species.
 * NOTE(review): the method body is not visible in this view - confirm what it does.
 *
 * @param value the title value
 * @param taxonNameBase the name of the taxon
 */
1972 private void handleSpecies(String value
, TaxonName taxonNameBase
) {
/**
 * Handler for the title value of a variety.
 * NOTE(review): the method body is not visible in this view - confirm what it does.
 *
 * @param value the title value
 * @param taxonNameBase the name of the taxon
 */
1980 private void handleVariety(String value
, TaxonName taxonNameBase
) {
/**
 * Handler for the title value of a subspecies.
 * NOTE(review): the method body is not visible in this view - confirm what it does.
 *
 * @param value the title value
 * @param taxonNameBase the name of the taxon
 */
1988 private void handleSubSpecies(String value
, TaxonName taxonNameBase
) {
// Matches a part enclosed in square or round brackets; the match is greedy, so it runs
// from the first opening to the last closing bracket.
1993 private final Pattern rexGenusAuthor
= Pattern
.compile("(\\[|\\().*(\\]|\\))");
1995 protected void handleGenus(String value
, INonViralName taxonName
) {
1996 Matcher matcher
= rexGenusAuthor
.matcher(value
);
1997 if (matcher
.find()){
1998 String author
= matcher
.group();
1999 // String genus = value.replace(author, "");
2000 author
= author
.substring(1, author
.length() - 1);
2001 Team team
= Team
.NewInstance();
2002 team
.setTitleCache(author
, true);
2003 Credit credit
= Credit
.NewInstance(team
, null, null);
2004 taxonName
.addCredit(credit
);
2005 // taxonName.setCombinationAuthorship(team);
2006 // taxonName.setGenusOrUninomial(genus);
2008 logger
.info("No Author match for " + value
);
2012 private void handleTaxonRelation(EfloraImportState state
, Taxon taxon
, Taxon lastTaxon
) {
2014 Classification tree
= getTree(state
);
2015 if (lastTaxon
== null){
2016 tree
.addChildTaxon(taxon
, null, null);
2019 Rank thisRank
= taxon
.getName().getRank();
2020 Rank lastRank
= lastTaxon
.getName().getRank();
2021 if (lastTaxon
.getTaxonNodes().size() > 0){
2022 TaxonNode lastNode
= lastTaxon
.getTaxonNodes().iterator().next();
2023 if (thisRank
.isLower(lastRank
) ){
2024 lastNode
.addChildTaxon(taxon
, null, null);
2025 fillMissingEpithetsForTaxa(lastTaxon
, taxon
);
2026 }else if (thisRank
.equals(lastRank
)){
2027 TaxonNode parent
= lastNode
.getParent();
2028 if (parent
!= null){
2029 parent
.addChildTaxon(taxon
, null, null);
2030 fillMissingEpithetsForTaxa(parent
.getTaxon(), taxon
);
2032 tree
.addChildTaxon(taxon
, null, null);
2034 }else if (thisRank
.isHigher(lastRank
)){
2035 handleTaxonRelation(state
, taxon
, lastNode
.getParent().getTaxon());
2036 // TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2037 // parentNode.addChildTaxon(taxon, null, null, null);
2040 logger
.warn("Last taxon has no node");
2050 private Classification
getTree(EfloraImportState state
) {
2051 Classification result
= state
.getTree(null);
2052 if (result
== null){
2053 UUID uuid
= state
.getConfig().getClassificationUuid();
2055 logger
.warn("No classification uuid is defined");
2056 result
= getNewClassification(state
);
2058 result
= getClassificationService().find(uuid
);
2059 if (result
== null){
2060 result
= getNewClassification(state
);
2061 result
.setUuid(uuid
);
2064 state
.putTree(null, result
);
2070 private Classification
getNewClassification(EfloraImportState state
) {
2071 Classification result
;
2072 result
= Classification
.NewInstance(state
.getConfig().getClassificationTitle());
2073 state
.putTree(null, result
);
2085 private TextData
addDescriptionElement(EfloraImportState state
, Taxon taxon
, String value
, Feature feature
, String references
) {
2086 TextData textData
= TextData
.NewInstance(feature
);
2087 Language textLanguage
= getDefaultLanguage(state
);
2088 textData
.putText(textLanguage
, value
);
2089 TaxonDescription description
= getDescription(taxon
);
2090 description
.addElement(textData
);
2091 if (references
!= null){
2092 makeOriginalSourceReferences(textData
, ";", references
);
2097 private Language
getDefaultLanguage(EfloraImportState state
) {
2098 UUID defaultLanguageUuid
= state
.getConfig().getDefaultLanguageUuid();
2099 if (defaultLanguageUuid
!= null){
2100 Language result
= state
.getDefaultLanguage();
2101 if (result
== null || ! result
.getUuid().equals(defaultLanguageUuid
)){
2102 result
= (Language
)getTermService().find(defaultLanguageUuid
);
2103 state
.setDefaultLanguage(result
);
2104 if (result
== null){
2105 logger
.warn("Default language for " + defaultLanguageUuid
+ " does not exist.");
2110 return Language
.DEFAULT();
2116 * @param elNomenclature
2118 private void verifyNoAttribute(Element element
) {
2119 List
<Attribute
> attributes
= element
.getAttributes();
2120 if (! attributes
.isEmpty()){
2121 logger
.warn(element
.getName() + " has unhandled attributes: " + attributes
.get(0).getValue() + "..." );
2126 * @param elNomenclature
2128 protected void verifyNoChildren(Element element
) {
2129 verifyNoChildren(element
, false);
2133 * @param elNomenclature
2135 private void verifyNoChildren(Element element
, boolean ignoreLineBreak
) {
2136 List
<Element
> children
= element
.getChildren();
2137 if (! children
.isEmpty()){
2138 if (ignoreLineBreak
== true){
2139 for (Element child
: children
){
2140 if (! child
.getName().equalsIgnoreCase("BR")){
2141 logger
.warn(element
.getName() + " has unhandled child: " + child
.getName());
2145 logger
.warn(element
.getName() + " has unhandled children");
2153 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2154 * exists it is added to the name and the nom. status part of the references title cache is
2155 * removed. Requires protected title cache.
2157 * @param nonViralName
2159 protected void parseNomStatus(Reference ref
, INonViralName nonViralName
) {
2160 String titleToParse
= ref
.getTitleCache();
2162 String noStatusTitle
= parser
.parseNomStatus(titleToParse
, nonViralName
, true);
2163 if (! noStatusTitle
.equals(titleToParse
)){
2164 ref
.setTitleCache(noStatusTitle
, true);
2170 * Extracts the date published part and returns micro reference
2174 private String
parseReferenceYearAndDetail(Reference ref
){
2175 String detailResult
= null;
2176 String titleToParse
= ref
.getTitleCache();
2177 titleToParse
= removeStartingSymbols(titleToParse
, ref
);
2178 String reReference
= "^\\.{1,}";
2179 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2180 String oneMonth
= "(Feb.|Dec.|March|June|July)";
2181 String reYear
= oneMonth
+ "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2182 String secondYear
= "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2184 String reYearPeriod
= "\\(" + reYear
+ "(\\-" + secondYear
+ ")?\\)";
2185 String reDetail
= "\\.{1,10}$";
2187 //pattern for the whole string
2188 Pattern patReference
= Pattern
.compile(/*reReference +*/ reYearPeriod
/*+ reDetail */);
2189 Matcher matcher
= patReference
.matcher(titleToParse
);
2190 if (matcher
.find()){
2191 int start
= matcher
.start();
2192 int end
= matcher
.end();
2194 //title and other information precedes the year part
2195 String title
= titleToParse
.substring(0, start
).trim();
2196 //detail follows the year part
2197 String detail
= titleToParse
.substring(end
).trim();
2200 String strPeriod
= matcher
.group().trim();
2201 strPeriod
= strPeriod
.substring(1, strPeriod
.length()-1); //remove brackets
2202 Pattern patStartMonth
= Pattern
.compile("^" + oneMonth
);
2203 matcher
= patStartMonth
.matcher(strPeriod
);
2204 strPeriod
= strPeriod
.replace(" ", "");
2205 Integer startMonth
= null;
2206 if (matcher
.find()){
2207 end
= matcher
.end();
2208 strPeriod
= strPeriod
.substring(0, end
) + " " + strPeriod
.substring(end
);
2209 startMonth
= getMonth(strPeriod
.substring(0, end
));
2212 VerbatimTimePeriod datePublished
= TimePeriodParser
.parseStringVerbatim(strPeriod
);
2213 if (startMonth
!= null){
2214 datePublished
.setStartMonth(startMonth
);
2216 ref
.setDatePublished(datePublished
);
2217 ref
.setTitle(title
);
2218 detailResult
= CdmUtils
.removeTrailingDots(detail
);
2219 if (detailResult
.endsWith(".") || detailResult
.endsWith(";") || detailResult
.endsWith(",") ){
2220 detailResult
= detailResult
.substring(0, detailResult
.length() -1);
2222 ref
.setProtectedTitleCache(false);
2224 logger
.warn("Could not parse reference: " + titleToParse
);
2226 return detailResult
;
2232 private Integer
getMonth(String month
) {
2233 if (month
.startsWith("Jan")){
2235 }else if (month
.startsWith("Feb")){
2237 }else if (month
.startsWith("Mar")){
2239 }else if (month
.startsWith("Apr")){
2241 }else if (month
.startsWith("May")){
2243 }else if (month
.startsWith("Jun")){
2245 }else if (month
.startsWith("Jul")){
2247 }else if (month
.startsWith("Aug")){
2249 }else if (month
.startsWith("Sep")){
2251 }else if (month
.startsWith("Oct")){
2253 }else if (month
.startsWith("Nov")){
2255 }else if (month
.startsWith("Dec")){
2258 logger
.warn("Month not yet supported: " + month
);
2265 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2268 protected boolean isIgnore(EfloraImportState state
){
2269 return ! state
.getConfig().isDoTaxa();