2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.eflora
;
12 import java
.util
.ArrayList
;
13 import java
.util
.HashMap
;
14 import java
.util
.HashSet
;
15 import java
.util
.List
;
18 import java
.util
.UUID
;
19 import java
.util
.regex
.Matcher
;
20 import java
.util
.regex
.Pattern
;
22 import org
.apache
.commons
.lang
.CharUtils
;
23 import org
.apache
.commons
.lang
.StringUtils
;
24 import org
.apache
.log4j
.Logger
;
25 import org
.jdom
.Attribute
;
26 import org
.jdom
.Element
;
27 import org
.springframework
.stereotype
.Component
;
28 import org
.springframework
.transaction
.TransactionStatus
;
30 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
31 import eu
.etaxonomy
.cdm
.common
.ResultWrapper
;
32 import eu
.etaxonomy
.cdm
.common
.XmlHelp
;
33 import eu
.etaxonomy
.cdm
.io
.common
.ICdmIO
;
34 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
35 import eu
.etaxonomy
.cdm
.io
.eflora
.UnmatchedLeads
.UnmatchedLeadsKey
;
36 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
37 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
38 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
39 import eu
.etaxonomy
.cdm
.model
.common
.AnnotatableEntity
;
40 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
41 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
42 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
43 import eu
.etaxonomy
.cdm
.model
.common
.Credit
;
44 import eu
.etaxonomy
.cdm
.model
.common
.ExtensionType
;
45 import eu
.etaxonomy
.cdm
.model
.common
.ISourceable
;
46 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
47 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
48 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
49 import eu
.etaxonomy
.cdm
.model
.common
.Representation
;
50 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
51 import eu
.etaxonomy
.cdm
.model
.description
.CommonTaxonName
;
52 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
53 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
54 import eu
.etaxonomy
.cdm
.model
.description
.FeatureNode
;
55 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
56 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
57 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
58 import eu
.etaxonomy
.cdm
.model
.name
.BotanicalName
;
59 import eu
.etaxonomy
.cdm
.model
.name
.HomotypicalGroup
;
60 import eu
.etaxonomy
.cdm
.model
.name
.NameRelationshipType
;
61 import eu
.etaxonomy
.cdm
.model
.name
.NameTypeDesignation
;
62 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
63 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
64 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
65 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignation
;
66 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignationStatus
;
67 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
68 import eu
.etaxonomy
.cdm
.model
.name
.TypeDesignationBase
;
69 import eu
.etaxonomy
.cdm
.model
.occurrence
.Specimen
;
70 import eu
.etaxonomy
.cdm
.model
.reference
.IBook
;
71 import eu
.etaxonomy
.cdm
.model
.reference
.IJournal
;
72 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceBase
;
73 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
74 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceType
;
75 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
76 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
77 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
78 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
79 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonomicTree
;
80 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
81 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
89 public class EfloraTaxonImport
extends EfloraImportBase
implements ICdmIO
<EfloraImportState
> {
90 private static final Logger logger
= Logger
.getLogger(EfloraTaxonImport
.class);
92 private static int modCount
= 30000;
93 private NonViralNameParserImpl parser
= new NonViralNameParserImpl();
95 public EfloraTaxonImport(){
101 public boolean doCheck(EfloraImportState state
){
102 boolean result
= true;
106 //TODO make part of state, but state is renewed when invoking the import a second time
107 private UnmatchedLeads unmatchedLeads
;
110 public boolean doInvoke(EfloraImportState state
){
111 logger
.info("start make Taxa ...");
114 state
.putTree(null, null);
115 // UnmatchedLeads unmatchedLeads = state.getOpenKeys();
116 if (unmatchedLeads
== null){
117 unmatchedLeads
= UnmatchedLeads
.NewInstance();
119 state
.setUnmatchedLeads(unmatchedLeads
);
121 TransactionStatus tx
= startTransaction();
122 unmatchedLeads
.saveToSession(getFeatureTreeService());
125 //TODO generally do not store the reference object in the config
126 ReferenceBase sourceReference
= state
.getConfig().getSourceReference();
127 getReferenceService().saveOrUpdate(sourceReference
);
129 Set
<TaxonBase
> taxaToSave
= new HashSet
<TaxonBase
>();
130 ResultWrapper
<Boolean
> success
= ResultWrapper
.NewInstance(true);
132 Element elbody
= getBodyElement(state
.getConfig());
133 List
<Element
> elTaxonList
= elbody
.getChildren();
137 Set
<String
> unhandledTitleClassess
= new HashSet
<String
>();
138 Set
<String
> unhandledNomeclatureChildren
= new HashSet
<String
>();
139 Set
<String
> unhandledDescriptionChildren
= new HashSet
<String
>();
141 Taxon lastTaxon
= getLastTaxon(state
);
144 for (Element elTaxon
: elTaxonList
){
146 if ((i
++ % modCount
) == 0 && i
> 1){ logger
.info("Taxa handled: " + (i
-1));}
147 if (! elTaxon
.getName().equalsIgnoreCase("taxon")){
148 logger
.warn("body has element other than 'taxon'");
151 BotanicalName botanicalName
= BotanicalName
.NewInstance(Rank
.SPECIES());
152 Taxon taxon
= Taxon
.NewInstance(botanicalName
, state
.getConfig().getSourceReference());
154 handleTaxonAttributes(elTaxon
, taxon
, state
);
157 List
<Element
> children
= elTaxon
.getChildren();
158 handleTaxonElement(state
, unhandledTitleClassess
, unhandledNomeclatureChildren
, unhandledDescriptionChildren
, taxon
, children
);
159 handleTaxonRelation(state
, taxon
, lastTaxon
);
161 taxaToSave
.add(taxon
);
162 state
.getConfig().setLastTaxonUuid(lastTaxon
.getUuid());
164 } catch (Exception e
) {
165 logger
.warn("Exception occurred in Sapindacea taxon import: " + e
);
171 System
.out
.println(state
.getUnmatchedLeads().toString());
172 logger
.warn("There are taxa with attributes 'excluded' and 'dubious'");
174 logger
.info("Children for nomenclature are: " + unhandledNomeclatureChildren
);
175 logger
.info("Children for description are: " + unhandledDescriptionChildren
);
176 logger
.info("Children for homotypes are: " + unhandledHomotypeChildren
);
177 logger
.info("Children for nom are: " + unhandledNomChildren
);
180 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
181 logger
.info(i
+ " taxa handled. Saving ...");
182 getTaxonService().saveOrUpdate(taxaToSave
);
183 getFeatureTreeService().saveOrUpdateFeatureNodesAll(state
.getFeatureNodesToSave());
184 state
.getFeatureNodesToSave().clear();
185 commitTransaction(tx
);
187 logger
.info("end makeTaxa ...");
188 logger
.info("start makeKey ...");
189 // invokeDoKey(state);
190 logger
.info("end makeKey ...");
192 return success
.getValue();
196 private void handleTaxonAttributes(Element elTaxon
, Taxon taxon
, EfloraImportState state
) {
197 List
<Attribute
> attrList
= elTaxon
.getAttributes();
198 for (Attribute attr
: attrList
){
199 String attrName
= attr
.getName();
200 String attrValue
= attr
.getValue();
201 if ("class".equals(attrName
)){
202 if (attrValue
.equalsIgnoreCase("dubious") || attrValue
.equalsIgnoreCase("DUBIOUS GENUS") || attrValue
.equalsIgnoreCase("DOUBTFUL SPECIES") ){
203 taxon
.setDoubtful(true);
205 MarkerType markerType
= getMarkerType(state
, attrValue
);
206 if (markerType
== null){
207 logger
.warn("Class attribute value for taxon not yet supported: " + attrValue
);
209 taxon
.addMarker(Marker
.NewInstance(markerType
, true));
212 }else if ("num".equals(attrName
)){
213 logger
.warn("num not yet supported");
215 logger
.warn("Attribute " + attrName
+ " not yet supported for element taxon");
222 private Taxon
getLastTaxon(EfloraImportState state
) {
223 if (state
.getConfig().getLastTaxonUuid() == null){
226 return (Taxon
)getTaxonService().find(state
.getConfig().getLastTaxonUuid());
231 // private void invokeDoKey(SapindaceaeImportState state) {
232 // TransactionStatus tx = startTransaction();
234 // Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
235 // ITaxonService taxonService = getTaxonService();
236 // ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
238 // Element elbody= getBodyElement(state.getConfig());
239 // List<Element> elTaxonList = elbody.getChildren();
244 // for (Element elTaxon : elTaxonList){
245 // if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
246 // if (! elTaxon.getName().equalsIgnoreCase("taxon")){
250 // List<Element> children = elTaxon.getChildren("key");
251 // for (Element element : children){
252 // handleKeys(state, element, null);
254 // nodesToSave.add(taxon);
262 private void handleTaxonElement(EfloraImportState state
, Set
<String
> unhandledTitleClassess
, Set
<String
> unhandledNomeclatureChildren
, Set
<String
> unhandledDescriptionChildren
, Taxon taxon
, List
<Element
> children
) {
263 AnnotatableEntity lastEntity
= null;
264 for (Element element
: children
){
265 String elName
= element
.getName();
267 if (elName
.equalsIgnoreCase("title")){
268 handleTitle(state
, element
, taxon
, unhandledTitleClassess
);
270 }else if(elName
.equalsIgnoreCase("nomenclature")){
271 handleNomenclature(state
, element
, taxon
, unhandledNomeclatureChildren
);
273 }else if(elName
.equalsIgnoreCase("description")){
274 handleDescription(state
, element
, taxon
, unhandledDescriptionChildren
);
276 }else if(elName
.equalsIgnoreCase("habitatecology")){
277 lastEntity
= handleEcology(state
, element
, taxon
);
278 }else if(elName
.equalsIgnoreCase("distribution")){
279 lastEntity
= handleDistribution(state
, element
, taxon
);
280 }else if(elName
.equalsIgnoreCase("uses")){
281 lastEntity
= handleUses(state
, element
, taxon
);
282 }else if(elName
.equalsIgnoreCase("notes")){
283 lastEntity
= handleTaxonNotes(state
, element
, taxon
);
284 }else if(elName
.equalsIgnoreCase("chromosomes")){
285 lastEntity
= handleChromosomes(state
, element
, taxon
);
286 }else if(elName
.equalsIgnoreCase("vernacularnames")){
287 handleVernaculars(state
, element
, taxon
);
288 }else if(elName
.equalsIgnoreCase("key")){
289 lastEntity
= handleKeys(state
, element
, taxon
);
290 }else if(elName
.equalsIgnoreCase("references")){
291 handleReferences(state
, element
, taxon
, lastEntity
);
293 }else if(elName
.equalsIgnoreCase("taxon")){
294 logger
.warn("A taxon should not be part of a taxon");
295 }else if(elName
.equalsIgnoreCase("homotypes")){
296 logger
.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
298 logger
.warn("Unexpected child for taxon: " + elName
);
304 private void handleVernaculars(EfloraImportState state
, Element elVernacular
, Taxon taxon
) {
305 verifyNoAttribute(elVernacular
);
306 verifyNoChildren(elVernacular
, false);
307 String value
= elVernacular
.getTextNormalize();
308 Feature feature
= Feature
.COMMON_NAME();
309 value
= replaceStart(value
, "Noms vernaculaires");
310 String
[] dialects
= value
.split(";");
311 for (String singleDialect
: dialects
){
312 handleSingleDialect(taxon
, singleDialect
, feature
, state
);
318 private void handleSingleDialect(Taxon taxon
, String singleDialect
, Feature feature
, EfloraImportState state
) {
319 singleDialect
= singleDialect
.trim();
320 TaxonDescription description
= getDescription(taxon
);
321 String reDialect
= "\\(dial\\.\\s.*\\)";
322 // String reDialect = "\\(.*\\)";
323 Pattern patDialect
= Pattern
.compile(reDialect
);
324 Matcher matcher
= patDialect
.matcher(singleDialect
);
326 String dialect
= singleDialect
.substring(matcher
.start(), matcher
.end());
327 dialect
= dialect
.replace("(dial. ", "").replace(")", "");
329 Language language
= null;
331 language
= this.getLanguage(state
, state
.getTransformer().getLanguageUuid(dialect
), dialect
, dialect
, dialect
);
332 } catch (UndefinedTransformerMethodException e
) {
333 logger
.error(e
.getMessage());
336 String commonNames
= singleDialect
.substring(0, matcher
.start());
337 String
[] splitNames
= commonNames
.split(",");
338 for (String commonNameString
: splitNames
){
339 commonNameString
= commonNameString
.trim();
340 CommonTaxonName commonName
= CommonTaxonName
.NewInstance(commonNameString
, language
);
341 description
.addElement(commonName
);
344 logger
.warn("No dialect match: " + singleDialect
);
349 private void handleReferences(EfloraImportState state
, Element elReferences
, Taxon taxon
, AnnotatableEntity lastEntity
) {
350 verifyNoAttribute(elReferences
);
351 verifyNoChildren(elReferences
, true);
352 String refString
= elReferences
.getTextNormalize();
353 if (lastEntity
== null){
354 logger
.warn("No last entity defined: " + refString
);
358 Annotation annotation
= Annotation
.NewInstance(refString
, AnnotationType
.EDITORIAL(), Language
.DEFAULT());
359 lastEntity
.addAnnotation(annotation
);
363 private PolytomousKey
handleKeys(EfloraImportState state
, Element elKey
, Taxon taxon
) {
364 UnmatchedLeads openKeys
= state
.getUnmatchedLeads();
367 String title
= makeKeyTitle(elKey
);
370 PolytomousKey key
= PolytomousKey
.NewTitledInstance(title
);
372 //TODO add covered taxa etc.
373 verifyNoAttribute(elKey
);
376 makeKeyNotes(elKey
, key
);
379 List
<Element
> keychoices
= new ArrayList
<Element
>();
380 keychoices
.addAll(elKey
.getChildren("keycouplet"));
381 keychoices
.addAll(elKey
.getChildren("keychoice"));
384 for (Element elKeychoice
: keychoices
){
385 handleKeyChoices(state
, openKeys
, key
, elKeychoice
, taxon
);
386 elKey
.removeContent(elKeychoice
);
390 verifyNoChildren(elKey
);
391 logger
.info("Unmatched leads after key handling:" + openKeys
.toString());
394 if (state
.getConfig().isDoPrintKeys()){
395 key
.print(System
.err
);
397 getFeatureTreeService().save(key
);
410 private void handleKeyChoices(EfloraImportState state
, UnmatchedLeads openKeys
, PolytomousKey key
, Element elKeychoice
, Taxon taxon
) {
413 Feature feature
= handleKeychoiceChar(state
, elKeychoice
);
416 List
<FeatureNode
> childNodes
= handleKeychoiceLeads(state
, key
, elKeychoice
, taxon
, feature
);
418 //num -> match with unmatched leads
419 handleKeychoiceNum(openKeys
, key
, elKeychoice
, childNodes
);
422 verifyNoAttribute(elKeychoice
);
432 private void handleKeychoiceNum(UnmatchedLeads openKeys
, PolytomousKey key
, Element elKeychoice
, List
<FeatureNode
> childNodes
) {
433 Attribute numAttr
= elKeychoice
.getAttribute("num");
434 String num
= CdmUtils
.removeTrailingDot(numAttr
== null?
"":numAttr
.getValue());
435 UnmatchedLeadsKey okk
= UnmatchedLeadsKey
.NewInstance(key
, num
);
436 Set
<FeatureNode
> matchingNodes
= openKeys
.getNodes(okk
);
437 for (FeatureNode matchingNode
: matchingNodes
){
438 for (FeatureNode childNode
: childNodes
){
439 matchingNode
.addChild(childNode
);
441 openKeys
.removeNode(okk
, matchingNode
);
443 if (matchingNodes
.isEmpty()){
444 for (FeatureNode childNode
: childNodes
){
445 key
.getRoot().addChild(childNode
);
449 elKeychoice
.removeAttribute("num");
461 private List
<FeatureNode
> handleKeychoiceLeads( EfloraImportState state
, PolytomousKey key
, Element elKeychoice
, Taxon taxon
, Feature feature
) {
462 List
<FeatureNode
> childNodes
= new ArrayList
<FeatureNode
>();
463 List
<Element
> leads
= elKeychoice
.getChildren("lead");
464 for(Element elLead
: leads
){
465 FeatureNode childNode
= handleLead(state
, key
, elLead
, taxon
, feature
);
466 childNodes
.add(childNode
);
477 private Feature
handleKeychoiceChar(EfloraImportState state
, Element elKeychoice
) {
478 Feature feature
= null;
479 Attribute charAttr
= elKeychoice
.getAttribute("char");
480 if (charAttr
!= null){
481 String charStr
= charAttr
.getValue();
482 feature
= getFeature(charStr
, state
);
483 elKeychoice
.removeAttribute("char");
489 private FeatureNode
handleLead(EfloraImportState state
, PolytomousKey key
, Element elLead
, Taxon taxon
, Feature feature
) {
490 FeatureNode node
= FeatureNode
.NewInstance();
491 node
.setFeature(feature
);
494 String text
= handleLeadText(elLead
, node
);
497 handleLeadNum(elLead
, text
);
500 handleLeadGoto(state
, key
, elLead
, taxon
, node
);
503 verifyNoAttribute(elLead
);
514 private String
handleLeadText(Element elLead
, FeatureNode node
) {
515 String text
= elLead
.getAttributeValue("text").trim();
516 if (StringUtils
.isBlank(text
)){
517 logger
.warn("Empty text in lead");
519 elLead
.removeAttribute("text");
520 node
.addQuestion(Representation
.NewInstance(text
, null, null, Language
.DEFAULT()));
532 private void handleLeadGoto(EfloraImportState state
, PolytomousKey key
, Element elLead
, Taxon taxon
, FeatureNode node
) {
533 Attribute gotoAttr
= elLead
.getAttribute("goto");
534 if (gotoAttr
!= null){
535 String strGoto
= gotoAttr
.getValue().trim();
537 UnmatchedLeadsKey gotoKey
= null;
538 if (isInternalNode(strGoto
)){
539 gotoKey
= UnmatchedLeadsKey
.NewInstance(key
, strGoto
);
541 String taxonKey
= makeTaxonKey(strGoto
, taxon
);
542 gotoKey
= UnmatchedLeadsKey
.NewInstance(taxonKey
);
545 UnmatchedLeads openKeys
= state
.getUnmatchedLeads();
546 openKeys
.addKey(gotoKey
, node
);
547 if (gotoKey
.isInnerLead()){
548 Set
<FeatureNode
> existingNodes
= openKeys
.getNodes(gotoKey
);
549 for (FeatureNode existingNode
: existingNodes
){
550 node
.addChild(existingNode
);
553 //remove attribute (need for consistency check)
554 elLead
.removeAttribute("goto");
556 logger
.warn("lead has no goto attribute");
565 private void handleLeadNum(Element elLead
, String text
) {
566 Attribute numAttr
= elLead
.getAttribute("num");
567 if (numAttr
!= null){
569 String num
= numAttr
.getValue();
570 elLead
.removeAttribute("num");
572 logger
.info("Keychoice has no num attribute: " + text
);
577 private String
makeTaxonKey(String strGoto
, Taxon taxon
) {
579 if (strGoto
== null){
582 String strGenusName
= CdmBase
.deproxy(taxon
.getName(), NonViralName
.class).getGenusOrUninomial();
583 strGoto
= strGoto
.replaceAll("\\([^\\(\\)]*\\)", ""); //replace all brackets
584 strGoto
= strGoto
.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
586 strGoto
= strGoto
.trim();
587 String
[] split
= strGoto
.split("\\s");
588 for (int i
= 0; i
<split
.length
; i
++){
589 String single
= split
[i
];
590 if (isGenusAbbrev(single
, strGenusName
)){
591 split
[i
] = strGenusName
;
593 // if (isInfraSpecificMarker(single)){
594 // String strSpeciesName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getSpecificEpithet();
595 // split[i] = strGenusName + " " + strSpeciesName + " ";
597 result
= (result
+ " " + split
[i
]).trim();
603 private boolean isInfraSpecificMarker(String single
) {
605 if (Rank
.getRankByAbbreviation(single
).isInfraSpecific()){
608 } catch (UnknownCdmTypeException e
) {
615 private boolean isGenusAbbrev(String single
, String strGenusName
) {
616 if (! single
.matches("[A-Z]\\.?")) {
618 }else if (single
.length() == 0 || strGenusName
== null || strGenusName
.length() == 0){
621 return single
.charAt(0) == strGenusName
.charAt(0);
626 private boolean isInternalNode(String strGoto
) {
627 return CdmUtils
.isNumeric(strGoto
);
631 private void makeKeyNotes(Element keyElement
, PolytomousKey key
) {
632 Element elNotes
= keyElement
.getChild("notes");
633 if (elNotes
!= null){
634 keyElement
.removeContent(elNotes
);
635 String notes
= elNotes
.getTextNormalize();
636 if (StringUtils
.isNotBlank(notes
)){
637 key
.addAnnotation(Annotation
.NewInstance(notes
, AnnotationType
.EDITORIAL(), Language
.DEFAULT()));
643 private String
makeKeyTitle(Element keyElement
) {
644 String title
= "- no title - ";
645 Attribute titleAttr
= keyElement
.getAttribute("title");
646 keyElement
.removeAttribute(titleAttr
);
647 if (titleAttr
== null){
648 Element elTitle
= keyElement
.getChild("keytitle");
649 keyElement
.removeContent(elTitle
);
650 if (elTitle
!= null){
651 title
= elTitle
.getTextNormalize();
654 title
= titleAttr
.getValue();
665 private TextData
handleChromosomes(EfloraImportState state
, Element element
, Taxon taxon
) {
666 Feature chromosomeFeature
= getFeature("chromosomes", state
);
667 verifyNoAttribute(element
);
668 verifyNoChildren(element
);
669 String value
= element
.getTextNormalize();
670 value
= replaceStart(value
, "Chromosomes");
671 String chromosomesPart
= getChromosomesPart(value
);
672 String references
= value
.replace(chromosomesPart
, "").trim();
673 chromosomesPart
= chromosomesPart
.replace(":", "").trim();
674 return addDescriptionElement(state
, taxon
, chromosomesPart
, chromosomeFeature
, references
);
683 private void makeOriginalSourceReferences(ISourceable sourcable
, String splitter
, String refAll
) {
684 String
[] splits
= refAll
.split(splitter
);
685 for (String strRef
: splits
){
686 ReferenceBase ref
= ReferenceFactory
.newGeneric();
687 ref
.setTitleCache(strRef
, true);
688 String refDetail
= parseReferenceYearAndDetail(ref
);
689 sourcable
.addSource(null, null, ref
, refDetail
);
693 //TODO use regex instead
694 /* String detailResult = null;
695 String titleToParse = ref.getTitleCache();
696 String reReference = "^\\.{1,}";
697 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
698 String reYear = "\\([1-2]{1}[0-9]{3}\\)";
699 String reYearPeriod = reYear + "(-" + reYear + ")+";
700 String reDetail = "\\.{1,10}$";
709 private String
getChromosomesPart(String str
) {
710 Pattern pattern
= Pattern
.compile("2n\\s*=\\s*\\d{1,2}:");
711 Matcher matcher
= pattern
.matcher(str
);
713 return matcher
.group(0);
715 logger
.warn("Chromosomes could not be parsed: " + str
);
726 private TextData
handleTaxonNotes(EfloraImportState state
, Element element
, Taxon taxon
) {
727 TextData result
= null;
728 verifyNoChildren(element
, true);
729 //verifyNoAttribute(element);
730 List
<Attribute
> attributes
= element
.getAttributes();
731 for (Attribute attribute
: attributes
){
732 if (! attribute
.getName().equalsIgnoreCase("class")){
733 logger
.warn("Char has unhandled attribute " + attribute
.getName());
735 String classValue
= attribute
.getValue();
736 result
= handleDescriptiveElement(state
, element
, taxon
, classValue
);
739 //if no class attribute exists, handle as note
740 if (attributes
.isEmpty()){
741 result
= handleDescriptiveElement(state
, element
, taxon
, "Note");
744 //Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
745 //taxon.addAnnotation(annotation);
746 return result
; //annotation;
758 private TextData
handleDescriptiveElement(EfloraImportState state
, Element element
, Taxon taxon
, String classValue
) {
759 TextData result
= null;
760 Feature feature
= getFeature(classValue
, state
);
761 if (feature
== null){
762 logger
.warn("Unhandled feature: " + classValue
);
764 String value
= element
.getValue();
765 value
= replaceStart(value
, "Notes");
766 value
= replaceStart(value
, "Note");
767 result
= addDescriptionElement(state
, taxon
, value
, feature
, null);
773 private void removeBr(Element element
) {
774 element
.removeChildren("Br");
775 element
.removeChildren("br");
776 element
.removeChildren("BR");
785 private TextData
handleUses(EfloraImportState state
, Element element
, Taxon taxon
) {
786 verifyNoAttribute(element
);
787 verifyNoChildren(element
, true);
788 String value
= element
.getTextNormalize();
789 value
= replaceStart(value
, "Uses");
790 Feature feature
= Feature
.USES();
791 return addDescriptionElement(state
, taxon
, value
, feature
, null);
800 * @param unhandledDescriptionChildren
802 private DescriptionElementBase
handleDistribution(EfloraImportState state
, Element element
, Taxon taxon
) {
803 verifyNoAttribute(element
);
804 verifyNoChildren(element
, true);
805 String value
= element
.getTextNormalize();
806 value
= replaceStart(value
, "Distribution");
807 Feature feature
= Feature
.DISTRIBUTION();
808 //distribution parsing almost impossible as there is lots of freetext in the distribution tag
809 return addDescriptionElement(state
, taxon
, value
, feature
, null);
817 * @param unhandledDescriptionChildren
819 private TextData
handleEcology(EfloraImportState state
, Element elEcology
, Taxon taxon
) {
820 verifyNoAttribute(elEcology
);
821 verifyNoChildren(elEcology
, true);
822 String value
= elEcology
.getTextNormalize();
823 Feature feature
= Feature
.ECOLOGY();
824 if (value
.startsWith("Habitat & Ecology")){
825 feature
= getFeature("Habitat & Ecology", state
);
826 value
= replaceStart(value
, "Habitat & Ecology");
827 }else if (value
.startsWith("Habitat")){
828 value
= replaceStart(value
, "Habitat");
829 feature
= getFeature("Habitat", state
);
831 return addDescriptionElement(state
, taxon
, value
, feature
, null);
838 * @param replacementString
840 private String
replaceStart(String value
, String replacementString
) {
841 if (value
.startsWith(replacementString
) ){
842 value
= value
.substring(replacementString
.length()).trim();
844 while (value
.startsWith("-") || value
.startsWith("\96") ){
845 value
= value
.substring("-".length()).trim();
853 * @param replacementString
855 protected String
removeTrailing(String value
, String replacementString
) {
859 if (value
.endsWith(replacementString
) ){
860 value
= value
.substring(0, value
.length() - replacementString
.length()).trim();
869 * @param unhandledNomeclatureChildren
871 private void handleNomenclature(EfloraImportState state
, Element elNomenclature
, Taxon taxon
, Set
<String
> unhandledChildren
) {
872 verifyNoAttribute(elNomenclature
);
874 List
<Element
> elements
= elNomenclature
.getChildren();
875 for (Element element
: elements
){
876 if (element
.getName().equals("homotypes")){
877 handleHomotypes(state
, element
, taxon
);
878 }else if (element
.getName().equals("notes")){
879 handleNomenclatureNotes(state
, element
, taxon
);
881 unhandledChildren
.add(element
.getName());
889 private void handleNomenclatureNotes(EfloraImportState state
, Element elNotes
, Taxon taxon
) {
890 verifyNoAttribute(elNotes
);
891 verifyNoChildren(elNotes
);
892 String notesText
= elNotes
.getTextNormalize();
893 Annotation annotation
= Annotation
.NewInstance(notesText
, AnnotationType
.EDITORIAL(), Language
.DEFAULT());
894 taxon
.addAnnotation(annotation
);
899 private static Set
<String
> unhandledHomotypeChildren
= new HashSet
<String
>();
905 private void handleHomotypes(EfloraImportState state
, Element elHomotypes
, Taxon taxon
) {
906 verifyNoAttribute(elHomotypes
);
908 List
<Element
> elements
= elHomotypes
.getChildren();
909 HomotypicalGroup homotypicalGroup
= null;
910 for (Element element
: elements
){
911 if (element
.getName().equals("nom")){
912 homotypicalGroup
= handleNom(state
, element
, taxon
, homotypicalGroup
);
914 unhandledHomotypeChildren
.add(element
.getName());
920 private static Set
<String
> unhandledNomChildren
= new HashSet
<String
>();
927 private HomotypicalGroup
handleNom(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
) {
928 List
<Attribute
> attributes
= elNom
.getAttributes();
930 boolean taxonBaseClassType
= false;
931 for (Attribute attribute
: attributes
){
932 if (! attribute
.getName().equalsIgnoreCase("class")){
933 logger
.warn("Nom has unhandled attribute " + attribute
.getName());
935 String classValue
= attribute
.getValue();
936 if (classValue
.equalsIgnoreCase("acceptedname")){
937 homotypicalGroup
= handleNomTaxon(state
, elNom
, taxon
,homotypicalGroup
, false);
938 taxonBaseClassType
= true;
939 }else if (classValue
.equalsIgnoreCase("synonym")){
940 homotypicalGroup
= handleNomTaxon(state
, elNom
, taxon
, homotypicalGroup
, true);
941 taxonBaseClassType
= true;
942 }else if (classValue
.equalsIgnoreCase("typeref")){
943 handleTypeRef(state
, elNom
, taxon
, homotypicalGroup
);
945 logger
.warn("Unhandled class value for nom: " + classValue
);
951 List
<Element
> elements
= elNom
.getChildren();
952 for (Element element
: elements
){
953 if (element
.getName().equals("name") || element
.getName().equals("homonym") ){
954 if (taxonBaseClassType
== false){
955 logger
.warn("Name or homonym tag not allowed in non taxon nom tag");
958 unhandledNomChildren
.add(element
.getName());
962 return homotypicalGroup
;
970 * @param homotypicalGroup
972 protected void handleTypeRef(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
) {
973 verifyNoChildren(elNom
);
974 String typeRef
= elNom
.getTextNormalize();
975 typeRef
= removeStartingTypeRefMinus(typeRef
);
977 String
[] split
= typeRef
.split(":");
978 if (split
.length
< 2){
979 logger
.warn("typeRef has no ':' : " + typeRef
);
980 }else if (split
.length
> 2){
981 logger
.warn("typeRef has more than 1 ':' : " + typeRef
);
983 StringBuffer typeType
= new StringBuffer(split
[0]);
984 String typeText
= split
[1].trim();
985 TypeDesignationBase typeDesignation
= getTypeDesignationAndReference(typeType
);
987 //Name Type Desitnations
988 if (typeDesignation
instanceof NameTypeDesignation
){
989 makeNameTypeDesignations(typeType
, typeText
, typeDesignation
);
991 //SpecimenTypeDesignations
992 else if (typeDesignation
instanceof SpecimenTypeDesignation
){
993 makeSpecimenTypeDesignation(typeType
, typeText
, typeDesignation
);
995 logger
.error("Unhandled type designation class" + typeDesignation
.getClass().getName());
997 for (TaxonNameBase name
: homotypicalGroup
.getTypifiedNames()){
998 name
.addTypeDesignation(typeDesignation
, true);
1008 protected String
removeStartingTypeRefMinus(String typeRef
) {
1009 typeRef
= replaceStart(typeRef
, "-");
1010 typeRef
= replaceStart(typeRef
, "\97");
1011 typeRef
= replaceStart(typeRef
, "\u002d");
1012 typeRef
= replaceStart(typeRef
, "\u2013");
1013 typeRef
= replaceStart(typeRef
, "--");
1020 * @param typeDesignation
1022 private void makeNameTypeDesignations(StringBuffer typeType
, String typeText
, TypeDesignationBase typeDesignation
) {
1023 if (typeType
.toString().trim().equalsIgnoreCase("Type")){
1025 }else if (typeType
.toString().trim().equalsIgnoreCase("Lectotype")){
1026 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.LECTOTYPE());
1027 }else if (typeType
.toString().trim().equalsIgnoreCase("Syntype")){
1028 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.SYNTYPE());
1030 logger
.warn("Unhandled type string: " + typeType
+ "(" + CharUtils
.unicodeEscaped(typeType
.charAt(0)) + ")");
1033 typeText
= cleanNameType(typeText
);
1035 BotanicalName nameType
= (BotanicalName
)parser
.parseFullName(typeText
, NomenclaturalCode
.ICBN
, Rank
.SPECIES());
1036 ((NameTypeDesignation
) typeDesignation
).setTypeName(nameType
);
1037 //TODO wie können NameTypes den Namen zugeordnet werden? - wird aber vom Portal via NameCache matching gemacht
1041 private String
cleanNameType(String typeText
) {
1043 String
[] split
= typeText
.split("\\[.*\\].?");
1052 * @param typeDesignation
1054 protected void makeSpecimenTypeDesignation(StringBuffer typeType
, String typeText
, TypeDesignationBase typeDesignation
) {
1055 if (typeType
.toString().trim().equalsIgnoreCase("Type")){
1057 }else if (typeType
.toString().trim().equalsIgnoreCase("Neotype") || typeType
.toString().trim().equalsIgnoreCase("Neotypes")){
1058 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.NEOTYPE());
1059 }else if (typeType
.toString().trim().equalsIgnoreCase("Syntype") || typeType
.toString().trim().equalsIgnoreCase("Syntypes")){
1060 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.SYNTYPE());
1061 }else if (typeType
.toString().trim().equalsIgnoreCase("Lectotype")){
1062 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.LECTOTYPE());
1063 }else if (typeType
.toString().trim().equalsIgnoreCase("Paratype")){
1064 typeDesignation
.setTypeStatus(SpecimenTypeDesignationStatus
.PARATYPE());
1066 logger
.warn("Unhandled type string: " + typeType
);
1068 Specimen specimen
= Specimen
.NewInstance();
1069 if (typeText
.length() > 255){
1070 specimen
.setTitleCache(typeText
.substring(0, 252) + "...", true);
1072 specimen
.setTitleCache(typeText
, true);
1074 specimen
.addDefinition(typeText
, Language
.ENGLISH());
1075 ((SpecimenTypeDesignation
) typeDesignation
).setTypeSpecimen(specimen
);
1078 private TypeDesignationBase
getTypeDesignationAndReference(StringBuffer typeType
) {
1079 TypeDesignationBase result
;
1080 ReferenceBase ref
= parseTypeDesignationReference(typeType
);
1081 if (typeType
.indexOf(" species")>-1 || typeType
.indexOf("genus")>-1){
1082 if (typeType
.indexOf(" species")>-1 ){
1083 result
= NameTypeDesignation
.NewInstance();
1084 int start
= typeType
.indexOf(" species");
1085 typeType
.replace(start
, start
+ " species".length(), "");
1087 result
= NameTypeDesignation
.NewInstance();
1088 int start
= typeType
.indexOf(" genus");
1089 typeType
.replace(start
, start
+ " genus".length(), "");
1092 result
= SpecimenTypeDesignation
.NewInstance();
1094 result
.setCitation(ref
);
1099 private ReferenceBase
parseTypeDesignationReference(StringBuffer typeType
) {
1100 ReferenceBase result
= null;
1101 String reBracketReference
= "\\(.*\\)";
1102 Pattern patBracketReference
= Pattern
.compile(reBracketReference
);
1103 Matcher matcher
= patBracketReference
.matcher(typeType
);
1104 if (matcher
.find()){
1105 String refString
= matcher
.group();
1106 int start
= typeType
.indexOf(refString
);
1107 typeType
.replace(start
, start
+ refString
.length(), "");
1108 refString
= refString
.replace("(", "").replace(")", "").trim();
1109 ReferenceBase ref
= ReferenceFactory
.newGeneric();
1110 ref
.setTitleCache(refString
, true);
1123 private HomotypicalGroup
handleNomTaxon(EfloraImportState state
, Element elNom
, Taxon taxon
, HomotypicalGroup homotypicalGroup
, boolean isSynonym
) {
1124 NonViralName name
= makeName(taxon
, homotypicalGroup
, isSynonym
);
1127 boolean hasGenusInfo
= false;
1128 TeamOrPersonBase lastTeam
= null;
1131 List
<Element
> elGenus
= XmlHelp
.getAttributedChildListWithValue(elNom
, "name", "class", "genus");
1132 if (elGenus
.size() > 0){
1133 hasGenusInfo
= true;
1135 logger
.debug ("No Synonym Genus");
1137 //infra rank -> needed to handle authors correctly
1138 List
<Element
> elInfraRank
= XmlHelp
.getAttributedChildListWithValue(elNom
, "name", "class", "infrank");
1139 Rank infraRank
= null;
1140 infraRank
= handleInfRank(name
, elInfraRank
, infraRank
);
1142 //get left over elements
1143 List
<Element
> elements
= elNom
.getChildren();
1144 elements
.removeAll(elInfraRank
);
1146 for (Element element
: elements
){
1147 if (element
.getName().equals("name")){
1148 String classValue
= element
.getAttributeValue("class");
1149 String value
= element
.getValue().trim();
1150 if (classValue
.equalsIgnoreCase("genus") || classValue
.equalsIgnoreCase("family") ){
1151 name
.setGenusOrUninomial(value
);
1152 }else if (classValue
.equalsIgnoreCase("family") ){
1153 name
.setGenusOrUninomial(value
);
1154 name
.setRank(Rank
.FAMILY());
1155 }else if (classValue
.equalsIgnoreCase("subgenus")){
1156 //name.setInfraGenericEpithet(value);
1157 name
.setNameCache(value
.replace(":", "").trim());
1158 name
.setRank(Rank
.SUBGENUS());
1159 }else if (classValue
.equalsIgnoreCase("epithet") ){
1160 if (hasGenusInfo
== true){
1161 name
.setSpecificEpithet(value
);
1163 handleInfraspecificEpithet(element
, classValue
, name
);
1165 }else if (classValue
.equalsIgnoreCase("author")){
1166 handleNameAuthors(element
, name
);
1167 }else if (classValue
.equalsIgnoreCase("paraut")){
1168 handleBasionymAuthor(state
, element
, name
, false);
1169 }else if (classValue
.equalsIgnoreCase("infrauthor") || classValue
.equalsIgnoreCase("infraut")){
1170 handleInfrAuthor(state
, element
, name
, true);
1171 }else if (classValue
.equalsIgnoreCase("infrapar") || classValue
.equalsIgnoreCase("infrpar") || classValue
.equalsIgnoreCase("parauthor") ){
1172 handleBasionymAuthor(state
, element
, name
, true);
1173 }else if (classValue
.equalsIgnoreCase("infrepi")){
1174 handleInfrEpi(name
, infraRank
, value
);
1175 }else if (classValue
.equalsIgnoreCase("pub")){
1176 lastTeam
= handleNomenclaturalReference(name
, value
);
1177 }else if (classValue
.equalsIgnoreCase("usage")){
1178 lastTeam
= handleNameUsage(taxon
, name
, value
, lastTeam
);
1179 }else if (classValue
.equalsIgnoreCase("note")){
1180 handleNameNote(name
, value
);
1181 }else if (classValue
.equalsIgnoreCase("num")){
1183 logger
.warn("Duplicate num: " + value
);
1187 if (isSynonym
== true){
1188 logger
.warn("Synonym should not have a num");
1190 }else if (classValue
.equalsIgnoreCase("typification")){
1191 logger
.warn("Typification should not be a nom class");
1193 logger
.warn("Unhandled name class: " + classValue
);
1195 }else if(element
.getName().equals("homonym")){
1196 handleHomonym(state
, element
, name
);
1198 // child element is not "name"
1199 unhandledNomChildren
.add(element
.getName());
1205 String taxonString
= name
.getNameCache();
1206 //try to find matching lead nodes
1207 UnmatchedLeadsKey leadsKey
= UnmatchedLeadsKey
.NewInstance(num
, taxonString
);
1208 Set
<FeatureNode
> matchingNodes
= handleMatchingNodes(state
, taxon
, leadsKey
);
1209 //same without using the num
1211 UnmatchedLeadsKey noNumLeadsKey
= UnmatchedLeadsKey
.NewInstance("", taxonString
);
1212 handleMatchingNodes(state
, taxon
, noNumLeadsKey
);
1214 if (matchingNodes
.isEmpty() && num
!= null){
1215 logger
.warn("Taxon has num but no matching nodes exist: " + num
+ ", Key: " + leadsKey
.toString());
1219 //test nom element has no text
1220 if (StringUtils
.isNotBlank(elNom
.getTextNormalize().replace("\97", "").replace("\u002d","").replace("\u2013", ""))){
1221 String strElNom
= elNom
.getTextNormalize();
1222 if ("?".equals(strElNom
)){
1223 handleQuestionMark(name
, taxon
);
1225 // Character c = strElNom.charAt(0);
1226 //System.out.println(CharUtils.unicodeEscaped(c));
1227 logger
.warn("Nom tag has text: " + strElNom
);
1230 return name
.getHomotypicalGroup();
1234 private void handleQuestionMark(NonViralName name
, Taxon taxon
) {
1235 int count
= name
.getTaxonBases().size();
1237 logger
.warn("Name has " + count
+ " taxa. This is not handled for question mark");
1239 TaxonBase taxonBase
= (TaxonBase
)name
.getTaxonBases().iterator().next();
1240 taxonBase
.setDoubtful(true);
1245 //merge with handleNomTaxon
1246 private void handleHomonym(EfloraImportState state
, Element elHomonym
, NonViralName upperName
) {
1247 verifyNoAttribute(elHomonym
);
1250 BotanicalName homonymName
= BotanicalName
.NewInstance(upperName
.getRank());
1251 homonymName
.setGenusOrUninomial(upperName
.getGenusOrUninomial());
1252 homonymName
.setInfraGenericEpithet(upperName
.getInfraGenericEpithet());
1253 homonymName
.setSpecificEpithet(upperName
.getSpecificEpithet());
1254 homonymName
.setInfraSpecificEpithet(upperName
.getInfraSpecificEpithet());
1256 for (Element elName
: (List
<Element
>)elHomonym
.getChildren("name")){
1257 String classValue
= elName
.getAttributeValue("class");
1258 String value
= elName
.getValue().trim();
1259 if (classValue
.equalsIgnoreCase("genus") ){
1260 homonymName
.setGenusOrUninomial(value
);
1261 }else if (classValue
.equalsIgnoreCase("epithet") ){
1262 homonymName
.setSpecificEpithet(value
);
1263 }else if (classValue
.equalsIgnoreCase("author")){
1264 handleNameAuthors(elName
, homonymName
);
1265 }else if (classValue
.equalsIgnoreCase("paraut")){
1266 handleBasionymAuthor(state
, elName
, homonymName
, true);
1267 }else if (classValue
.equalsIgnoreCase("pub")){
1268 handleNomenclaturalReference(homonymName
, value
);
1269 }else if (classValue
.equalsIgnoreCase("note")){
1270 handleNameNote(homonymName
, value
);
1272 logger
.warn("Unhandled class value: " + classValue
);
1275 //TODO verify other information
1279 boolean homonymIsLater
= false;
1280 NameRelationshipType relType
= NameRelationshipType
.LATER_HOMONYM();
1281 if (upperName
.getNomenclaturalReference() != null && homonymName
.getNomenclaturalReference() != null){
1282 TimePeriod homonymYear
= homonymName
.getNomenclaturalReference().getDatePublished();
1283 TimePeriod nameYear
= upperName
.getNomenclaturalReference().getDatePublished();
1284 homonymIsLater
= homonymYear
.getStart().compareTo(nameYear
.getStart()) > 0;
1286 if (upperName
.getNomenclaturalReference() == null){
1287 logger
.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName
.getTitleCache());
1289 if (homonymName
.getNomenclaturalReference() == null){
1290 logger
.warn("Homonym does not have a nomenclatural reference or year: " + homonymName
.getTitleCache());
1293 if (homonymIsLater
){
1294 homonymName
.addRelationshipToName(upperName
, relType
, null);
1296 upperName
.addRelationshipToName(homonymName
, relType
, null);
1308 private Set
<FeatureNode
> handleMatchingNodes(EfloraImportState state
, Taxon taxon
, UnmatchedLeadsKey leadsKey
) {
1309 Set
<FeatureNode
> matchingNodes
= state
.getUnmatchedLeads().getNodes(leadsKey
);
1310 for (FeatureNode matchingNode
: matchingNodes
){
1311 state
.getUnmatchedLeads().removeNode(leadsKey
, matchingNode
);
1312 matchingNode
.setTaxon(taxon
);
1313 state
.getFeatureNodesToSave().add(matchingNode
);
1315 return matchingNodes
;
1319 private void handleNameNote(NonViralName name
, String value
) {
1320 logger
.warn("Name note: " + value
+ ". Available in portal?");
1321 Annotation annotation
= Annotation
.NewInstance(value
, AnnotationType
.EDITORIAL(), Language
.DEFAULT());
1322 name
.addAnnotation(annotation
);
1331 protected TeamOrPersonBase
handleNameUsage(Taxon taxon
, NonViralName name
, String referenceTitle
, TeamOrPersonBase lastTeam
) {
1332 ReferenceBase ref
= ReferenceFactory
.newGeneric();
1333 referenceTitle
= removeStartingSymbols(referenceTitle
, ref
);
1335 ref
.setTitleCache(referenceTitle
, true);
1336 String microReference
= parseReferenceYearAndDetail(ref
);
1337 TeamOrPersonBase team
= getReferenceAuthor(ref
);
1338 parseReferenceType(ref
);
1342 ref
.setAuthorTeam(team
);
1344 TaxonDescription description
= getDescription(taxon
);
1345 TextData textData
= TextData
.NewInstance(Feature
.CITATION());
1346 textData
.addSource(null, null, ref
, microReference
, name
, null);
1347 description
.addElement(textData
);
1353 * @param referenceTitle
1357 private String
removeStartingSymbols(String referenceTitle
, ReferenceBase ref
) {
1358 if (referenceTitle
.startsWith(";") || referenceTitle
.startsWith(",") || referenceTitle
.startsWith(":")){
1359 referenceTitle
= referenceTitle
.substring(1).trim();
1360 ref
.setTitleCache(referenceTitle
);
1362 return referenceTitle
;
1366 private void parseReferenceType(ReferenceBase ref
) {
1367 String title
= ref
.getTitle();
1371 title
= title
.trim();
1373 if (! title
.startsWith("in ")){
1374 ref
.setType(ReferenceType
.Book
);
1378 title
= title
.substring(3);
1381 if (title
.indexOf(",") == -1){
1382 ref
.setType(ReferenceType
.Article
);
1383 IJournal journal
= ReferenceFactory
.newJournal();
1384 journal
.setTitle(title
);
1386 ref
.setInJournal(journal
);
1390 ref
.setType(ReferenceType
.BookSection
);
1391 String
[] split
= (title
).split(",\\s*[A-Z]");
1392 if (split
.length
<= 1){
1393 logger
.warn("Can not fully decide what reference type. Guess it is a book section: " + title
);
1395 IBook book
= ReferenceFactory
.newBook();
1396 Team bookTeam
= Team
.NewTitledInstance(split
[0].trim(), split
[0].trim());
1398 title
= title
.substring(split
[0].length() + 1).trim();
1399 } catch (Exception e
) {
1400 logger
.error("ERROR occurred when trying to split title: " + title
+ "; split[0]: + " + split
[0]);
1402 book
.setTitle(title
);
1403 book
.setAuthorTeam(bookTeam
);
1404 book
.setDatePublished(ref
.getDatePublished());
1406 ref
.setInBook(book
);
1411 protected Team
getReferenceAuthor (ReferenceBase ref
) {
1412 boolean isCache
= false;
1413 String referenceTitle
= ref
.getTitle();
1414 if (referenceTitle
== null){
1416 referenceTitle
= ref
.getTitleCache();
1419 String
[] split
= (" " + referenceTitle
).split(" in ");
1420 if (split
.length
> 1){
1421 if (StringUtils
.isNotBlank(split
[0])){
1422 //' in ' is within the reference string, take the preceding string as the team
1423 Team team
= Team
.NewTitledInstance(split
[0].trim(), split
[0].trim());
1425 ref
.setTitle("in " + split
[1]);
1429 //string starts with in therefore no author is given
1434 split
= referenceTitle
.split(",");
1435 if (split
.length
< 2){
1436 //no author is given
1441 split
= (referenceTitle
).split(",\\s*[A-Z]");
1442 if (split
.length
> 1){
1443 Team team
= Team
.NewTitledInstance(split
[0].trim(), split
[0].trim());
1445 ref
.setTitle(referenceTitle
.substring(split
[0].length()+1).trim());
1449 logger
.warn("Can't decide if a usage has an author: " + referenceTitle
);
1456 * Replaced by <homonym> tag but still in use for exceptions
1461 protected String
parseHomonym(String detail
, NonViralName name
) {
1463 if (detail
== null){
1469 String reNon
= "(\\s|,)non\\s";
1470 Pattern patReference
= Pattern
.compile(reNon
);
1471 Matcher matcher
= patReference
.matcher(detail
);
1472 if (matcher
.find()){
1473 int start
= matcher
.start();
1474 int end
= matcher
.end();
1476 if (detail
!= null){
1477 logger
.warn("Unhandled non part: " + detail
.substring(start
));
1481 result
= detail
.substring(0, start
);
1484 String homonymString
= detail
.substring(end
);
1487 BotanicalName homonymName
= BotanicalName
.NewInstance(name
.getRank());
1488 homonymName
.setGenusOrUninomial(name
.getGenusOrUninomial());
1489 homonymName
.setInfraGenericEpithet(name
.getInfraGenericEpithet());
1490 homonymName
.setSpecificEpithet(name
.getSpecificEpithet());
1491 homonymName
.setInfraSpecificEpithet(name
.getInfraSpecificEpithet());
1492 ReferenceBase homonymNomRef
= ReferenceFactory
.newGeneric();
1493 homonymNomRef
.setTitleCache(homonymString
);
1494 String homonymNomRefDetail
= parseReferenceYearAndDetail(homonymNomRef
);
1495 homonymName
.setNomenclaturalMicroReference(homonymNomRefDetail
);
1496 String authorTitle
= homonymNomRef
.getTitleCache();
1497 Team team
= Team
.NewTitledInstance(authorTitle
, authorTitle
);
1498 homonymNomRef
.setAuthorTeam(team
);
1499 homonymNomRef
.setTitle("");
1500 homonymNomRef
.setProtectedTitleCache(false);
1503 boolean homonymIsLater
= false;
1504 NameRelationshipType relType
= NameRelationshipType
.LATER_HOMONYM();
1505 TimePeriod homonymYear
= homonymNomRef
.getDatePublished();
1506 if (name
.getNomenclaturalReference() != null){
1507 TimePeriod nameYear
= name
.getNomenclaturalReference().getDatePublished();
1508 homonymIsLater
= homonymYear
.getStart().compareTo(nameYear
.getStart()) > 0;
1510 logger
.warn("Classification name has no nomenclatural reference");
1512 if (homonymIsLater
){
1513 homonymName
.addRelationshipToName(name
, relType
, null);
1515 name
.addRelationshipToName(homonymName
, relType
, null);
1526 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1530 protected TeamOrPersonBase
handleNomenclaturalReference(NonViralName name
, String value
) {
1531 ReferenceBase nomRef
= ReferenceFactory
.newGeneric();
1532 nomRef
.setTitleCache(value
, true);
1533 parseNomStatus(nomRef
, name
);
1534 String microReference
= parseReferenceYearAndDetail(nomRef
);
1535 name
.setNomenclaturalReference(nomRef
);
1536 microReference
= parseHomonym(microReference
, name
);
1537 name
.setNomenclaturalMicroReference(microReference
);
1538 TeamOrPersonBase team
= (TeamOrPersonBase
)name
.getCombinationAuthorTeam();
1540 logger
.warn("Name has nom. ref. but no author team. Name: " + name
.getTitleCache() + ", Nom.Ref.: " + value
);
1542 nomRef
.setAuthorTeam(team
);
1547 private void handleInfrAuthor(EfloraImportState state
, Element elAuthor
, NonViralName name
, boolean overwrite
) {
1548 String strAuthor
= elAuthor
.getValue().trim();
1549 if (strAuthor
.endsWith(",")){
1550 strAuthor
= strAuthor
.substring(0, strAuthor
.length() -1);
1552 TeamOrPersonBase
[] team
= getTeam(strAuthor
);
1553 if (name
.getCombinationAuthorTeam() != null && overwrite
== false){
1554 logger
.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1556 name
.setCombinationAuthorTeam(team
[0]);
1557 name
.setExCombinationAuthorTeam(team
[1]);
1565 * Sets the names rank according to the infrank value
1568 * @param elInfraRank
1572 private Rank
handleInfRank(NonViralName name
, List
<Element
> elInfraRank
, Rank infraRank
) {
1573 if (elInfraRank
.size() == 1){
1574 String strRank
= elInfraRank
.get(0).getTextNormalize();
1576 infraRank
= Rank
.getRankByNameOrAbbreviation(strRank
);
1577 } catch (UnknownCdmTypeException e
) {
1579 infraRank
= Rank
.getRankByNameOrAbbreviation(strRank
+ ".");
1580 } catch (UnknownCdmTypeException e2
) {
1581 logger
.warn("Unknown infrank " + strRank
+ ". Set infraRank to (null).");
1584 }else if (elInfraRank
.size() > 1){
1585 logger
.warn ("There is more than 1 infrank");
1587 if (infraRank
!= null){
1588 name
.setRank(infraRank
);
1594 private void handleInfrEpi(NonViralName name
, Rank infraRank
, String value
) {
1595 if (infraRank
!= null && infraRank
.isInfraSpecific()){
1596 name
.setInfraSpecificEpithet(value
);
1597 if (CdmUtils
.isCapital(value
)){
1598 logger
.warn("Infraspecific epithet starts with a capital letter: " + value
);
1600 }else if (infraRank
!= null && infraRank
.isInfraGeneric()){
1601 name
.setInfraGenericEpithet(value
);
1602 if (! CdmUtils
.isCapital(value
)){
1603 logger
.warn("Infrageneric epithet does not start with a capital letter: " + value
);
1606 logger
.warn("Infrepi could not be handled: " + value
);
1613 * Returns the (empty) with the correct homotypical group depending on the taxon status
1615 * @param homotypicalGroup
1619 private NonViralName
makeName(Taxon taxon
,HomotypicalGroup homotypicalGroup
, boolean isSynonym
) {
1622 name
= BotanicalName
.NewInstance(Rank
.SPECIES(), homotypicalGroup
);
1623 SynonymRelationshipType synonymType
= SynonymRelationshipType
.HETEROTYPIC_SYNONYM_OF();
1624 if (taxon
.getHomotypicGroup().equals(homotypicalGroup
)){
1625 synonymType
= SynonymRelationshipType
.HOMOTYPIC_SYNONYM_OF();
1627 taxon
.addSynonymName(name
, synonymType
);
1629 name
= (NonViralName
)taxon
.getName();
1639 private void handleInfraspecificEpithet(Element element
, String attrValue
, NonViralName name
) {
1640 String value
= element
.getTextNormalize();
1641 if (value
.indexOf("subsp.") != -1){
1642 //TODO genus and species epi
1643 String infrEpi
= value
.substring(value
.indexOf("subsp.") + 6).trim();
1644 name
.setInfraSpecificEpithet(infrEpi
);
1645 name
.setRank(Rank
.SUBSPECIES());
1646 }else if (value
.indexOf("var.") != -1){
1647 //TODO genus and species epi
1648 String infrEpi
= value
.substring(value
.indexOf("var.") + 4).trim();
1649 name
.setInfraSpecificEpithet(infrEpi
);
1650 name
.setRank(Rank
.VARIETY());
1652 logger
.warn("Unhandled infraspecific type: " + value
);
1662 private void handleBasionymAuthor(EfloraImportState state
, Element elBasionymAuthor
, NonViralName name
, boolean overwrite
) {
1663 String strAuthor
= elBasionymAuthor
.getValue().trim();
1664 Pattern reBasionymAuthor
= Pattern
.compile("^\\(.*\\)$");
1665 if (reBasionymAuthor
.matcher(strAuthor
).matches()){
1666 strAuthor
= strAuthor
.substring(1, strAuthor
.length()-1);
1668 logger
.warn("Brackets are missing for original combination author " + strAuthor
);
1670 TeamOrPersonBase
[] basionymTeam
= getTeam(strAuthor
);
1671 if (name
.getBasionymAuthorTeam() != null && overwrite
== false){
1672 logger
.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1674 name
.setBasionymAuthorTeam(basionymTeam
[0]);
1675 name
.setExBasionymAuthorTeam(basionymTeam
[1]);
// Maps an author string to the UUID of the team or person created for it;
// filled by getTeam and read by getUuidTeam.
private Map<String, UUID> teamMap = new HashMap<String, UUID>();
1686 private void handleNameAuthors(Element elAuthor
, NonViralName name
) {
1687 if (name
.getCombinationAuthorTeam() != null){
1688 logger
.warn("Name already has a combination author. Name: " + name
.getTitleCache() + ", Author: " + elAuthor
.getTextNormalize());
1690 String strAuthor
= elAuthor
.getValue().trim();
1691 if (strAuthor
.endsWith(",")){
1692 strAuthor
= strAuthor
.substring(0, strAuthor
.length() -1);
1694 if (strAuthor
.indexOf("(") > -1 || strAuthor
.indexOf(")") > -1){
1695 logger
.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor
);
1697 TeamOrPersonBase
[] team
= getTeam(strAuthor
);
1698 name
.setCombinationAuthorTeam(team
[0]);
1699 name
.setExCombinationAuthorTeam(team
[1]);
1707 private TeamOrPersonBase
[] getTeam(String strAuthor
) {
1708 TeamOrPersonBase
[] result
= new TeamOrPersonBase
[2];
1709 String
[] split
= strAuthor
.split(" ex ");
1710 String strBaseAuthor
= null;
1711 String strExAuthor
= null;
1713 if (split
.length
== 2){
1714 strBaseAuthor
= split
[1];
1715 strExAuthor
= split
[0];
1716 }else if (split
.length
== 1){
1717 strBaseAuthor
= split
[0];
1719 logger
.warn("Could not parse (ex) author: " + strAuthor
);
1721 result
[0] = getUuidTeam(strBaseAuthor
);
1722 if (result
[0] == null){
1723 result
[0] = parseSingleTeam(strBaseAuthor
);
1724 teamMap
.put(strBaseAuthor
, result
[0].getUuid());
1726 if (strExAuthor
!= null){
1727 result
[1] = getUuidTeam(strExAuthor
);
1728 if (result
[1] == null){
1729 result
[1] = Team
.NewInstance();
1730 result
[1].setTitleCache(strExAuthor
, true);
1731 teamMap
.put(strExAuthor
, result
[1].getUuid());
1739 protected TeamOrPersonBase
parseSingleTeam(String strBaseAuthor
) {
1740 TeamOrPersonBase result
;
1741 String
[] split
= strBaseAuthor
.split("&");
1742 if (split
.length
> 1){
1743 result
= Team
.NewInstance();
1744 for (String personString
: split
){
1745 Person person
= makePerson(personString
);
1746 ((Team
)result
).addTeamMember(person
);
1749 result
= makePerson(strBaseAuthor
.trim());
1756 * @param personString
1759 private Person
makePerson(String personString
) {
1760 personString
= personString
.trim();
1761 Person person
= Person
.NewTitledInstance(personString
);
1762 person
.setNomenclaturalTitle(personString
);
1769 * @param strBaseAuthor
1771 private TeamOrPersonBase
getUuidTeam(String strBaseAuthor
) {
1772 UUID uuidTeam
= teamMap
.get(strBaseAuthor
);
1773 return CdmBase
.deproxy(getAgentService().find(uuidTeam
), TeamOrPersonBase
.class);
1777 private void handleDescription(EfloraImportState state
, Element elDescription
, Taxon taxon
, Set
<String
> unhandledChildren
) {
1778 verifyNoAttribute(elDescription
);
1780 List
<Element
> elements
= elDescription
.getChildren();
1781 for (Element element
: elements
){
1782 if (element
.getName().equalsIgnoreCase("char")){
1783 handleChar(state
, element
, taxon
);
1785 logger
.warn("Unhandled description child: " + element
.getName());
1797 private void handleChar(EfloraImportState state
, Element element
, Taxon taxon
) {
1798 List
<Attribute
> attributes
= element
.getAttributes();
1799 for (Attribute attribute
: attributes
){
1800 if (! attribute
.getName().equalsIgnoreCase("class")){
1801 logger
.warn("Char has unhandled attribute " + attribute
.getName());
1803 String classValue
= attribute
.getValue();
1804 Feature feature
= getFeature(classValue
, state
);
1805 if (feature
== null){
1806 logger
.warn("Unhandled feature: " + classValue
);
1808 String value
= element
.getValue();
1809 addDescriptionElement(state
, taxon
, value
, feature
, null);
1815 List
<Element
> elements
= element
.getChildren();
1816 if (! elements
.isEmpty()){
1817 logger
.warn("Char has unhandled children");
1826 protected TaxonDescription
getDescription(Taxon taxon
) {
1827 for (TaxonDescription description
: taxon
.getDescriptions()){
1828 if (! description
.isImageGallery()){
1832 TaxonDescription newDescription
= TaxonDescription
.NewInstance(taxon
);
1833 return newDescription
;
1841 * @throws UndefinedTransformerMethodException
1843 private Feature
getFeature(String classValue
, EfloraImportState state
) {
1846 uuid
= state
.getTransformer().getFeatureUuid(classValue
);
1848 logger
.info("Uuid is null for " + classValue
);
1850 String featureText
= StringUtils
.capitalize(classValue
);
1851 Feature feature
= getFeature(state
, uuid
, featureText
, featureText
, classValue
);
1852 if (feature
== null){
1853 throw new NullPointerException(classValue
+ " not recognized as a feature");
1856 } catch (Exception e
) {
1857 logger
.warn("Could not create feature for " + classValue
+ ": " + e
.getMessage()) ;
1858 return Feature
.UNKNOWN();
1867 * @param unhandledTitleClassess
1869 private void handleTitle(EfloraImportState state
, Element element
, Taxon taxon
, Set
<String
> unhandledTitleClassess
) {
1871 List
<Attribute
> attributes
= element
.getAttributes();
1872 for (Attribute attribute
: attributes
){
1873 if (! attribute
.getName().equalsIgnoreCase("class") ){
1874 if (! attribute
.getName().equalsIgnoreCase("num")){
1875 logger
.warn("Title has unhandled attribute " + attribute
.getName());
1877 //TODO num attribute in taxon
1880 String classValue
= attribute
.getValue();
1884 rank
= Rank
.getRankByNameOrAbbreviation(classValue
);
1885 } catch (Exception e
) {
1887 rank
= Rank
.getRankByEnglishName(classValue
, NomenclaturalCode
.ICBN
, false);
1889 taxon
.getName().setRank(rank
);
1890 if (rank
.equals(Rank
.FAMILY()) || rank
.equals(Rank
.GENUS())){
1891 handleGenus(element
.getValue(), taxon
.getName());
1892 }else if (rank
.equals(Rank
.SUBGENUS())){
1893 handleSubGenus(element
.getValue(), taxon
.getName());
1894 }else if (rank
.equals(Rank
.SECTION_BOTANY())){
1895 handleSection(element
.getValue(), taxon
.getName());
1896 }else if (rank
.equals(Rank
.SPECIES())){
1897 handleSpecies(element
.getValue(), taxon
.getName());
1898 }else if (rank
.equals(Rank
.SUBSPECIES())){
1899 handleSubSpecies(element
.getValue(), taxon
.getName());
1900 }else if (rank
.equals(Rank
.VARIETY())){
1901 handleVariety(element
.getValue(), taxon
.getName());
1903 logger
.warn("Unhandled rank: " + rank
.getLabel());
1905 } catch (UnknownCdmTypeException e
) {
1906 logger
.warn("Unknown rank " + classValue
);
1907 unhandledTitleClassess
.add(classValue
);
1911 List
<Element
> elements
= element
.getChildren();
1912 if (! elements
.isEmpty()){
1913 logger
.warn("Title has unexpected children");
1915 UUID uuidTitle
= EfloraTransformer
.uuidTitle
;
1916 ExtensionType titleExtension
= this.getExtensionType(state
, uuidTitle
, "title", "title", "title");
1917 taxon
.addExtension(element
.getTextNormalize(), titleExtension
);
1924 * @param taxonNameBase
1926 private void handleSubGenus(String value
, TaxonNameBase taxonNameBase
) {
1927 String name
= value
.replace("Subgenus", "").trim();
1928 ((NonViralName
)taxonNameBase
).setInfraGenericEpithet(name
);
1933 * @param taxonNameBase
1935 private void handleSection(String value
, TaxonNameBase taxonNameBase
) {
1936 String name
= value
.replace("Section", "").trim();
1937 ((NonViralName
)taxonNameBase
).setInfraGenericEpithet(name
);
/**
 * Hook for species titles, called from handleTitle.
 * NOTE(review): no statement of this method is visible in the reviewed extract, it is
 * presumably empty - confirm.
 *
 * @param value the title text
 * @param taxonNameBase the name of the taxon
 */
private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
}
/**
 * Hook for variety titles, called from handleTitle.
 * NOTE(review): no statement of this method is visible in the reviewed extract, it is
 * presumably empty - confirm.
 *
 * @param value the title text
 * @param taxonNameBase the name of the taxon
 */
private void handleVariety(String value, TaxonNameBase taxonNameBase) {
}
/**
 * Hook for subspecies titles, called from handleTitle.
 * NOTE(review): no statement of this method is visible in the reviewed extract, it is
 * presumably empty - confirm.
 *
 * @param value the title text
 * @param taxonNameBase the name of the taxon
 */
private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
}
// Matches a section enclosed in square or round brackets; handleGenus uses the match
// as the author part of a genus or family title.
private Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1969 * @param taxonNameBase
1971 protected void handleGenus(String value
, TaxonNameBase taxonName
) {
1972 Matcher matcher
= rexGenusAuthor
.matcher(value
);
1973 if (matcher
.find()){
1974 String author
= matcher
.group();
1975 // String genus = value.replace(author, "");
1976 author
= author
.substring(1, author
.length() - 1);
1977 Team team
= Team
.NewInstance();
1978 team
.setTitleCache(author
, true);
1979 Credit credit
= Credit
.NewInstance(team
, null);
1980 taxonName
.addCredit(credit
);
1981 // NonViralName nvn = (NonViralName)taxonName;
1982 // nvn.setCombinationAuthorTeam(team);
1983 // nvn.setGenusOrUninomial(genus);
1985 logger
.info("No Author match for " + value
);
1994 private void handleTaxonRelation(EfloraImportState state
, Taxon taxon
, Taxon lastTaxon
) {
1996 TaxonomicTree tree
= getTree(state
);
1997 if (lastTaxon
== null){
1998 tree
.addChildTaxon(taxon
, null, null, null);
2001 Rank thisRank
= taxon
.getName().getRank();
2002 Rank lastRank
= lastTaxon
.getName().getRank();
2003 if (lastTaxon
.getTaxonNodes().size() > 0){
2004 TaxonNode lastNode
= lastTaxon
.getTaxonNodes().iterator().next();
2005 if (thisRank
.isLower(lastRank
) ){
2006 lastNode
.addChildTaxon(taxon
, null, null, null);
2007 fillMissingEpithetsForTaxa(lastTaxon
, taxon
);
2008 }else if (thisRank
.equals(lastRank
)){
2009 TaxonNode parent
= lastNode
.getParent();
2010 if (parent
!= null){
2011 parent
.addChildTaxon(taxon
, null, null, null);
2012 fillMissingEpithetsForTaxa(parent
.getTaxon(), taxon
);
2014 tree
.addChildTaxon(taxon
, null, null, null);
2016 }else if (thisRank
.isHigher(lastRank
)){
2017 handleTaxonRelation(state
, taxon
, lastNode
.getParent().getTaxon());
2018 // TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2019 // parentNode.addChildTaxon(taxon, null, null, null);
2022 logger
.warn("Last taxon has no node");
2032 private TaxonomicTree
getTree(EfloraImportState state
) {
2033 TaxonomicTree result
= state
.getTree(null);
2034 if (result
== null){
2035 UUID uuid
= state
.getConfig().getTaxonomicTreeUuid();
2037 logger
.warn("No classification uuid is defined");
2038 result
= getNewClassification(state
);
2040 result
= getTaxonTreeService().getTaxonomicTreeByUuid(uuid
);
2041 if (result
== null){
2042 result
= getNewClassification(state
);
2043 result
.setUuid(uuid
);
2046 state
.putTree(null, result
);
2052 private TaxonomicTree
getNewClassification(EfloraImportState state
) {
2053 TaxonomicTree result
;
2054 result
= TaxonomicTree
.NewInstance(state
.getConfig().getClassificationTitle());
2055 state
.putTree(null, result
);
2067 private TextData
addDescriptionElement(EfloraImportState state
, Taxon taxon
, String value
, Feature feature
, String references
) {
2068 TextData textData
= TextData
.NewInstance(feature
);
2069 Language textLanguage
= getDefaultLanguage(state
);
2070 textData
.putText(value
, textLanguage
);
2071 TaxonDescription description
= getDescription(taxon
);
2072 description
.addElement(textData
);
2073 if (references
!= null){
2074 makeOriginalSourceReferences(textData
, ";", references
);
2079 private Language
getDefaultLanguage(EfloraImportState state
) {
2080 UUID defaultLanguageUuid
= state
.getConfig().getDefaultLanguageUuid();
2081 if (defaultLanguageUuid
!= null){
2082 Language result
= state
.getDefaultLanguage();
2083 if (result
== null || ! result
.getUuid().equals(defaultLanguageUuid
)){
2084 result
= (Language
)getTermService().find(defaultLanguageUuid
);
2085 state
.setDefaultLanguage(result
);
2086 if (result
== null){
2087 logger
.warn("Default language for " + defaultLanguageUuid
+ " does not exist.");
2092 return Language
.DEFAULT();
2098 * @param elNomenclature
2100 private void verifyNoAttribute(Element element
) {
2101 List
<Attribute
> attributes
= element
.getAttributes();
2102 if (! attributes
.isEmpty()){
2103 logger
.warn(element
.getName() + " has unhandled attributes: " + attributes
.get(0).getValue() + "..." );
2108 * @param elNomenclature
2110 protected void verifyNoChildren(Element element
) {
2111 verifyNoChildren(element
, false);
2115 * @param elNomenclature
2117 private void verifyNoChildren(Element element
, boolean ignoreLineBreak
) {
2118 List
<Element
> children
= element
.getChildren();
2119 if (! children
.isEmpty()){
2120 if (ignoreLineBreak
== true){
2121 for (Element child
: children
){
2122 if (! child
.getName().equalsIgnoreCase("BR")){
2123 logger
.warn(element
.getName() + " has unhandled child: " + child
.getName());
2127 logger
.warn(element
.getName() + " has unhandled children");
2135 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2136 * exists it is added to the name and the nom. status part of the references title cache is
2137 * removed. Requires protected title cache.
2139 * @param nonViralName
2141 protected void parseNomStatus(ReferenceBase ref
, NonViralName nonViralName
) {
2142 String titleToParse
= ref
.getTitleCache();
2144 String noStatusTitle
= parser
.parseNomStatus(titleToParse
, nonViralName
);
2145 if (! noStatusTitle
.equals(titleToParse
)){
2146 ref
.setTitleCache(noStatusTitle
, true);
2152 * Extracts the date published part and returns micro reference
2156 private String
parseReferenceYearAndDetail(ReferenceBase ref
){
2157 String detailResult
= null;
2158 String titleToParse
= ref
.getTitleCache();
2159 titleToParse
= removeStartingSymbols(titleToParse
, ref
);
2160 String reReference
= "^\\.{1,}";
2161 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2162 String oneMonth
= "(Feb.|Dec.|March|June|July)";
2163 String reYear
= oneMonth
+ "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2164 String secondYear
= "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2166 String reYearPeriod
= "\\(" + reYear
+ "(\\-" + secondYear
+ ")?\\)";
2167 String reDetail
= "\\.{1,10}$";
2169 //pattern for the whole string
2170 Pattern patReference
= Pattern
.compile(/*reReference +*/ reYearPeriod
/*+ reDetail */);
2171 Matcher matcher
= patReference
.matcher(titleToParse
);
2172 if (matcher
.find()){
2173 int start
= matcher
.start();
2174 int end
= matcher
.end();
2176 //title and other information precedes the year part
2177 String title
= titleToParse
.substring(0, start
).trim();
2178 //detail follows the year part
2179 String detail
= titleToParse
.substring(end
).trim();
2182 String strPeriod
= matcher
.group().trim();
2183 strPeriod
= strPeriod
.substring(1, strPeriod
.length()-1); //remove brackets
2184 Pattern patStartMonth
= Pattern
.compile("^" + oneMonth
);
2185 matcher
= patStartMonth
.matcher(strPeriod
);
2186 strPeriod
= strPeriod
.replace(" ", "");
2187 Integer startMonth
= null;
2188 if (matcher
.find()){
2189 end
= matcher
.end();
2190 strPeriod
= strPeriod
.substring(0, end
) + " " + strPeriod
.substring(end
);
2191 startMonth
= getMonth(strPeriod
.substring(0, end
));
2194 TimePeriod datePublished
= TimePeriod
.parseString(strPeriod
);
2195 if (startMonth
!= null){
2196 datePublished
.setStartMonth(startMonth
);
2198 ref
.setDatePublished(datePublished
);
2199 ref
.setTitle(title
);
2200 detailResult
= CdmUtils
.removeTrailingDot(detail
);
2201 if (detailResult
.endsWith(".") || detailResult
.endsWith(";") || detailResult
.endsWith(",") ){
2202 detailResult
= detailResult
.substring(0, detailResult
.length() -1);
2204 ref
.setProtectedTitleCache(false);
2206 logger
.warn("Could not parse reference: " + titleToParse
);
2208 return detailResult
;
2214 private Integer
getMonth(String month
) {
2215 if (month
.startsWith("Jan")){
2217 }else if (month
.startsWith("Feb")){
2219 }else if (month
.startsWith("Mar")){
2221 }else if (month
.startsWith("Apr")){
2223 }else if (month
.startsWith("May")){
2225 }else if (month
.startsWith("Jun")){
2227 }else if (month
.startsWith("Jul")){
2229 }else if (month
.startsWith("Aug")){
2231 }else if (month
.startsWith("Sep")){
2233 }else if (month
.startsWith("Oct")){
2235 }else if (month
.startsWith("Nov")){
2237 }else if (month
.startsWith("Dec")){
2240 logger
.warn("Month not yet supported: " + month
);
2247 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2249 protected boolean isIgnore(EfloraImportState state
){
2250 return ! state
.getConfig().isDoTaxa();