adapt app-import to v5.45
[cdmlib-apps.git] / cdm-eflora / src / main / java / eu / etaxonomy / cdm / io / eflora / EfloraTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.eflora;
10
11 import java.util.ArrayList;
12 import java.util.HashMap;
13 import java.util.HashSet;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.UUID;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20
21 import org.apache.commons.lang.CharUtils;
22 import org.apache.commons.lang.StringUtils;
23 import org.apache.logging.log4j.LogManager;
24 import org.apache.logging.log4j.Logger;
25 import org.jdom.Attribute;
26 import org.jdom.Element;
27 import org.springframework.stereotype.Component;
28 import org.springframework.transaction.TransactionStatus;
29
30 import eu.etaxonomy.cdm.common.CdmUtils;
31 import eu.etaxonomy.cdm.common.ResultWrapper;
32 import eu.etaxonomy.cdm.common.XmlHelp;
33 import eu.etaxonomy.cdm.io.common.ICdmIO;
34 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35 import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36 import eu.etaxonomy.cdm.model.agent.Person;
37 import eu.etaxonomy.cdm.model.agent.Team;
38 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40 import eu.etaxonomy.cdm.model.common.Annotation;
41 import eu.etaxonomy.cdm.model.common.AnnotationType;
42 import eu.etaxonomy.cdm.model.common.CdmBase;
43 import eu.etaxonomy.cdm.model.common.Credit;
44 import eu.etaxonomy.cdm.model.common.ExtensionType;
45 import eu.etaxonomy.cdm.model.common.Language;
46 import eu.etaxonomy.cdm.model.common.Marker;
47 import eu.etaxonomy.cdm.model.common.MarkerType;
48 import eu.etaxonomy.cdm.model.common.TimePeriod;
49 import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
50 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
51 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
52 import eu.etaxonomy.cdm.model.description.Feature;
53 import eu.etaxonomy.cdm.model.description.KeyStatement;
54 import eu.etaxonomy.cdm.model.description.PolytomousKey;
55 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
56 import eu.etaxonomy.cdm.model.description.TaxonDescription;
57 import eu.etaxonomy.cdm.model.description.TextData;
58 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
59 import eu.etaxonomy.cdm.model.name.IBotanicalName;
60 import eu.etaxonomy.cdm.model.name.INonViralName;
61 import eu.etaxonomy.cdm.model.name.NameRelationshipType;
62 import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
63 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
64 import eu.etaxonomy.cdm.model.name.Rank;
65 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
66 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
67 import eu.etaxonomy.cdm.model.name.TaxonName;
68 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
69 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
70 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
71 import eu.etaxonomy.cdm.model.reference.IBook;
72 import eu.etaxonomy.cdm.model.reference.IJournal;
73 import eu.etaxonomy.cdm.model.reference.ISourceable;
74 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
75 import eu.etaxonomy.cdm.model.reference.Reference;
76 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
77 import eu.etaxonomy.cdm.model.reference.ReferenceType;
78 import eu.etaxonomy.cdm.model.taxon.Classification;
79 import eu.etaxonomy.cdm.model.taxon.SynonymType;
80 import eu.etaxonomy.cdm.model.taxon.Taxon;
81 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
82 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
83 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
84 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
85 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
86
87 /**
88 * @author a.mueller
89 */
90 @Component
91 public class EfloraTaxonImport extends EfloraImportBase implements ICdmIO<EfloraImportState> {
92
/** Serialization version identifier of this import class. */
private static final long serialVersionUID = -333673708310331342L;
/** Logger used by all methods of this class. */
private static Logger logger = LogManager.getLogger();

/** {@code doInvoke} writes a progress message each time this many taxon elements were visited. */
private static int modCount = 30000;
/** Shared {@link NonViralNameParserImpl} instance. */
private final NonViralNameParserImpl parser = new NonViralNameParserImpl();

/**
 * Creates the import component. Nothing is initialized here beyond what the
 * (implicitly invoked) superclass constructor does.
 */
public EfloraTaxonImport(){
}
102
103 @Override
104 public boolean doCheck(EfloraImportState state){
105 boolean result = true;
106 return result;
107 }
108
109 //TODO make part of state, but state is renewed when invoking the import a second time
110 private UnmatchedLeads unmatchedLeads;
111
112 @Override
113 public void doInvoke(EfloraImportState state){
114 logger.info("start make Taxa ...");
115
116 //FIXME reset state
117 state.putTree(null, null);
118 // UnmatchedLeads unmatchedLeads = state.getOpenKeys();
119 if (unmatchedLeads == null){
120 unmatchedLeads = UnmatchedLeads.NewInstance();
121 }
122 state.setUnmatchedLeads(unmatchedLeads);
123
124 TransactionStatus tx = startTransaction();
125 unmatchedLeads.saveToSession(getPolytomousKeyNodeService());
126
127
128 //TODO generally do not store the reference object in the config
129 Reference sourceReference = state.getConfig().getSourceReference();
130 getReferenceService().saveOrUpdate(sourceReference);
131
132 Set<TaxonBase> taxaToSave = new HashSet<>();
133 ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
134
135 Element elbody= getBodyElement(state.getConfig());
136 List<Element> elTaxonList = elbody.getChildren();
137
138 int i = 0;
139
140 Set<String> unhandledTitleClassess = new HashSet<String>();
141 Set<String> unhandledNomeclatureChildren = new HashSet<String>();
142 Set<String> unhandledDescriptionChildren = new HashSet<String>();
143
144 Taxon lastTaxon = getLastTaxon(state);
145
146 //for each taxon
147 for (Element elTaxon : elTaxonList){
148 try {
149 if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
150 if (! elTaxon.getName().equalsIgnoreCase("taxon")){
151 logger.warn("body has element other than 'taxon'");
152 }
153
154 IBotanicalName botanicalName = TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES());
155 Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
156
157 handleTaxonAttributes(elTaxon, taxon, state);
158
159
160 List<Element> children = elTaxon.getChildren();
161 handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren, unhandledDescriptionChildren, taxon, children);
162 handleTaxonRelation(state, taxon, lastTaxon);
163 lastTaxon = taxon;
164 taxaToSave.add(taxon);
165 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
166
167 } catch (Exception e) {
168 logger.warn("Exception occurred in Sapindacea taxon import: " + e);
169 e.printStackTrace();
170 }
171
172 }
173
174 System.out.println(state.getUnmatchedLeads().toString());
175 logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
176
177 logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
178 logger.info("Children for description are: " + unhandledDescriptionChildren);
179 logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
180 logger.info("Children for nom are: " + unhandledNomChildren);
181
182
183 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
184 logger.info(i + " taxa handled. Saving ...");
185 getTaxonService().saveOrUpdate(taxaToSave);
186 getTermNodeService().saveOrUpdate(state.getFeatureNodesToSave());
187 state.getFeatureNodesToSave().clear();
188 commitTransaction(tx);
189
190 logger.info("end makeTaxa ...");
191 logger.info("start makeKey ...");
192 // invokeDoKey(state);
193 logger.info("end makeKey ...");
194
195 if (! success.getValue()){
196 state.setUnsuccessfull();
197 }
198 return ;
199 }
200
201
202 private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
203 List<Attribute> attrList = elTaxon.getAttributes();
204 for (Attribute attr : attrList){
205 String attrName = attr.getName();
206 String attrValue = attr.getValue();
207 if ("class".equals(attrName)){
208 if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES") ){
209 taxon.setDoubtful(true);
210 }else{
211 MarkerType markerType = getMarkerType(state, attrValue);
212 if (markerType == null){
213 logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
214 }else{
215 taxon.addMarker(Marker.NewInstance(markerType, true));
216 }
217 }
218 }else if ("num".equals(attrName)){
219 logger.warn("num not yet supported");
220 }else{
221 logger.warn("Attribute " + attrName + " not yet supported for element taxon");
222 }
223 }
224
225 }
226
227
228 private Taxon getLastTaxon(EfloraImportState state) {
229 if (state.getConfig().getLastTaxonUuid() == null){
230 return null;
231 }else{
232 return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
233 }
234 }
235
236
237 // private void invokeDoKey(SapindaceaeImportState state) {
238 // TransactionStatus tx = startTransaction();
239 //
240 // Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
241 // ITaxonService taxonService = getTaxonService();
242 // ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
243 //
244 // Element elbody= getBodyElement(state.getConfig());
245 // List<Element> elTaxonList = elbody.getChildren();
246 //
247 // int i = 0;
248 //
249 // //for each taxon
250 // for (Element elTaxon : elTaxonList){
251 // if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
252 // if (! elTaxon.getName().equalsIgnoreCase("taxon")){
253 // continue;
254 // }
255 //
256 // List<Element> children = elTaxon.getChildren("key");
257 // for (Element element : children){
258 // handleKeys(state, element, null);
259 // }
260 // nodesToSave.add(taxon);
261 //
262 // }
263 //
264 // }
265
266
267 // body/taxon/*
268 private void handleTaxonElement(EfloraImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
269 AnnotatableEntity lastEntity = null;
270 for (Element element : children){
271 String elName = element.getName();
272
273 if (elName.equalsIgnoreCase("title")){
274 handleTitle(state, element, taxon, unhandledTitleClassess);
275 lastEntity = null;
276 }else if(elName.equalsIgnoreCase("nomenclature")){
277 handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
278 lastEntity = null;
279 }else if(elName.equalsIgnoreCase("description")){
280 handleDescription(state, element, taxon, unhandledDescriptionChildren);
281 lastEntity = null;
282 }else if(elName.equalsIgnoreCase("habitatecology")){
283 lastEntity = handleEcology(state, element, taxon);
284 }else if(elName.equalsIgnoreCase("distribution")){
285 lastEntity = handleDistribution(state, element, taxon);
286 }else if(elName.equalsIgnoreCase("uses")){
287 lastEntity = handleUses(state, element, taxon);
288 }else if(elName.equalsIgnoreCase("notes")){
289 lastEntity = handleTaxonNotes(state, element, taxon);
290 }else if(elName.equalsIgnoreCase("chromosomes")){
291 lastEntity = handleChromosomes(state, element, taxon);
292 }else if(elName.equalsIgnoreCase("vernacularnames")){
293 handleVernaculars(state, element, taxon);
294 }else if(elName.equalsIgnoreCase("key")){
295 lastEntity = handleKeys(state, element, taxon);
296 }else if(elName.equalsIgnoreCase("references")){
297 handleReferences(state, element, taxon, lastEntity);
298 lastEntity = null;
299 }else if(elName.equalsIgnoreCase("taxon")){
300 logger.warn("A taxon should not be part of a taxon");
301 }else if(elName.equalsIgnoreCase("homotypes")){
302 logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
303 }else{
304 logger.warn("Unexpected child for taxon: " + elName);
305 }
306 }
307 }
308
309
310 private void handleVernaculars(EfloraImportState state, Element elVernacular, Taxon taxon) {
311 verifyNoAttribute(elVernacular);
312 verifyNoChildren(elVernacular, false);
313 String value = elVernacular.getTextNormalize();
314 Feature feature = Feature.COMMON_NAME();
315 value = replaceStart(value, "Noms vernaculaires");
316 String[] dialects = value.split(";");
317 for (String singleDialect : dialects){
318 handleSingleDialect(taxon, singleDialect, feature, state);
319 }
320 return;
321 }
322
323
324 private void handleSingleDialect(Taxon taxon, String singleDialect, Feature feature, EfloraImportState state) {
325 singleDialect = singleDialect.trim();
326 TaxonDescription description = getDescription(taxon);
327 String reDialect = "\\(dial\\.\\s.*\\)";
328 // String reDialect = "\\(.*\\)";
329 Pattern patDialect = Pattern.compile(reDialect);
330 Matcher matcher = patDialect.matcher(singleDialect);
331 if (matcher.find()){
332 String dialect = singleDialect.substring(matcher.start(), matcher.end());
333 dialect = dialect.replace("(dial. ", "").replace(")", "");
334
335 Language language = null;
336 try {
337 language = this.getLanguage(state, state.getTransformer().getLanguageUuid(dialect), dialect, dialect, dialect);
338 } catch (UndefinedTransformerMethodException e) {
339 logger.error(e.getMessage());
340 }
341
342 String commonNames = singleDialect.substring(0, matcher.start());
343 String[] splitNames = commonNames.split(",");
344 for (String commonNameString : splitNames){
345 commonNameString = commonNameString.trim();
346 CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameString, language);
347 description.addElement(commonName);
348 }
349 }else{
350 logger.warn("No dialect match: " + singleDialect);
351 }
352 }
353
354
355 private void handleReferences(EfloraImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
356 verifyNoAttribute(elReferences);
357 verifyNoChildren(elReferences, true);
358 String refString = elReferences.getTextNormalize();
359 if (lastEntity == null){
360 logger.warn("No last entity defined: " + refString);
361 return;
362 }
363
364 Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
365 lastEntity.addAnnotation(annotation);
366 }
367
368
369 private PolytomousKey handleKeys(EfloraImportState state, Element elKey, Taxon taxon) {
370 UnmatchedLeads openKeys = state.getUnmatchedLeads();
371
372 //title
373 String title = makeKeyTitle(elKey);
374
375 //key
376 PolytomousKey key = PolytomousKey.NewTitledInstance(title);
377
378 //TODO add covered taxa etc.
379 verifyNoAttribute(elKey);
380
381 //notes
382 makeKeyNotes(elKey, key);
383
384 //keycouplets
385 List<Element> keychoices = new ArrayList<Element>();
386 keychoices.addAll(elKey.getChildren("keycouplet"));
387 keychoices.addAll(elKey.getChildren("keychoice"));
388
389
390 for (Element elKeychoice : keychoices){
391 handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
392 elKey.removeContent(elKeychoice);
393 }
394
395 //
396 verifyNoChildren(elKey);
397 logger.info("Unmatched leads after key handling:" + openKeys.toString());
398
399
400 if (state.getConfig().isDoPrintKeys()){
401 key.print(System.err);
402 }
403 getPolytomousKeyService().save(key);
404 return key;
405 }
406
407
408 /**
409 * @param state
410 * @param elKey
411 * @param openKeys
412 * @param key
413 * @param elKeychoice
414 * @param taxon
415 */
416 private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
417
418 //char Attribute
419 //TODO it's still unclear if char is a feature and needs to be a new attribute
420 //or if it is handled as question. Therefore both cases are handled but feature
421 //is finally not yet set
422 KeyStatement question = handleKeychoiceChar(state, elKeychoice);
423 Feature feature = handleKeychoiceCharAsFeature(state, elKeychoice);
424
425 //lead
426 List<PolytomousKeyNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, question, feature);
427
428 //num -> match with unmatched leads
429 handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
430
431 //others
432 verifyNoAttribute(elKeychoice);
433 }
434
435
436 /**
437 * @param openKeys
438 * @param key
439 * @param elKeychoice
440 * @param childNodes
441 */
442 private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<PolytomousKeyNode> childNodes) {
443 Attribute numAttr = elKeychoice.getAttribute("num");
444 String num = CdmUtils.removeTrailingDots(numAttr == null? "":numAttr.getValue());
445 UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
446 Set<PolytomousKeyNode> matchingNodes = openKeys.getNodes(okk);
447 for (PolytomousKeyNode matchingNode : matchingNodes){
448 for (PolytomousKeyNode childNode : childNodes){
449 matchingNode.addChild(childNode);
450 }
451 openKeys.removeNode(okk, matchingNode);
452 }
453 if (matchingNodes.isEmpty()){
454 for (PolytomousKeyNode childNode : childNodes){
455 key.getRoot().addChild(childNode);
456 }
457 }
458
459 elKeychoice.removeAttribute("num");
460 }
461
462
463 /**
464 * @param state
465 * @param key
466 * @param elKeychoice
467 * @param taxon
468 * @param feature
469 * @return
470 */
471 private List<PolytomousKeyNode> handleKeychoiceLeads( EfloraImportState state, PolytomousKey key, Element elKeychoice, Taxon taxon, KeyStatement question, Feature feature) {
472 List<PolytomousKeyNode> childNodes = new ArrayList<PolytomousKeyNode>();
473 List<Element> leads = elKeychoice.getChildren("lead");
474 for(Element elLead : leads){
475 PolytomousKeyNode childNode = handleLead(state, key, elLead, taxon, question, feature);
476 childNodes.add(childNode);
477 }
478 return childNodes;
479 }
480
481
482 /**
483 * @param state
484 * @param elKeychoice
485 * @return
486 */
487 private KeyStatement handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
488 KeyStatement statement = null;
489 Attribute charAttr = elKeychoice.getAttribute("char");
490 if (charAttr != null){
491 String charStr = charAttr.getValue();
492 if (StringUtils.isNotBlank(charStr)){
493 statement = KeyStatement.NewInstance(charStr);
494 }
495 elKeychoice.removeAttribute("char");
496 }
497 return statement;
498 }
499
500 /**
501 * @param state
502 * @param elKeychoice
503 * @return
504 */
505 private Feature handleKeychoiceCharAsFeature(EfloraImportState state, Element elKeychoice) {
506 Feature feature = null;
507 Attribute charAttr = elKeychoice.getAttribute("char");
508 if (charAttr != null){
509 String charStr = charAttr.getValue();
510 feature = getFeature(charStr, state);
511 elKeychoice.removeAttribute("char");
512 }
513 return feature;
514 }
515
516
517 private PolytomousKeyNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, KeyStatement question, Feature feature) {
518 PolytomousKeyNode node = PolytomousKeyNode.NewInstance();
519 //TODO the char attribute in the keychoice is more a feature than a question
520 //needs to be discussed on model side
521 node.setQuestion(question);
522 // node.setFeature(feature);
523
524 //text
525 String text = handleLeadText(elLead, node);
526
527 //num
528 handleLeadNum(elLead, text);
529
530 //goto
531 handleLeadGoto(state, key, elLead, taxon, node);
532
533 //others
534 verifyNoAttribute(elLead);
535
536 return node;
537 }
538
539
540 /**
541 * @param elLead
542 * @param node
543 * @return
544 */
545 private String handleLeadText(Element elLead, PolytomousKeyNode node) {
546 String text = elLead.getAttributeValue("text").trim();
547 if (StringUtils.isBlank(text)){
548 logger.warn("Empty text in lead");
549 }
550 elLead.removeAttribute("text");
551 KeyStatement statement = KeyStatement.NewInstance(text);
552 node.setStatement(statement);
553 return text;
554 }
555
556
557 /**
558 * @param state
559 * @param key
560 * @param elLead
561 * @param taxon
562 * @param node
563 */
564 private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, PolytomousKeyNode node) {
565 Attribute gotoAttr = elLead.getAttribute("goto");
566 if (gotoAttr != null){
567 String strGoto = gotoAttr.getValue().trim();
568 //create key
569 UnmatchedLeadsKey gotoKey = null;
570 if (isInternalNode(strGoto)){
571 gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
572 }else{
573 String taxonKey = makeTaxonKey(strGoto, taxon);
574 gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
575 }
576 //
577 UnmatchedLeads openKeys = state.getUnmatchedLeads();
578 if (gotoKey.isInnerLead()){
579 Set<PolytomousKeyNode> existingNodes = openKeys.getNodes(gotoKey);
580 for (PolytomousKeyNode existingNode : existingNodes){
581 node.addChild(existingNode);
582 }
583 }
584 openKeys.addKey(gotoKey, node);
585 //remove attribute (need for consistency check)
586 elLead.removeAttribute("goto");
587 }else{
588 logger.warn("lead has no goto attribute");
589 }
590 }
591
592
593 /**
594 * @param elLead
595 * @param text
596 */
597 private void handleLeadNum(Element elLead, String text) {
598 Attribute numAttr = elLead.getAttribute("num");
599 if (numAttr != null){
600 //TODO num
601 String num = numAttr.getValue();
602 elLead.removeAttribute("num");
603 }else{
604 logger.info("Keychoice has no num attribute: " + text);
605 }
606 }
607
608
609 private String makeTaxonKey(String strGoto, Taxon taxon) {
610 String result = "";
611 if (strGoto == null){
612 return "";
613 }
614 String strGenusName = taxon.getName().getGenusOrUninomial();
615 strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", ""); //replace all brackets
616 strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
617
618 strGoto = strGoto.trim();
619 String[] split = strGoto.split("\\s");
620 for (int i = 0; i<split.length; i++){
621 String single = split[i];
622 if (isGenusAbbrev(single, strGenusName)){
623 split[i] = strGenusName;
624 }
625 // if (isInfraSpecificMarker(single)){
626 // String strSpeciesName = taxon.getName().getSpecificEpithet();
627 // split[i] = strGenusName + " " + strSpeciesName + " ";
628 // }
629 result = (result + " " + split[i]).trim();
630 }
631 return result;
632 }
633
634
635 private boolean isInfraSpecificMarker(String single) {
636 try {
637 if (Rank.getRankByIdInVoc(single).isInfraSpecific()){
638 return true;
639 }
640 } catch (UnknownCdmTypeException e) {
641 return false;
642 }
643 return false;
644 }
645
646
647 private boolean isGenusAbbrev(String single, String strGenusName) {
648 if (! single.matches("[A-Z]\\.?")) {
649 return false;
650 }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
651 return false;
652 }else{
653 return single.charAt(0) == strGenusName.charAt(0);
654 }
655 }
656
657
658 private boolean isInternalNode(String strGoto) {
659 return CdmUtils.isNumeric(strGoto);
660 }
661
662
663 private void makeKeyNotes(Element keyElement, PolytomousKey key) {
664 Element elNotes = keyElement.getChild("notes");
665 if (elNotes != null){
666 keyElement.removeContent(elNotes);
667 String notes = elNotes.getTextNormalize();
668 if (StringUtils.isNotBlank(notes)){
669 key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
670 }
671 }
672 }
673
674
675 private String makeKeyTitle(Element keyElement) {
676 String title = "- no title - ";
677 Attribute titleAttr = keyElement.getAttribute("title");
678 keyElement.removeAttribute(titleAttr);
679 if (titleAttr == null){
680 Element elTitle = keyElement.getChild("keytitle");
681 keyElement.removeContent(elTitle);
682 if (elTitle != null){
683 title = elTitle.getTextNormalize();
684 }
685 }else{
686 title = titleAttr.getValue();
687 }
688 return title;
689 }
690
691
692 /**
693 * @param state
694 * @param element
695 * @param taxon
696 */
697 private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
698 Feature chromosomeFeature = getFeature("chromosomes", state);
699 verifyNoAttribute(element);
700 verifyNoChildren(element);
701 String value = element.getTextNormalize();
702 value = replaceStart(value, "Chromosomes");
703 String chromosomesPart = getChromosomesPart(value);
704 String references = value.replace(chromosomesPart, "").trim();
705 chromosomesPart = chromosomesPart.replace(":", "").trim();
706 return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);
707 }
708
709
710 /**
711 * @param ref
712 * @param string
713 * @return
714 */
715 private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
716 String[] splits = refAll.split(splitter);
717 for (String strRef: splits){
718 Reference ref = ReferenceFactory.newGeneric();
719 ref.setTitleCache(strRef, true);
720 String refDetail = parseReferenceYearAndDetail(ref);
721 sourcable.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, ref, refDetail);
722 }
723
724
725 //TODO use regex instead
726 /* String detailResult = null;
727 String titleToParse = ref.getTitleCache();
728 String reReference = "^\\.{1,}";
729 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
730 String reYear = "\\([1-2]{1}[0-9]{3}\\)";
731 String reYearPeriod = reYear + "(-" + reYear + ")+";
732 String reDetail = "\\.{1,10}$";
733 */
734 }
735
736
737 /**
738 * @param value
739 * @return
740 */
741 private String getChromosomesPart(String str) {
742 Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
743 Matcher matcher = pattern.matcher(str);
744 if (matcher.find()){
745 return matcher.group(0);
746 }else{
747 logger.warn("Chromosomes could not be parsed: " + str);
748 }
749 return str;
750 }
751
752
753 /**
754 * @param state
755 * @param element
756 * @param taxon
757 */
758 private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
759 TextData result = null;
760 verifyNoChildren(element, true);
761 //verifyNoAttribute(element);
762 List<Attribute> attributes = element.getAttributes();
763 for (Attribute attribute : attributes){
764 if (! attribute.getName().equalsIgnoreCase("class")){
765 logger.warn("Char has unhandled attribute " + attribute.getName());
766 }else{
767 String classValue = attribute.getValue();
768 result = handleDescriptiveElement(state, element, taxon, classValue);
769 }
770 }
771 //if no class attribute exists, handle as note
772 if (attributes.isEmpty()){
773 result = handleDescriptiveElement(state, element, taxon, "Note");
774 }
775
776 //Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
777 //taxon.addAnnotation(annotation);
778 return result; //annotation;
779 }
780
781
782 /**
783 * @param state
784 * @param element
785 * @param taxon
786 * @param result
787 * @param attribute
788 * @return
789 */
790 private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {
791 TextData result = null;
792 Feature feature = getFeature(classValue, state);
793 if (feature == null){
794 logger.warn("Unhandled feature: " + classValue);
795 }else{
796 String value = element.getValue();
797 value = replaceStart(value, "Notes");
798 value = replaceStart(value, "Note");
799 result = addDescriptionElement(state, taxon, value, feature, null);
800 }
801 return result;
802 }
803
804
805 private void removeBr(Element element) {
806 element.removeChildren("Br");
807 element.removeChildren("br");
808 element.removeChildren("BR");
809 }
810
811
812 /**
813 * @param state
814 * @param element
815 * @param taxon
816 */
817 private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
818 verifyNoAttribute(element);
819 verifyNoChildren(element, true);
820 String value = element.getTextNormalize();
821 value = replaceStart(value, "Uses");
822 Feature feature = Feature.USES();
823 return addDescriptionElement(state, taxon, value, feature, null);
824
825 }
826
827
828 /**
829 * @param state
830 * @param element
831 * @param taxon
832 * @param unhandledDescriptionChildren
833 */
834 private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
835 verifyNoAttribute(element);
836 verifyNoChildren(element, true);
837 String value = element.getTextNormalize();
838 value = replaceStart(value, "Distribution");
839 Feature feature = Feature.DISTRIBUTION();
840 //distribution parsing almost impossible as there is lots of freetext in the distribution tag
841 return addDescriptionElement(state, taxon, value, feature, null);
842 }
843
844
845 /**
846 * @param state
847 * @param element
848 * @param taxon
849 * @param unhandledDescriptionChildren
850 */
851 private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
852 verifyNoAttribute(elEcology);
853 verifyNoChildren(elEcology, true);
854 String value = elEcology.getTextNormalize();
855 Feature feature = Feature.ECOLOGY();
856 if (value.startsWith("Habitat & Ecology")){
857 feature = getFeature("Habitat & Ecology", state);
858 value = replaceStart(value, "Habitat & Ecology");
859 }else if (value.startsWith("Habitat")){
860 value = replaceStart(value, "Habitat");
861 feature = getFeature("Habitat", state);
862 }
863 return addDescriptionElement(state, taxon, value, feature, null);
864 }
865
866
867
868 /**
869 * @param value
870 * @param replacementString
871 */
872 private String replaceStart(String value, String replacementString) {
873 if (value.startsWith(replacementString) ){
874 value = value.substring(replacementString.length()).trim();
875 }
876 while (value.startsWith("-") || value.startsWith("–") ){
877 value = value.substring("-".length()).trim();
878 }
879 return value;
880 }
881
882
883 /**
884 * @param value
885 * @param replacementString
886 */
887 protected String removeTrailing(String value, String replacementString) {
888 if (value == null){
889 return null;
890 }
891 if (value.endsWith(replacementString) ){
892 value = value.substring(0, value.length() - replacementString.length()).trim();
893 }
894 return value;
895 }
896
897 /**
898 * @param state
899 * @param element
900 * @param taxon
901 * @param unhandledNomeclatureChildren
902 */
903 private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
904 verifyNoAttribute(elNomenclature);
905
906 List<Element> elements = elNomenclature.getChildren();
907 for (Element element : elements){
908 if (element.getName().equals("homotypes")){
909 handleHomotypes(state, element, taxon);
910 }else if (element.getName().equals("notes")){
911 handleNomenclatureNotes(state, element, taxon);
912 }else{
913 unhandledChildren.add(element.getName());
914 }
915 }
916
917 }
918
919
920
921 private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
922 verifyNoAttribute(elNotes);
923 verifyNoChildren(elNotes);
924 String notesText = elNotes.getTextNormalize();
925 Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
926 taxon.addAnnotation(annotation);
927 }
928
929
930
931 private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
932 /**
933 * @param state
934 * @param element
935 * @param taxon
936 */
937 private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
938 verifyNoAttribute(elHomotypes);
939
940 List<Element> elements = elHomotypes.getChildren();
941 HomotypicalGroup homotypicalGroup = null;
942 for (Element element : elements){
943 if (element.getName().equals("nom")){
944 homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
945 }else{
946 unhandledHomotypeChildren.add(element.getName());
947 }
948 }
949
950 }
951
952 private static Set<String> unhandledNomChildren = new HashSet<String>();
953
954 /**
955 * @param state
956 * @param element
957 * @param taxon
958 */
959 private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
960 List<Attribute> attributes = elNom.getAttributes();
961
962 boolean taxonBaseClassType = false;
963 for (Attribute attribute : attributes){
964 if (! attribute.getName().equalsIgnoreCase("class")){
965 logger.warn("Nom has unhandled attribute " + attribute.getName());
966 }else{
967 String classValue = attribute.getValue();
968 if (classValue.equalsIgnoreCase("acceptedname")){
969 homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
970 taxonBaseClassType = true;
971 }else if (classValue.equalsIgnoreCase("synonym")){
972 homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
973 taxonBaseClassType = true;
974 }else if (classValue.equalsIgnoreCase("typeref")){
975 handleTypeRef(state, elNom, taxon, homotypicalGroup);
976 }else{
977 logger.warn("Unhandled class value for nom: " + classValue);
978 }
979
980 }
981 }
982
983 List<Element> elements = elNom.getChildren();
984 for (Element element : elements){
985 if (element.getName().equals("name") || element.getName().equals("homonym") ){
986 if (taxonBaseClassType == false){
987 logger.warn("Name or homonym tag not allowed in non taxon nom tag");
988 }
989 }else{
990 unhandledNomChildren.add(element.getName());
991 }
992 }
993
994 return homotypicalGroup;
995
996 }
997
998 /**
999 * @param state
1000 * @param elNom
1001 * @param taxon
1002 * @param homotypicalGroup
1003 */
1004 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
1005 verifyNoChildren(elNom);
1006 String typeRef = elNom.getTextNormalize();
1007 typeRef = removeStartingTypeRefMinus(typeRef);
1008
1009 String[] split = typeRef.split(":");
1010 if (split.length < 2){
1011 logger.warn("typeRef has no ':' : " + typeRef);
1012 }else if (split.length > 2){
1013 logger.warn("typeRef has more than 1 ':' : " + typeRef);
1014 }else{
1015 StringBuffer typeType = new StringBuffer(split[0]);
1016 String typeText = split[1].trim();
1017 TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
1018
1019 //Name Type Desitnations
1020 if (typeDesignation instanceof NameTypeDesignation){
1021 makeNameTypeDesignations(typeType, typeText, typeDesignation);
1022 }
1023 //SpecimenTypeDesignations
1024 else if (typeDesignation instanceof SpecimenTypeDesignation){
1025 makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
1026 }else{
1027 logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
1028 }
1029 for (TaxonName name : homotypicalGroup.getTypifiedNames()){
1030 name.addTypeDesignation(typeDesignation, true);
1031 }
1032 }
1033 }
1034
1035
1036 /**
1037 * @param typeRef
1038 * @return
1039 */
1040 protected String removeStartingTypeRefMinus(String typeRef) {
1041 typeRef = replaceStart(typeRef, "-");
1042 typeRef = replaceStart(typeRef, "—");
1043 typeRef = replaceStart(typeRef, "\u002d");
1044 typeRef = replaceStart(typeRef, "\u2013");
1045 typeRef = replaceStart(typeRef, "--");
1046 return typeRef;
1047 }
1048
1049 /**
1050 * @param typeType
1051 * @param typeText
1052 * @param typeDesignation
1053 */
1054 private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1055 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1056 //do nothing
1057 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1058 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1059 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1060 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1061 }else{
1062 logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1063 }
1064 //clean
1065 typeText = cleanNameType(typeText);
1066 //create name
1067 TaxonName nameType = (TaxonName)parser.parseFullName(typeText, NomenclaturalCode.ICNAFP, Rank.SPECIES());
1068 ((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1069 //TODO wie können NameTypes den Namen zugeordnet werden? - wird aber vom Portal via NameCache matching gemacht
1070 }
1071
1072
1073 private String cleanNameType(String typeText) {
1074 String result;
1075 String[] split = typeText.split("\\[.*\\].?");
1076 result = split[0];
1077 return result;
1078 }
1079
1080
1081 /**
1082 * @param typeType
1083 * @param typeText
1084 * @param typeDesignation
1085 */
1086 protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1087 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1088 //do nothing
1089 }else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1090 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1091 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1092 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1093 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1094 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1095 }else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1096 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1097 }else{
1098 logger.warn("Unhandled type string: " + typeType);
1099 }
1100 DerivedUnit specimen = DerivedUnit.NewPreservedSpecimenInstance();
1101 if (typeText.length() > 255){
1102 specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1103 }else{
1104 specimen.setTitleCache(typeText, true);
1105 }
1106 specimen.putDefinition(Language.ENGLISH(), typeText);
1107 ((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1108 }
1109
1110 private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1111 TypeDesignationBase result;
1112 Reference ref = parseTypeDesignationReference(typeType);
1113 if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1114 if (typeType.indexOf(" species")>-1 ){
1115 result = NameTypeDesignation.NewInstance();
1116 int start = typeType.indexOf(" species");
1117 typeType.replace(start, start + " species".length(), "");
1118 }else {
1119 result = NameTypeDesignation.NewInstance();
1120 int start = typeType.indexOf(" genus");
1121 typeType.replace(start, start + " genus".length(), "");
1122 }
1123 }else{
1124 result = SpecimenTypeDesignation.NewInstance();
1125 }
1126 result.setCitation(ref);
1127 return result;
1128 }
1129
1130
1131 private Reference parseTypeDesignationReference(StringBuffer typeType) {
1132 Reference result = null;
1133 String reBracketReference = "\\(.*\\)";
1134 Pattern patBracketReference = Pattern.compile(reBracketReference);
1135 Matcher matcher = patBracketReference.matcher(typeType);
1136 if (matcher.find()){
1137 String refString = matcher.group();
1138 int start = typeType.indexOf(refString);
1139 typeType.replace(start, start + refString.length(), "");
1140 refString = refString.replace("(", "").replace(")", "").trim();
1141 Reference ref = ReferenceFactory.newGeneric();
1142 ref.setTitleCache(refString, true);
1143 result = ref;
1144 }
1145 return result;
1146 }
1147
1148
1149 /**
1150 * @param state
1151 * @param elNom
1152 * @param taxon
1153 */
1154 //body/taxon/
1155 private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1156 INonViralName nvn = makeName(taxon, homotypicalGroup, isSynonym);
1157 TaxonName name = TaxonName.castAndDeproxy(nvn);
1158 String num = null;
1159
1160 boolean hasGenusInfo = false;
1161 TeamOrPersonBase<?> lastTeam = null;
1162
1163 //genus
1164 List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1165 if (elGenus.size() > 0){
1166 hasGenusInfo = true;
1167 }else{
1168 logger.debug ("No Synonym Genus");
1169 }
1170 //infra rank -> needed to handle authors correctly
1171 List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1172 Rank infraRank = null;
1173 infraRank = handleInfRank(name, elInfraRank, infraRank);
1174
1175 //get left over elements
1176 List<Element> elements = elNom.getChildren();
1177 elements.removeAll(elInfraRank);
1178
1179 for (Element element : elements){
1180 if (element.getName().equals("name")){
1181 String classValue = element.getAttributeValue("class");
1182 String value = element.getValue().trim();
1183 if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1184 name.setGenusOrUninomial(value);
1185 }else if (classValue.equalsIgnoreCase("family") ){
1186 name.setGenusOrUninomial(value);
1187 name.setRank(Rank.FAMILY());
1188 }else if (classValue.equalsIgnoreCase("subgenus")){
1189 //name.setInfraGenericEpithet(value);
1190 name.setNameCache(value.replace(":", "").trim());
1191 name.setRank(Rank.SUBGENUS());
1192 }else if (classValue.equalsIgnoreCase("epithet") ){
1193 if (hasGenusInfo == true){
1194 name.setSpecificEpithet(value);
1195 }else{
1196 handleInfraspecificEpithet(element, classValue, name);
1197 }
1198 }else if (classValue.equalsIgnoreCase("author")){
1199 handleNameAuthors(element, name);
1200 }else if (classValue.equalsIgnoreCase("paraut")){
1201 handleBasionymAuthor(state, element, name, false);
1202 }else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1203 handleInfrAuthor(state, element, name, true);
1204 }else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1205 handleBasionymAuthor(state, element, name, true);
1206 }else if (classValue.equalsIgnoreCase("infrepi")){
1207 handleInfrEpi(name, infraRank, value);
1208 }else if (classValue.equalsIgnoreCase("pub")){
1209 lastTeam = handleNomenclaturalReference(name, value);
1210 }else if (classValue.equalsIgnoreCase("usage")){
1211 lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1212 }else if (classValue.equalsIgnoreCase("note")){
1213 handleNameNote(name, value);
1214 }else if (classValue.equalsIgnoreCase("num")){
1215 if (num != null){
1216 logger.warn("Duplicate num: " + value);
1217 }else{
1218 num = value;
1219 }
1220 if (isSynonym == true){
1221 logger.warn("Synonym should not have a num");
1222 }
1223 }else if (classValue.equalsIgnoreCase("typification")){
1224 logger.warn("Typification should not be a nom class");
1225 }else{
1226 logger.warn("Unhandled name class: " + classValue);
1227 }
1228 }else if(element.getName().equals("homonym")){
1229 handleHomonym(state, element, name);
1230 }else{
1231 // child element is not "name"
1232 unhandledNomChildren.add(element.getName());
1233 }
1234 }
1235
1236 //handle key
1237 if (! isSynonym){
1238 String taxonString = name.getNameCache();
1239 //try to find matching lead nodes
1240 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1241 Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1242 //same without using the num
1243 if (num != null){
1244 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1245 handleMatchingNodes(state, taxon, noNumLeadsKey);
1246 }
1247 if (matchingNodes.isEmpty() && num != null){
1248 logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1249 }
1250 }
1251
1252 //test nom element has no text
1253 if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1254 String strElNom = elNom.getTextNormalize();
1255 if ("?".equals(strElNom)){
1256 handleQuestionMark(name, taxon);
1257 }
1258 // Character c = strElNom.charAt(0);
1259 //System.out.println(CharUtils.unicodeEscaped(c));
1260 logger.warn("Nom tag has text: " + strElNom);
1261 }
1262
1263 return name.getHomotypicalGroup();
1264 }
1265
1266
1267 private void handleQuestionMark(INonViralName name, Taxon taxon) {
1268 int count = name.getTaxonBases().size();
1269 if (count != 1){
1270 logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1271 }else{
1272 TaxonBase taxonBase = name.getTaxonBases().iterator().next();
1273 taxonBase.setDoubtful(true);
1274 }
1275 }
1276
1277
1278 //merge with handleNomTaxon
1279 private void handleHomonym(EfloraImportState state, Element elHomonym, TaxonName upperName) {
1280 verifyNoAttribute(elHomonym);
1281
1282 //hommonym name
1283 TaxonName homonymName = TaxonNameFactory.NewBotanicalInstance(upperName.getRank());
1284 homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1285 homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1286 homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1287 homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1288
1289 for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1290 String classValue = elName.getAttributeValue("class");
1291 String value = elName.getValue().trim();
1292 if (classValue.equalsIgnoreCase("genus") ){
1293 homonymName.setGenusOrUninomial(value);
1294 }else if (classValue.equalsIgnoreCase("epithet") ){
1295 homonymName.setSpecificEpithet(value);
1296 }else if (classValue.equalsIgnoreCase("author")){
1297 handleNameAuthors(elName, homonymName);
1298 }else if (classValue.equalsIgnoreCase("paraut")){
1299 handleBasionymAuthor(state, elName, homonymName, true);
1300 }else if (classValue.equalsIgnoreCase("pub")){
1301 handleNomenclaturalReference(homonymName, value);
1302 }else if (classValue.equalsIgnoreCase("note")){
1303 handleNameNote(homonymName, value);
1304 }else{
1305 logger.warn("Unhandled class value: " + classValue);
1306 }
1307 }
1308 //TODO verify other information
1309
1310
1311 //rel
1312 boolean homonymIsLater = false;
1313 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1314 if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1315 TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1316 TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1317 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1318 }else{
1319 if (upperName.getNomenclaturalReference() == null){
1320 logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1321 }
1322 if (homonymName.getNomenclaturalReference() == null){
1323 logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1324 }
1325 }
1326 if (homonymIsLater){
1327 homonymName.addRelationshipToName(upperName, relType, null, null);
1328 }else{
1329 upperName.addRelationshipToName(homonymName, relType, null, null);
1330 }
1331 }
1332
1333
1334 /**
1335 * @param state
1336 * @param taxon
1337 * @param leadsKey
1338 * @return
1339 */
1340 private Set<PolytomousKeyNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1341 Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1342 for (PolytomousKeyNode matchingNode : matchingNodes){
1343 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1344 matchingNode.setTaxon(taxon);
1345 state.getPolytomousKeyNodesToSave().add(matchingNode);
1346 }
1347 return matchingNodes;
1348 }
1349
1350
1351 private void handleNameNote(INonViralName name, String value) {
1352 logger.warn("Name note: " + value + ". Available in portal?");
1353 Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1354 name.addAnnotation(annotation);
1355 }
1356
1357
1358 /**
1359 * @param taxon
1360 * @param name
1361 * @param value
1362 */
1363 protected TeamOrPersonBase handleNameUsage(Taxon taxon, TaxonName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1364 Reference ref = ReferenceFactory.newGeneric();
1365 referenceTitle = removeStartingSymbols(referenceTitle, ref);
1366
1367 ref.setTitleCache(referenceTitle, true);
1368 String microReference = parseReferenceYearAndDetail(ref);
1369 TeamOrPersonBase<?> team = getReferenceAuthor(ref);
1370 parseReferenceType(ref);
1371 if (team == null){
1372 team = lastTeam;
1373 }
1374 ref.setAuthorship(team);
1375
1376 TaxonDescription description = getDescription(taxon);
1377 TextData textData = TextData.NewInstance(Feature.CITATION());
1378 textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, ref, microReference,
1379 name, null);
1380 description.addElement(textData);
1381 return team;
1382 }
1383
1384
1385 /**
1386 * @param referenceTitle
1387 * @param ref
1388 * @return
1389 */
1390 private String removeStartingSymbols(String referenceTitle, Reference ref) {
1391 if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1392 referenceTitle = referenceTitle.substring(1).trim();
1393 ref.setTitleCache(referenceTitle);
1394 }
1395 return referenceTitle;
1396 }
1397
1398
1399 private void parseReferenceType(Reference ref) {
1400 String title = ref.getTitle();
1401 if (title == null){
1402 return;
1403 }
1404 title = title.trim();
1405 //no in reference
1406 if (! title.startsWith("in ")){
1407 ref.setType(ReferenceType.Book);
1408 return;
1409 }
1410
1411 title = title.substring(3);
1412 //in reference
1413 //no ,
1414 if (title.indexOf(",") == -1){
1415 ref.setType(ReferenceType.Article);
1416 IJournal journal = ReferenceFactory.newJournal();
1417 journal.setTitle(title);
1418 ref.setTitle(null);
1419 ref.setInJournal(journal);
1420 //return;
1421 }else{
1422 //,-references
1423 ref.setType(ReferenceType.BookSection);
1424 String[] split = (title).split(",\\s*[A-Z]");
1425 if (split.length <= 1){
1426 logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1427 }
1428 IBook book = ReferenceFactory.newBook();
1429 Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1430 try {
1431 title = title.substring(split[0].length() + 1).trim();
1432 } catch (Exception e) {
1433 logger.error("ERROR occurred when trying to split title: " + title + "; split[0]: + " + split[0]);
1434 }
1435 book.setTitle(title);
1436 book.setAuthorship(bookTeam);
1437 book.setDatePublished(ref.getDatePublished());
1438 ref.setTitle(null);
1439 ref.setInBook(book);
1440 }
1441 }
1442
1443
1444 protected Team getReferenceAuthor (Reference ref) {
1445 boolean isCache = false;
1446 String referenceTitle = ref.getTitle();
1447 if (referenceTitle == null){
1448 isCache = true;
1449 referenceTitle = ref.getTitleCache();
1450 }
1451 //in references
1452 String[] split = (" " + referenceTitle).split(" in ");
1453 if (split.length > 1){
1454 if (StringUtils.isNotBlank(split[0])){
1455 //' in ' is within the reference string, take the preceding string as the team
1456 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1457 if (! isCache){
1458 ref.setTitle("in " + split[1]);
1459 }
1460 return team;
1461 }else{
1462 //string starts with in therefore no author is given
1463 return null;
1464 }
1465 }
1466 //no ,-reference
1467 split = referenceTitle.split(",");
1468 if (split.length < 2){
1469 //no author is given
1470 return null;
1471 }
1472
1473 //,-references
1474 split = (referenceTitle).split(",\\s*[A-Z]");
1475 if (split.length > 1){
1476 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1477 if (! isCache){
1478 ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1479 }
1480 return team;
1481 }else{
1482 logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1483 return null;
1484 }
1485 }
1486
1487
1488 /**
1489 * Replaced by <homonym> tag but still in use for exceptions
1490 * @param detail
1491 * @param name
1492 * @return
1493 */
1494 protected String parseHomonym(String detail, TaxonName name) {
1495 String result;
1496 if (detail == null){
1497 return detail;
1498 }
1499
1500 //non RE
1501 String reNon = "(\\s|,)non\\s";
1502 Pattern patReference = Pattern.compile(reNon);
1503 Matcher matcher = patReference.matcher(detail);
1504 if (matcher.find()){
1505 int start = matcher.start();
1506 int end = matcher.end();
1507
1508 if (detail != null){
1509 logger.warn("Unhandled non part: " + detail.substring(start));
1510 return detail;
1511 }
1512
1513 result = detail.substring(0, start);
1514
1515 //homonym string
1516 String homonymString = detail.substring(end);
1517
1518 //hommonym name
1519 TaxonName homonymName = TaxonNameFactory.NewBotanicalInstance(name.getRank());
1520 homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1521 homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1522 homonymName.setSpecificEpithet(name.getSpecificEpithet());
1523 homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1524 Reference homonymNomRef = ReferenceFactory.newGeneric();
1525 homonymNomRef.setTitleCache(homonymString, true);
1526 String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1527 homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1528 String authorTitle = homonymNomRef.getTitleCache();
1529 Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1530 homonymNomRef.setAuthorship(team);
1531 homonymNomRef.setTitle("");
1532 homonymNomRef.setProtectedTitleCache(false);
1533
1534 //rel
1535 boolean homonymIsLater = false;
1536 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1537 TimePeriod homonymYear = homonymNomRef.getDatePublished();
1538 if (name.getNomenclaturalReference() != null){
1539 TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1540 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1541 }else{
1542 logger.warn("Classification name has no nomenclatural reference");
1543 }
1544 if (homonymIsLater){
1545 homonymName.addRelationshipToName(name, relType, null, null);
1546 }else{
1547 name.addRelationshipToName(homonymName, relType, null, null);
1548 }
1549
1550 }else{
1551 return detail;
1552 }
1553 return result;
1554 }
1555
1556
1557 /**
1558 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1559 * @param name
1560 * @param value
1561 */
1562 protected TeamOrPersonBase handleNomenclaturalReference(TaxonName name, String value) {
1563 Reference nomRef = ReferenceFactory.newGeneric();
1564 nomRef.setTitleCache(value, true);
1565 parseNomStatus(nomRef, name);
1566 String microReference = parseReferenceYearAndDetail(nomRef);
1567 name.setNomenclaturalReference(nomRef);
1568 microReference = parseHomonym(microReference, name);
1569 name.setNomenclaturalMicroReference(microReference);
1570 TeamOrPersonBase<?> team = name.getCombinationAuthorship();
1571 if (team == null){
1572 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1573 }else{
1574 nomRef.setAuthorship(team);
1575 }
1576 return team;
1577 }
1578
1579 private void handleInfrAuthor(EfloraImportState state, Element elAuthor, INonViralName name, boolean overwrite) {
1580 String strAuthor = elAuthor.getValue().trim();
1581 if (strAuthor.endsWith(",")){
1582 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1583 }
1584 TeamOrPersonBase[] team = getTeam(strAuthor);
1585 if (name.getCombinationAuthorship() != null && overwrite == false){
1586 logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1587 }else{
1588 name.setCombinationAuthorship(team[0]);
1589 name.setExCombinationAuthorship(team[1]);
1590 }
1591
1592
1593 }
1594
1595
1596 /**
1597 * Sets the names rank according to the infrank value
1598 * @param name
1599 * @param elements
1600 * @param elInfraRank
1601 * @param infraRank
1602 * @return
1603 */
1604 private Rank handleInfRank(INonViralName name, List<Element> elInfraRank, Rank infraRank) {
1605 if (elInfraRank.size() == 1){
1606 String strRank = elInfraRank.get(0).getTextNormalize();
1607 try {
1608 infraRank = Rank.getRankByLatinNameOrIdInVoc(strRank);
1609 } catch (UnknownCdmTypeException e) {
1610 try{
1611 infraRank = Rank.getRankByLatinNameOrIdInVoc(strRank + ".");
1612 } catch (UnknownCdmTypeException e2) {
1613 logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1614 }
1615 }
1616 }else if (elInfraRank.size() > 1){
1617 logger.warn ("There is more than 1 infrank");
1618 }
1619 if (infraRank != null){
1620 name.setRank(infraRank);
1621 }
1622 return infraRank;
1623 }
1624
1625
1626 private void handleInfrEpi(INonViralName name, Rank infraRank, String value) {
1627 if (infraRank != null && infraRank.isInfraSpecific()){
1628 name.setInfraSpecificEpithet(value);
1629 if (CdmUtils.isCapital(value)){
1630 logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1631 }
1632 }else if (infraRank != null && infraRank.isInfraGeneric()){
1633 name.setInfraGenericEpithet(value);
1634 if (! CdmUtils.isCapital(value)){
1635 logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1636 }
1637 }else{
1638 logger.warn("Infrepi could not be handled: " + value);
1639 }
1640 }
1641
1642
1643
1644 /**
1645 * Returns the (empty) with the correct homotypical group depending on the taxon status
1646 * @param taxon
1647 * @param homotypicalGroup
1648 * @param isSynonym
1649 * @return
1650 */
1651 private TaxonName makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1652
1653 TaxonName name;
1654 if (isSynonym){
1655 name = TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES(), homotypicalGroup);
1656 SynonymType synonymType = SynonymType.HETEROTYPIC_SYNONYM_OF;
1657 if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1658 synonymType = SynonymType.HOMOTYPIC_SYNONYM_OF;
1659 }
1660 taxon.addSynonymName(name, synonymType);
1661 }else{
1662 name = taxon.getName();
1663 }
1664 return name;
1665 }
1666
1667 private void handleInfraspecificEpithet(Element element, String attrValue, INonViralName name) {
1668 String value = element.getTextNormalize();
1669 if (value.indexOf("subsp.") != -1){
1670 //TODO genus and species epi
1671 String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1672 name.setInfraSpecificEpithet(infrEpi);
1673 name.setRank(Rank.SUBSPECIES());
1674 }else if (value.indexOf("var.") != -1){
1675 //TODO genus and species epi
1676 String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1677 name.setInfraSpecificEpithet(infrEpi);
1678 name.setRank(Rank.VARIETY());
1679 }else{
1680 logger.warn("Unhandled infraspecific type: " + value);
1681 }
1682 }
1683
1684 /**
1685 * @param state
1686 * @param element
1687 * @param name
1688 */
1689 private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, INonViralName name, boolean overwrite) {
1690 String strAuthor = elBasionymAuthor.getValue().trim();
1691 Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1692 if (reBasionymAuthor.matcher(strAuthor).matches()){
1693 strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1694 }else{
1695 logger.warn("Brackets are missing for original combination author " + strAuthor);
1696 }
1697 TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1698 if (name.getBasionymAuthorship() != null && overwrite == false){
1699 logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1700 }else{
1701 name.setBasionymAuthorship(basionymTeam[0]);
1702 name.setExBasionymAuthorship(basionymTeam[1]);
1703
1704 }
1705 }
1706
1707 private final Map<String, UUID> teamMap = new HashMap<String, UUID>();
1708 /**
1709 * @param elAuthors
1710 * @param name
1711 * @param elNom
1712 */
1713 private void handleNameAuthors(Element elAuthor, INonViralName name) {
1714 if (name.getCombinationAuthorship() != null){
1715 logger.warn("Name already has a combination author. Name: " + name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1716 }
1717 String strAuthor = elAuthor.getValue().trim();
1718 if (strAuthor.endsWith(",")){
1719 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1720 }
1721 if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
1722 logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
1723 }
1724 TeamOrPersonBase[] team = getTeam(strAuthor);
1725 name.setCombinationAuthorship(team[0]);
1726 name.setExCombinationAuthorship(team[1]);
1727 }
1728
1729
1730 /**
1731 * @param strAuthor
1732 * @return
1733 */
1734 private TeamOrPersonBase[] getTeam(String strAuthor) {
1735 TeamOrPersonBase[] result = new TeamOrPersonBase[2];
1736 String[] split = strAuthor.split(" ex ");
1737 String strBaseAuthor = null;
1738 String strExAuthor = null;
1739
1740 if (split.length == 2){
1741 strBaseAuthor = split[1];
1742 strExAuthor = split[0];
1743 }else if (split.length == 1){
1744 strBaseAuthor = split[0];
1745 }else{
1746 logger.warn("Could not parse (ex) author: " + strAuthor);
1747 }
1748 result[0] = getUuidTeam(strBaseAuthor);
1749 if (result[0] == null){
1750 result[0] = parseSingleTeam(strBaseAuthor);
1751 teamMap.put(strBaseAuthor, result[0].getUuid());
1752 }
1753 if (strExAuthor != null){
1754 result[1] = getUuidTeam(strExAuthor);
1755 if (result[1] == null){
1756 result[1] = Team.NewInstance();
1757 result[1].setTitleCache(strExAuthor, true);
1758 teamMap.put(strExAuthor, result[1].getUuid());
1759 }
1760
1761 }
1762 return result;
1763 }
1764
1765
1766 protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
1767 TeamOrPersonBase result;
1768 String[] split = strBaseAuthor.split("&");
1769 if (split.length > 1){
1770 result = Team.NewInstance();
1771 for (String personString : split){
1772 Person person = makePerson(personString);
1773 ((Team)result).addTeamMember(person);
1774 }
1775 }else{
1776 result = makePerson(strBaseAuthor.trim());
1777 }
1778 return result;
1779 }
1780
1781
1782 /**
1783 * @param personString
1784 * @return
1785 */
1786 private Person makePerson(String personString) {
1787 personString = personString.trim();
1788 Person person = Person.NewTitledInstance(personString);
1789 person.setNomenclaturalTitle(personString);
1790 return person;
1791 }
1792
1793
1794 /**
1795 * @param result
1796 * @param strBaseAuthor
1797 */
1798 private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
1799 UUID uuidTeam = teamMap.get(strBaseAuthor);
1800 return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
1801 }
1802
1803
1804 private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1805 verifyNoAttribute(elDescription);
1806
1807 List<Element> elements = elDescription.getChildren();
1808 for (Element element : elements){
1809 if (element.getName().equalsIgnoreCase("char")){
1810 handleChar(state, element, taxon);
1811 }else{
1812 logger.warn("Unhandled description child: " + element.getName());
1813 }
1814 }
1815
1816 }
1817
1818
1819 /**
1820 * @param state
1821 * @param element
1822 * @param taxon
1823 */
1824 private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1825 List<Attribute> attributes = element.getAttributes();
1826 for (Attribute attribute : attributes){
1827 if (! attribute.getName().equalsIgnoreCase("class")){
1828 logger.warn("Char has unhandled attribute " + attribute.getName());
1829 }else{
1830 String classValue = attribute.getValue();
1831 Feature feature = getFeature(classValue, state);
1832 if (feature == null){
1833 logger.warn("Unhandled feature: " + classValue);
1834 }else{
1835 String value = element.getValue();
1836 addDescriptionElement(state, taxon, value, feature, null);
1837 }
1838
1839 }
1840 }
1841
1842 List<Element> elements = element.getChildren();
1843 if (! elements.isEmpty()){
1844 logger.warn("Char has unhandled children");
1845 }
1846 }
1847
1848
1849 /**
1850 * @param taxon
1851 * @return
1852 */
1853 protected TaxonDescription getDescription(Taxon taxon) {
1854 for (TaxonDescription description : taxon.getDescriptions()){
1855 if (! description.isImageGallery()){
1856 return description;
1857 }
1858 }
1859 TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1860 return newDescription;
1861 }
1862
1863
1864 /**
1865 * @param classValue
1866 * @param state
1867 * @return
1868 * @throws UndefinedTransformerMethodException
1869 */
1870 private Feature getFeature(String classValue, EfloraImportState state) {
1871 UUID uuid;
1872 try {
1873 uuid = state.getTransformer().getFeatureUuid(classValue);
1874 if (uuid == null){
1875 logger.info("Uuid is null for " + classValue);
1876 }
1877 String featureText = StringUtils.capitalize(classValue);
1878 //TODO eFlora feature vocabulary
1879 Feature feature = getFeature(state, uuid, featureText, featureText, classValue, null);
1880 if (feature == null){
1881 throw new NullPointerException(classValue + " not recognized as a feature");
1882 }
1883 return feature;
1884 } catch (Exception e) {
1885 logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1886 return Feature.UNKNOWN();
1887 }
1888 }
1889
1890
1891 /**
1892 * @param state
1893 * @param element
1894 * @param taxon
1895 * @param unhandledTitleClassess
1896 */
1897 private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1898 // attributes
1899 List<Attribute> attributes = element.getAttributes();
1900 for (Attribute attribute : attributes){
1901 if (! attribute.getName().equalsIgnoreCase("class") ){
1902 if (! attribute.getName().equalsIgnoreCase("num")){
1903 logger.warn("Title has unhandled attribute " + attribute.getName());
1904 }else{
1905 //TODO num attribute in taxon
1906 }
1907 }else{
1908 String classValue = attribute.getValue();
1909 try {
1910 Rank rank;
1911 try {
1912 rank = Rank.getRankByLatinNameOrIdInVoc(classValue);
1913 } catch (Exception e) {
1914 //TODO nc
1915 rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICNAFP, false);
1916 }
1917 taxon.getName().setRank(rank);
1918 if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1919 handleGenus(element.getValue(), taxon.getName());
1920 }else if (rank.equals(Rank.SUBGENUS())){
1921 handleSubGenus(element.getValue(), taxon.getName());
1922 }else if (rank.equals(Rank.SECTION_BOTANY())){
1923 handleSection(element.getValue(), taxon.getName());
1924 }else if (rank.equals(Rank.SPECIES())){
1925 handleSpecies(element.getValue(), taxon.getName());
1926 }else if (rank.equals(Rank.SUBSPECIES())){
1927 handleSubSpecies(element.getValue(), taxon.getName());
1928 }else if (rank.equals(Rank.VARIETY())){
1929 handleVariety(element.getValue(), taxon.getName());
1930 }else{
1931 logger.warn("Unhandled rank: " + rank.getLabel());
1932 }
1933 } catch (UnknownCdmTypeException e) {
1934 logger.warn("Unknown rank " + classValue);
1935 unhandledTitleClassess.add(classValue);
1936 }
1937 }
1938 }
1939 List<Element> elements = element.getChildren();
1940 if (! elements.isEmpty()){
1941 logger.warn("Title has unexpected children");
1942 }
1943 UUID uuidTitle = EfloraTransformer.uuidTitle;
1944 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1945 taxon.addExtension(element.getTextNormalize(), titleExtension);
1946
1947 }
1948
1949
1950 /**
1951 * @param value
1952 * @param taxonNameBase
1953 */
1954 private void handleSubGenus(String value, INonViralName taxonNameBase) {
1955 String name = value.replace("Subgenus", "").trim();
1956 taxonNameBase.setInfraGenericEpithet(name);
1957 }
1958
1959 /**
1960 * @param value
1961 * @param taxonNameBase
1962 */
1963 private void handleSection(String value, INonViralName taxonNameBase) {
1964 String name = value.replace("Section", "").trim();
1965 taxonNameBase.setInfraGenericEpithet(name);
1966 }
1967
1968 /**
1969 * @param value
1970 * @param taxonNameBase
1971 */
1972 private void handleSpecies(String value, TaxonName taxonNameBase) {
1973 //do nothing
1974 }
1975
1976 /**
1977 * @param value
1978 * @param taxonNameBase
1979 */
1980 private void handleVariety(String value, TaxonName taxonNameBase) {
1981 //do nothing
1982 }
1983
1984 /**
1985 * @param value
1986 * @param taxonNameBase
1987 */
1988 private void handleSubSpecies(String value, TaxonName taxonNameBase) {
1989 //do nothing
1990 }
1991
1992
1993 private final Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1994
1995 protected void handleGenus(String value, INonViralName taxonName) {
1996 Matcher matcher = rexGenusAuthor.matcher(value);
1997 if (matcher.find()){
1998 String author = matcher.group();
1999 // String genus = value.replace(author, "");
2000 author = author.substring(1, author.length() - 1);
2001 Team team = Team.NewInstance();
2002 team.setTitleCache(author, true);
2003 Credit credit = Credit.NewInstance(team, null, null);
2004 taxonName.addCredit(credit);
2005 // taxonName.setCombinationAuthorship(team);
2006 // taxonName.setGenusOrUninomial(genus);
2007 }else{
2008 logger.info("No Author match for " + value);
2009 }
2010 }
2011
2012 private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
2013
2014 Classification tree = getTree(state);
2015 if (lastTaxon == null){
2016 tree.addChildTaxon(taxon, null, null);
2017 return;
2018 }
2019 Rank thisRank = taxon.getName().getRank();
2020 Rank lastRank = lastTaxon.getName().getRank();
2021 if (lastTaxon.getTaxonNodes().size() > 0){
2022 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2023 if (thisRank.isLower(lastRank ) ){
2024 lastNode.addChildTaxon(taxon, null, null);
2025 fillMissingEpithetsForTaxa(lastTaxon, taxon);
2026 }else if (thisRank.equals(lastRank)){
2027 TaxonNode parent = lastNode.getParent();
2028 if (parent != null){
2029 parent.addChildTaxon(taxon, null, null);
2030 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2031 }else{
2032 tree.addChildTaxon(taxon, null, null);
2033 }
2034 }else if (thisRank.isHigher(lastRank)){
2035 handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2036 // TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2037 // parentNode.addChildTaxon(taxon, null, null, null);
2038 }
2039 }else{
2040 logger.warn("Last taxon has no node");
2041 }
2042 }
2043
2044
2045
2046 /**
2047 * @param state
2048 * @return
2049 */
2050 private Classification getTree(EfloraImportState state) {
2051 Classification result = state.getTree(null);
2052 if (result == null){
2053 UUID uuid = state.getConfig().getClassificationUuid();
2054 if (uuid == null){
2055 logger.warn("No classification uuid is defined");
2056 result = getNewClassification(state);
2057 }else{
2058 result = getClassificationService().find(uuid);
2059 if (result == null){
2060 result = getNewClassification(state);
2061 result.setUuid(uuid);
2062 }
2063 }
2064 state.putTree(null, result);
2065 }
2066 return result;
2067 }
2068
2069
2070 private Classification getNewClassification(EfloraImportState state) {
2071 Classification result;
2072 result = Classification.NewInstance(state.getConfig().getClassificationTitle());
2073 state.putTree(null, result);
2074 return result;
2075 }
2076
2077
2078 /**
2079 * @param state
2080 * @param taxon
2081 * @param value
2082 * @param feature
2083 * @return
2084 */
2085 private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {
2086 TextData textData = TextData.NewInstance(feature);
2087 Language textLanguage = getDefaultLanguage(state);
2088 textData.putText(textLanguage, value);
2089 TaxonDescription description = getDescription(taxon);
2090 description.addElement(textData);
2091 if (references != null){
2092 makeOriginalSourceReferences(textData, ";", references);
2093 }
2094 return textData;
2095 }
2096
2097 private Language getDefaultLanguage(EfloraImportState state) {
2098 UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2099 if (defaultLanguageUuid != null){
2100 Language result = state.getDefaultLanguage();
2101 if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2102 result = (Language)getTermService().find(defaultLanguageUuid);
2103 state.setDefaultLanguage(result);
2104 if (result == null){
2105 logger.warn("Default language for " + defaultLanguageUuid + " does not exist.");
2106 }
2107 }
2108 return result;
2109 }else{
2110 return Language.DEFAULT();
2111 }
2112 }
2113
2114
2115 /**
2116 * @param elNomenclature
2117 */
2118 private void verifyNoAttribute(Element element) {
2119 List<Attribute> attributes = element.getAttributes();
2120 if (! attributes.isEmpty()){
2121 logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2122 }
2123 }
2124
2125 /**
2126 * @param elNomenclature
2127 */
2128 protected void verifyNoChildren(Element element) {
2129 verifyNoChildren(element, false);
2130 }
2131
2132 /**
2133 * @param elNomenclature
2134 */
2135 private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2136 List<Element> children = element.getChildren();
2137 if (! children.isEmpty()){
2138 if (ignoreLineBreak == true){
2139 for (Element child : children){
2140 if (! child.getName().equalsIgnoreCase("BR")){
2141 logger.warn(element.getName() + " has unhandled child: " + child.getName());
2142 }
2143 }
2144 }else{
2145 logger.warn(element.getName() + " has unhandled children");
2146 }
2147 }
2148 }
2149
2150
2151
2152 /**
2153 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2154 * exists it is added to the name and the nom. status part of the references title cache is
2155 * removed. Requires protected title cache.
2156 * @param ref
2157 * @param nonViralName
2158 */
2159 protected void parseNomStatus(Reference ref, INonViralName nonViralName) {
2160 String titleToParse = ref.getTitleCache();
2161
2162 String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName, true);
2163 if (! noStatusTitle.equals(titleToParse)){
2164 ref.setTitleCache(noStatusTitle, true);
2165 }
2166 }
2167
2168
2169 /**
2170 * Extracts the date published part and returns micro reference
2171 * @param ref
2172 * @return
2173 */
2174 private String parseReferenceYearAndDetail(Reference ref){
2175 String detailResult = null;
2176 String titleToParse = ref.getTitleCache();
2177 titleToParse = removeStartingSymbols(titleToParse, ref);
2178 String reReference = "^\\.{1,}";
2179 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2180 String oneMonth = "(Feb.|Dec.|March|June|July)";
2181 String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2182 String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2183
2184 String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2185 String reDetail = "\\.{1,10}$";
2186
2187 //pattern for the whole string
2188 Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2189 Matcher matcher = patReference.matcher(titleToParse);
2190 if (matcher.find()){
2191 int start = matcher.start();
2192 int end = matcher.end();
2193
2194 //title and other information precedes the year part
2195 String title = titleToParse.substring(0, start).trim();
2196 //detail follows the year part
2197 String detail = titleToParse.substring(end).trim();
2198
2199 //time period
2200 String strPeriod = matcher.group().trim();
2201 strPeriod = strPeriod.substring(1, strPeriod.length()-1); //remove brackets
2202 Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2203 matcher = patStartMonth.matcher(strPeriod);
2204 strPeriod = strPeriod.replace(" ", "");
2205 Integer startMonth = null;
2206 if (matcher.find()){
2207 end = matcher.end();
2208 strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2209 startMonth = getMonth(strPeriod.substring(0, end));
2210 }
2211
2212 VerbatimTimePeriod datePublished = TimePeriodParser.parseStringVerbatim(strPeriod);
2213 if (startMonth != null){
2214 datePublished.setStartMonth(startMonth);
2215 }
2216 ref.setDatePublished(datePublished);
2217 ref.setTitle(title);
2218 detailResult = CdmUtils.removeTrailingDots(detail);
2219 if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",") ){
2220 detailResult = detailResult.substring(0, detailResult.length() -1);
2221 }
2222 ref.setProtectedTitleCache(false);
2223 }else{
2224 logger.warn("Could not parse reference: " + titleToParse);
2225 }
2226 return detailResult;
2227
2228 }
2229
2230
2231
2232 private Integer getMonth(String month) {
2233 if (month.startsWith("Jan")){
2234 return 1;
2235 }else if (month.startsWith("Feb")){
2236 return 2;
2237 }else if (month.startsWith("Mar")){
2238 return 3;
2239 }else if (month.startsWith("Apr")){
2240 return 4;
2241 }else if (month.startsWith("May")){
2242 return 5;
2243 }else if (month.startsWith("Jun")){
2244 return 6;
2245 }else if (month.startsWith("Jul")){
2246 return 7;
2247 }else if (month.startsWith("Aug")){
2248 return 8;
2249 }else if (month.startsWith("Sep")){
2250 return 9;
2251 }else if (month.startsWith("Oct")){
2252 return 10;
2253 }else if (month.startsWith("Nov")){
2254 return 11;
2255 }else if (month.startsWith("Dec")){
2256 return 12;
2257 }else{
2258 logger.warn("Month not yet supported: " + month);
2259 return null;
2260 }
2261 }
2262
2263
2264 /* (non-Javadoc)
2265 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2266 */
2267 @Override
2268 protected boolean isIgnore(EfloraImportState state){
2269 return ! state.getConfig().isDoTaxa();
2270 }
2271
2272 }