rename cdmlib-eflora -> cdm-eflora
[cdmlib-apps.git] / cdm-eflora / src / main / java / eu / etaxonomy / cdm / io / eflora / EfloraTaxonImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.eflora;
11
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21
22 import org.apache.commons.lang.CharUtils;
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.log4j.Logger;
25 import org.jdom.Attribute;
26 import org.jdom.Element;
27 import org.springframework.stereotype.Component;
28 import org.springframework.transaction.TransactionStatus;
29
30 import eu.etaxonomy.cdm.common.CdmUtils;
31 import eu.etaxonomy.cdm.common.ResultWrapper;
32 import eu.etaxonomy.cdm.common.XmlHelp;
33 import eu.etaxonomy.cdm.io.common.ICdmIO;
34 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
35 import eu.etaxonomy.cdm.io.eflora.UnmatchedLeads.UnmatchedLeadsKey;
36 import eu.etaxonomy.cdm.model.agent.Person;
37 import eu.etaxonomy.cdm.model.agent.Team;
38 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
39 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
40 import eu.etaxonomy.cdm.model.common.Annotation;
41 import eu.etaxonomy.cdm.model.common.AnnotationType;
42 import eu.etaxonomy.cdm.model.common.CdmBase;
43 import eu.etaxonomy.cdm.model.common.Credit;
44 import eu.etaxonomy.cdm.model.common.ExtensionType;
45 import eu.etaxonomy.cdm.model.common.ISourceable;
46 import eu.etaxonomy.cdm.model.common.Language;
47 import eu.etaxonomy.cdm.model.common.Marker;
48 import eu.etaxonomy.cdm.model.common.MarkerType;
49 import eu.etaxonomy.cdm.model.common.TimePeriod;
50 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
51 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
52 import eu.etaxonomy.cdm.model.description.Feature;
53 import eu.etaxonomy.cdm.model.description.KeyStatement;
54 import eu.etaxonomy.cdm.model.description.PolytomousKey;
55 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
56 import eu.etaxonomy.cdm.model.description.TaxonDescription;
57 import eu.etaxonomy.cdm.model.description.TextData;
58 import eu.etaxonomy.cdm.model.name.BotanicalName;
59 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
60 import eu.etaxonomy.cdm.model.name.NameRelationshipType;
61 import eu.etaxonomy.cdm.model.name.NameTypeDesignation;
62 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
63 import eu.etaxonomy.cdm.model.name.NonViralName;
64 import eu.etaxonomy.cdm.model.name.Rank;
65 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
66 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
67 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
68 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
69 import eu.etaxonomy.cdm.model.occurrence.Specimen;
70 import eu.etaxonomy.cdm.model.reference.IBook;
71 import eu.etaxonomy.cdm.model.reference.IJournal;
72 import eu.etaxonomy.cdm.model.reference.Reference;
73 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
74 import eu.etaxonomy.cdm.model.reference.ReferenceType;
75 import eu.etaxonomy.cdm.model.taxon.Classification;
76 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
77 import eu.etaxonomy.cdm.model.taxon.Taxon;
78 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
79 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
80 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
81 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
82
83
84 /**
85 * @author a.mueller
86 *
87 */
88 @Component
89 public class EfloraTaxonImport extends EfloraImportBase implements ICdmIO<EfloraImportState> {
/** Logger shared by all methods of this import class. */
private static final Logger logger = Logger.getLogger(EfloraTaxonImport.class);

/** Interval (in handled taxa) at which doInvoke writes a progress message. */
private static int modCount = 30000;

/** Name parser instance; it is not referenced by the methods visible in this section. */
private NonViralNameParserImpl parser = new NonViralNameParserImpl();

/**
 * Default constructor. The superclass constructor is invoked implicitly.
 */
public EfloraTaxonImport(){
}
98
99
100 @Override
101 public boolean doCheck(EfloraImportState state){
102 boolean result = true;
103 return result;
104 }
105
106 //TODO make part of state, but state is renewed when invoking the import a second time
107 private UnmatchedLeads unmatchedLeads;
108
109 @Override
110 public void doInvoke(EfloraImportState state){
111 logger.info("start make Taxa ...");
112
113 //FIXME reset state
114 state.putTree(null, null);
115 // UnmatchedLeads unmatchedLeads = state.getOpenKeys();
116 if (unmatchedLeads == null){
117 unmatchedLeads = UnmatchedLeads.NewInstance();
118 }
119 state.setUnmatchedLeads(unmatchedLeads);
120
121 TransactionStatus tx = startTransaction();
122 unmatchedLeads.saveToSession(getPolytomousKeyNodeService());
123
124
125 //TODO generally do not store the reference object in the config
126 Reference sourceReference = state.getConfig().getSourceReference();
127 getReferenceService().saveOrUpdate(sourceReference);
128
129 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
130 ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
131
132 Element elbody= getBodyElement(state.getConfig());
133 List<Element> elTaxonList = elbody.getChildren();
134
135 int i = 0;
136
137 Set<String> unhandledTitleClassess = new HashSet<String>();
138 Set<String> unhandledNomeclatureChildren = new HashSet<String>();
139 Set<String> unhandledDescriptionChildren = new HashSet<String>();
140
141 Taxon lastTaxon = getLastTaxon(state);
142
143 //for each taxon
144 for (Element elTaxon : elTaxonList){
145 try {
146 if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
147 if (! elTaxon.getName().equalsIgnoreCase("taxon")){
148 logger.warn("body has element other than 'taxon'");
149 }
150
151 BotanicalName botanicalName = BotanicalName.NewInstance(Rank.SPECIES());
152 Taxon taxon = Taxon.NewInstance(botanicalName, state.getConfig().getSourceReference());
153
154 handleTaxonAttributes(elTaxon, taxon, state);
155
156
157 List<Element> children = elTaxon.getChildren();
158 handleTaxonElement(state, unhandledTitleClassess, unhandledNomeclatureChildren, unhandledDescriptionChildren, taxon, children);
159 handleTaxonRelation(state, taxon, lastTaxon);
160 lastTaxon = taxon;
161 taxaToSave.add(taxon);
162 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
163
164 } catch (Exception e) {
165 logger.warn("Exception occurred in Sapindacea taxon import: " + e);
166 e.printStackTrace();
167 }
168
169 }
170
171 System.out.println(state.getUnmatchedLeads().toString());
172 logger.warn("There are taxa with attributes 'excluded' and 'dubious'");
173
174 logger.info("Children for nomenclature are: " + unhandledNomeclatureChildren);
175 logger.info("Children for description are: " + unhandledDescriptionChildren);
176 logger.info("Children for homotypes are: " + unhandledHomotypeChildren);
177 logger.info("Children for nom are: " + unhandledNomChildren);
178
179
180 //invokeRelations(source, cdmApp, deleteAll, taxonMap, referenceMap);
181 logger.info(i + " taxa handled. Saving ...");
182 getTaxonService().saveOrUpdate(taxaToSave);
183 getFeatureTreeService().saveOrUpdateFeatureNodesAll(state.getFeatureNodesToSave());
184 state.getFeatureNodesToSave().clear();
185 commitTransaction(tx);
186
187 logger.info("end makeTaxa ...");
188 logger.info("start makeKey ...");
189 // invokeDoKey(state);
190 logger.info("end makeKey ...");
191
192 if (! success.getValue()){
193 state.setUnsuccessfull();
194 }
195 return ;
196 }
197
198
199 private void handleTaxonAttributes(Element elTaxon, Taxon taxon, EfloraImportState state) {
200 List<Attribute> attrList = elTaxon.getAttributes();
201 for (Attribute attr : attrList){
202 String attrName = attr.getName();
203 String attrValue = attr.getValue();
204 if ("class".equals(attrName)){
205 if (attrValue.equalsIgnoreCase("dubious") || attrValue.equalsIgnoreCase("DUBIOUS GENUS") || attrValue.equalsIgnoreCase("DOUBTFUL SPECIES") ){
206 taxon.setDoubtful(true);
207 }else{
208 MarkerType markerType = getMarkerType(state, attrValue);
209 if (markerType == null){
210 logger.warn("Class attribute value for taxon not yet supported: " + attrValue);
211 }else{
212 taxon.addMarker(Marker.NewInstance(markerType, true));
213 }
214 }
215 }else if ("num".equals(attrName)){
216 logger.warn("num not yet supported");
217 }else{
218 logger.warn("Attribute " + attrName + " not yet supported for element taxon");
219 }
220 }
221
222 }
223
224
225 private Taxon getLastTaxon(EfloraImportState state) {
226 if (state.getConfig().getLastTaxonUuid() == null){
227 return null;
228 }else{
229 return (Taxon)getTaxonService().find(state.getConfig().getLastTaxonUuid());
230 }
231 }
232
233
234 // private void invokeDoKey(SapindaceaeImportState state) {
235 // TransactionStatus tx = startTransaction();
236 //
237 // Set<FeatureNode> nodesToSave = new HashSet<FeatureNode>();
238 // ITaxonService taxonService = getTaxonService();
239 // ResultWrapper<Boolean> success = ResultWrapper.NewInstance(true);
240 //
241 // Element elbody= getBodyElement(state.getConfig());
242 // List<Element> elTaxonList = elbody.getChildren();
243 //
244 // int i = 0;
245 //
246 // //for each taxon
247 // for (Element elTaxon : elTaxonList){
248 // if ((i++ % modCount) == 0 && i > 1){ logger.info("Taxa handled: " + (i-1));}
249 // if (! elTaxon.getName().equalsIgnoreCase("taxon")){
250 // continue;
251 // }
252 //
253 // List<Element> children = elTaxon.getChildren("key");
254 // for (Element element : children){
255 // handleKeys(state, element, null);
256 // }
257 // nodesToSave.add(taxon);
258 //
259 // }
260 //
261 // }
262
263
264 // body/taxon/*
265 private void handleTaxonElement(EfloraImportState state, Set<String> unhandledTitleClassess, Set<String> unhandledNomeclatureChildren, Set<String> unhandledDescriptionChildren, Taxon taxon, List<Element> children) {
266 AnnotatableEntity lastEntity = null;
267 for (Element element : children){
268 String elName = element.getName();
269
270 if (elName.equalsIgnoreCase("title")){
271 handleTitle(state, element, taxon, unhandledTitleClassess);
272 lastEntity = null;
273 }else if(elName.equalsIgnoreCase("nomenclature")){
274 handleNomenclature(state, element, taxon, unhandledNomeclatureChildren);
275 lastEntity = null;
276 }else if(elName.equalsIgnoreCase("description")){
277 handleDescription(state, element, taxon, unhandledDescriptionChildren);
278 lastEntity = null;
279 }else if(elName.equalsIgnoreCase("habitatecology")){
280 lastEntity = handleEcology(state, element, taxon);
281 }else if(elName.equalsIgnoreCase("distribution")){
282 lastEntity = handleDistribution(state, element, taxon);
283 }else if(elName.equalsIgnoreCase("uses")){
284 lastEntity = handleUses(state, element, taxon);
285 }else if(elName.equalsIgnoreCase("notes")){
286 lastEntity = handleTaxonNotes(state, element, taxon);
287 }else if(elName.equalsIgnoreCase("chromosomes")){
288 lastEntity = handleChromosomes(state, element, taxon);
289 }else if(elName.equalsIgnoreCase("vernacularnames")){
290 handleVernaculars(state, element, taxon);
291 }else if(elName.equalsIgnoreCase("key")){
292 lastEntity = handleKeys(state, element, taxon);
293 }else if(elName.equalsIgnoreCase("references")){
294 handleReferences(state, element, taxon, lastEntity);
295 lastEntity = null;
296 }else if(elName.equalsIgnoreCase("taxon")){
297 logger.warn("A taxon should not be part of a taxon");
298 }else if(elName.equalsIgnoreCase("homotypes")){
299 logger.warn("Homotypes should be included in the nomenclature flag but is child of taxon [XPath: body/taxon/homotypes]");
300 }else{
301 logger.warn("Unexpected child for taxon: " + elName);
302 }
303 }
304 }
305
306
307 private void handleVernaculars(EfloraImportState state, Element elVernacular, Taxon taxon) {
308 verifyNoAttribute(elVernacular);
309 verifyNoChildren(elVernacular, false);
310 String value = elVernacular.getTextNormalize();
311 Feature feature = Feature.COMMON_NAME();
312 value = replaceStart(value, "Noms vernaculaires");
313 String[] dialects = value.split(";");
314 for (String singleDialect : dialects){
315 handleSingleDialect(taxon, singleDialect, feature, state);
316 }
317 return;
318 }
319
320
321 private void handleSingleDialect(Taxon taxon, String singleDialect, Feature feature, EfloraImportState state) {
322 singleDialect = singleDialect.trim();
323 TaxonDescription description = getDescription(taxon);
324 String reDialect = "\\(dial\\.\\s.*\\)";
325 // String reDialect = "\\(.*\\)";
326 Pattern patDialect = Pattern.compile(reDialect);
327 Matcher matcher = patDialect.matcher(singleDialect);
328 if (matcher.find()){
329 String dialect = singleDialect.substring(matcher.start(), matcher.end());
330 dialect = dialect.replace("(dial. ", "").replace(")", "");
331
332 Language language = null;
333 try {
334 language = this.getLanguage(state, state.getTransformer().getLanguageUuid(dialect), dialect, dialect, dialect);
335 } catch (UndefinedTransformerMethodException e) {
336 logger.error(e.getMessage());
337 }
338
339 String commonNames = singleDialect.substring(0, matcher.start());
340 String[] splitNames = commonNames.split(",");
341 for (String commonNameString : splitNames){
342 commonNameString = commonNameString.trim();
343 CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameString, language);
344 description.addElement(commonName);
345 }
346 }else{
347 logger.warn("No dialect match: " + singleDialect);
348 }
349 }
350
351
352 private void handleReferences(EfloraImportState state, Element elReferences, Taxon taxon, AnnotatableEntity lastEntity) {
353 verifyNoAttribute(elReferences);
354 verifyNoChildren(elReferences, true);
355 String refString = elReferences.getTextNormalize();
356 if (lastEntity == null){
357 logger.warn("No last entity defined: " + refString);
358 return;
359 }
360
361 Annotation annotation = Annotation.NewInstance(refString, AnnotationType.EDITORIAL(), Language.DEFAULT());
362 lastEntity.addAnnotation(annotation);
363 }
364
365
366 private PolytomousKey handleKeys(EfloraImportState state, Element elKey, Taxon taxon) {
367 UnmatchedLeads openKeys = state.getUnmatchedLeads();
368
369 //title
370 String title = makeKeyTitle(elKey);
371
372 //key
373 PolytomousKey key = PolytomousKey.NewTitledInstance(title);
374
375 //TODO add covered taxa etc.
376 verifyNoAttribute(elKey);
377
378 //notes
379 makeKeyNotes(elKey, key);
380
381 //keycouplets
382 List<Element> keychoices = new ArrayList<Element>();
383 keychoices.addAll(elKey.getChildren("keycouplet"));
384 keychoices.addAll(elKey.getChildren("keychoice"));
385
386
387 for (Element elKeychoice : keychoices){
388 handleKeyChoices(state, openKeys, key, elKeychoice, taxon);
389 elKey.removeContent(elKeychoice);
390 }
391
392 //
393 verifyNoChildren(elKey);
394 logger.info("Unmatched leads after key handling:" + openKeys.toString());
395
396
397 if (state.getConfig().isDoPrintKeys()){
398 key.print(System.err);
399 }
400 getPolytomousKeyService().save(key);
401 return key;
402 }
403
404
405 /**
406 * @param state
407 * @param elKey
408 * @param openKeys
409 * @param key
410 * @param elKeychoice
411 * @param taxon
412 */
413 private void handleKeyChoices(EfloraImportState state, UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, Taxon taxon) {
414
415 //char Attribute
416 //TODO it's still unclear if char is a feature and needs to be a new attribute
417 //or if it is handled as question. Therefore both cases are handled but feature
418 //is finally not yet set
419 KeyStatement question = handleKeychoiceChar(state, elKeychoice);
420 Feature feature = handleKeychoiceCharAsFeature(state, elKeychoice);
421
422 //lead
423 List<PolytomousKeyNode> childNodes = handleKeychoiceLeads(state, key, elKeychoice, taxon, question, feature);
424
425 //num -> match with unmatched leads
426 handleKeychoiceNum(openKeys, key, elKeychoice, childNodes);
427
428 //others
429 verifyNoAttribute(elKeychoice);
430 }
431
432
433 /**
434 * @param openKeys
435 * @param key
436 * @param elKeychoice
437 * @param childNodes
438 */
439 private void handleKeychoiceNum(UnmatchedLeads openKeys, PolytomousKey key, Element elKeychoice, List<PolytomousKeyNode> childNodes) {
440 Attribute numAttr = elKeychoice.getAttribute("num");
441 String num = CdmUtils.removeTrailingDot(numAttr == null? "":numAttr.getValue());
442 UnmatchedLeadsKey okk = UnmatchedLeadsKey.NewInstance(key, num);
443 Set<PolytomousKeyNode> matchingNodes = openKeys.getNodes(okk);
444 for (PolytomousKeyNode matchingNode : matchingNodes){
445 for (PolytomousKeyNode childNode : childNodes){
446 matchingNode.addChild(childNode);
447 }
448 openKeys.removeNode(okk, matchingNode);
449 }
450 if (matchingNodes.isEmpty()){
451 for (PolytomousKeyNode childNode : childNodes){
452 key.getRoot().addChild(childNode);
453 }
454 }
455
456 elKeychoice.removeAttribute("num");
457 }
458
459
460 /**
461 * @param state
462 * @param key
463 * @param elKeychoice
464 * @param taxon
465 * @param feature
466 * @return
467 */
468 private List<PolytomousKeyNode> handleKeychoiceLeads( EfloraImportState state, PolytomousKey key, Element elKeychoice, Taxon taxon, KeyStatement question, Feature feature) {
469 List<PolytomousKeyNode> childNodes = new ArrayList<PolytomousKeyNode>();
470 List<Element> leads = elKeychoice.getChildren("lead");
471 for(Element elLead : leads){
472 PolytomousKeyNode childNode = handleLead(state, key, elLead, taxon, question, feature);
473 childNodes.add(childNode);
474 }
475 return childNodes;
476 }
477
478
479 /**
480 * @param state
481 * @param elKeychoice
482 * @return
483 */
484 private KeyStatement handleKeychoiceChar(EfloraImportState state, Element elKeychoice) {
485 KeyStatement statement = null;
486 Attribute charAttr = elKeychoice.getAttribute("char");
487 if (charAttr != null){
488 String charStr = charAttr.getValue();
489 if (StringUtils.isNotBlank(charStr)){
490 statement = KeyStatement.NewInstance(charStr);
491 }
492 elKeychoice.removeAttribute("char");
493 }
494 return statement;
495 }
496
497 /**
498 * @param state
499 * @param elKeychoice
500 * @return
501 */
502 private Feature handleKeychoiceCharAsFeature(EfloraImportState state, Element elKeychoice) {
503 Feature feature = null;
504 Attribute charAttr = elKeychoice.getAttribute("char");
505 if (charAttr != null){
506 String charStr = charAttr.getValue();
507 feature = getFeature(charStr, state);
508 elKeychoice.removeAttribute("char");
509 }
510 return feature;
511 }
512
513
514 private PolytomousKeyNode handleLead(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, KeyStatement question, Feature feature) {
515 PolytomousKeyNode node = PolytomousKeyNode.NewInstance();
516 //TODO the char attribute in the keychoice is more a feature than a question
517 //needs to be discussed on model side
518 node.setQuestion(question);
519 // node.setFeature(feature);
520
521 //text
522 String text = handleLeadText(elLead, node);
523
524 //num
525 handleLeadNum(elLead, text);
526
527 //goto
528 handleLeadGoto(state, key, elLead, taxon, node);
529
530 //others
531 verifyNoAttribute(elLead);
532
533 return node;
534 }
535
536
537 /**
538 * @param elLead
539 * @param node
540 * @return
541 */
542 private String handleLeadText(Element elLead, PolytomousKeyNode node) {
543 String text = elLead.getAttributeValue("text").trim();
544 if (StringUtils.isBlank(text)){
545 logger.warn("Empty text in lead");
546 }
547 elLead.removeAttribute("text");
548 KeyStatement statement = KeyStatement.NewInstance(text);
549 node.setStatement(statement);
550 return text;
551 }
552
553
554 /**
555 * @param state
556 * @param key
557 * @param elLead
558 * @param taxon
559 * @param node
560 */
561 private void handleLeadGoto(EfloraImportState state, PolytomousKey key, Element elLead, Taxon taxon, PolytomousKeyNode node) {
562 Attribute gotoAttr = elLead.getAttribute("goto");
563 if (gotoAttr != null){
564 String strGoto = gotoAttr.getValue().trim();
565 //create key
566 UnmatchedLeadsKey gotoKey = null;
567 if (isInternalNode(strGoto)){
568 gotoKey = UnmatchedLeadsKey.NewInstance(key, strGoto);
569 }else{
570 String taxonKey = makeTaxonKey(strGoto, taxon);
571 gotoKey = UnmatchedLeadsKey.NewInstance(taxonKey);
572 }
573 //
574 UnmatchedLeads openKeys = state.getUnmatchedLeads();
575 if (gotoKey.isInnerLead()){
576 Set<PolytomousKeyNode> existingNodes = openKeys.getNodes(gotoKey);
577 for (PolytomousKeyNode existingNode : existingNodes){
578 node.addChild(existingNode);
579 }
580 }
581 openKeys.addKey(gotoKey, node);
582 //remove attribute (need for consistency check)
583 elLead.removeAttribute("goto");
584 }else{
585 logger.warn("lead has no goto attribute");
586 }
587 }
588
589
590 /**
591 * @param elLead
592 * @param text
593 */
594 private void handleLeadNum(Element elLead, String text) {
595 Attribute numAttr = elLead.getAttribute("num");
596 if (numAttr != null){
597 //TODO num
598 String num = numAttr.getValue();
599 elLead.removeAttribute("num");
600 }else{
601 logger.info("Keychoice has no num attribute: " + text);
602 }
603 }
604
605
606 private String makeTaxonKey(String strGoto, Taxon taxon) {
607 String result = "";
608 if (strGoto == null){
609 return "";
610 }
611 String strGenusName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getGenusOrUninomial();
612 strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", ""); //replace all brackets
613 strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
614
615 strGoto = strGoto.trim();
616 String[] split = strGoto.split("\\s");
617 for (int i = 0; i<split.length; i++){
618 String single = split[i];
619 if (isGenusAbbrev(single, strGenusName)){
620 split[i] = strGenusName;
621 }
622 // if (isInfraSpecificMarker(single)){
623 // String strSpeciesName = CdmBase.deproxy(taxon.getName(), NonViralName.class).getSpecificEpithet();
624 // split[i] = strGenusName + " " + strSpeciesName + " ";
625 // }
626 result = (result + " " + split[i]).trim();
627 }
628 return result;
629 }
630
631
632 private boolean isInfraSpecificMarker(String single) {
633 try {
634 if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
635 return true;
636 }
637 } catch (UnknownCdmTypeException e) {
638 return false;
639 }
640 return false;
641 }
642
643
644 private boolean isGenusAbbrev(String single, String strGenusName) {
645 if (! single.matches("[A-Z]\\.?")) {
646 return false;
647 }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
648 return false;
649 }else{
650 return single.charAt(0) == strGenusName.charAt(0);
651 }
652 }
653
654
655 private boolean isInternalNode(String strGoto) {
656 return CdmUtils.isNumeric(strGoto);
657 }
658
659
660 private void makeKeyNotes(Element keyElement, PolytomousKey key) {
661 Element elNotes = keyElement.getChild("notes");
662 if (elNotes != null){
663 keyElement.removeContent(elNotes);
664 String notes = elNotes.getTextNormalize();
665 if (StringUtils.isNotBlank(notes)){
666 key.addAnnotation(Annotation.NewInstance(notes, AnnotationType.EDITORIAL(), Language.DEFAULT()));
667 }
668 }
669 }
670
671
672 private String makeKeyTitle(Element keyElement) {
673 String title = "- no title - ";
674 Attribute titleAttr = keyElement.getAttribute("title");
675 keyElement.removeAttribute(titleAttr);
676 if (titleAttr == null){
677 Element elTitle = keyElement.getChild("keytitle");
678 keyElement.removeContent(elTitle);
679 if (elTitle != null){
680 title = elTitle.getTextNormalize();
681 }
682 }else{
683 title = titleAttr.getValue();
684 }
685 return title;
686 }
687
688
689 /**
690 * @param state
691 * @param element
692 * @param taxon
693 */
694 private TextData handleChromosomes(EfloraImportState state, Element element, Taxon taxon) {
695 Feature chromosomeFeature = getFeature("chromosomes", state);
696 verifyNoAttribute(element);
697 verifyNoChildren(element);
698 String value = element.getTextNormalize();
699 value = replaceStart(value, "Chromosomes");
700 String chromosomesPart = getChromosomesPart(value);
701 String references = value.replace(chromosomesPart, "").trim();
702 chromosomesPart = chromosomesPart.replace(":", "").trim();
703 return addDescriptionElement(state, taxon, chromosomesPart, chromosomeFeature, references);
704 }
705
706
707 /**
708 * @param ref
709 * @param string
710 * @return
711 */
712 private void makeOriginalSourceReferences(ISourceable sourcable, String splitter, String refAll) {
713 String[] splits = refAll.split(splitter);
714 for (String strRef: splits){
715 Reference ref = ReferenceFactory.newGeneric();
716 ref.setTitleCache(strRef, true);
717 String refDetail = parseReferenceYearAndDetail(ref);
718 sourcable.addSource(null, null, ref, refDetail);
719 }
720
721
722 //TODO use regex instead
723 /* String detailResult = null;
724 String titleToParse = ref.getTitleCache();
725 String reReference = "^\\.{1,}";
726 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
727 String reYear = "\\([1-2]{1}[0-9]{3}\\)";
728 String reYearPeriod = reYear + "(-" + reYear + ")+";
729 String reDetail = "\\.{1,10}$";
730 */
731 }
732
733
734 /**
735 * @param value
736 * @return
737 */
738 private String getChromosomesPart(String str) {
739 Pattern pattern = Pattern.compile("2n\\s*=\\s*\\d{1,2}:");
740 Matcher matcher = pattern.matcher(str);
741 if (matcher.find()){
742 return matcher.group(0);
743 }else{
744 logger.warn("Chromosomes could not be parsed: " + str);
745 }
746 return str;
747 }
748
749
750 /**
751 * @param state
752 * @param element
753 * @param taxon
754 */
755 private TextData handleTaxonNotes(EfloraImportState state, Element element, Taxon taxon) {
756 TextData result = null;
757 verifyNoChildren(element, true);
758 //verifyNoAttribute(element);
759 List<Attribute> attributes = element.getAttributes();
760 for (Attribute attribute : attributes){
761 if (! attribute.getName().equalsIgnoreCase("class")){
762 logger.warn("Char has unhandled attribute " + attribute.getName());
763 }else{
764 String classValue = attribute.getValue();
765 result = handleDescriptiveElement(state, element, taxon, classValue);
766 }
767 }
768 //if no class attribute exists, handle as note
769 if (attributes.isEmpty()){
770 result = handleDescriptiveElement(state, element, taxon, "Note");
771 }
772
773 //Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.ENGLISH());
774 //taxon.addAnnotation(annotation);
775 return result; //annotation;
776 }
777
778
779 /**
780 * @param state
781 * @param element
782 * @param taxon
783 * @param result
784 * @param attribute
785 * @return
786 */
787 private TextData handleDescriptiveElement(EfloraImportState state, Element element, Taxon taxon, String classValue) {
788 TextData result = null;
789 Feature feature = getFeature(classValue, state);
790 if (feature == null){
791 logger.warn("Unhandled feature: " + classValue);
792 }else{
793 String value = element.getValue();
794 value = replaceStart(value, "Notes");
795 value = replaceStart(value, "Note");
796 result = addDescriptionElement(state, taxon, value, feature, null);
797 }
798 return result;
799 }
800
801
802 private void removeBr(Element element) {
803 element.removeChildren("Br");
804 element.removeChildren("br");
805 element.removeChildren("BR");
806 }
807
808
809 /**
810 * @param state
811 * @param element
812 * @param taxon
813 */
814 private TextData handleUses(EfloraImportState state, Element element, Taxon taxon) {
815 verifyNoAttribute(element);
816 verifyNoChildren(element, true);
817 String value = element.getTextNormalize();
818 value = replaceStart(value, "Uses");
819 Feature feature = Feature.USES();
820 return addDescriptionElement(state, taxon, value, feature, null);
821
822 }
823
824
825 /**
826 * @param state
827 * @param element
828 * @param taxon
829 * @param unhandledDescriptionChildren
830 */
831 private DescriptionElementBase handleDistribution(EfloraImportState state, Element element, Taxon taxon) {
832 verifyNoAttribute(element);
833 verifyNoChildren(element, true);
834 String value = element.getTextNormalize();
835 value = replaceStart(value, "Distribution");
836 Feature feature = Feature.DISTRIBUTION();
837 //distribution parsing almost impossible as there is lots of freetext in the distribution tag
838 return addDescriptionElement(state, taxon, value, feature, null);
839 }
840
841
842 /**
843 * @param state
844 * @param element
845 * @param taxon
846 * @param unhandledDescriptionChildren
847 */
848 private TextData handleEcology(EfloraImportState state, Element elEcology, Taxon taxon) {
849 verifyNoAttribute(elEcology);
850 verifyNoChildren(elEcology, true);
851 String value = elEcology.getTextNormalize();
852 Feature feature = Feature.ECOLOGY();
853 if (value.startsWith("Habitat & Ecology")){
854 feature = getFeature("Habitat & Ecology", state);
855 value = replaceStart(value, "Habitat & Ecology");
856 }else if (value.startsWith("Habitat")){
857 value = replaceStart(value, "Habitat");
858 feature = getFeature("Habitat", state);
859 }
860 return addDescriptionElement(state, taxon, value, feature, null);
861 }
862
863
864
865 /**
866 * @param value
867 * @param replacementString
868 */
869 private String replaceStart(String value, String replacementString) {
870 if (value.startsWith(replacementString) ){
871 value = value.substring(replacementString.length()).trim();
872 }
873 while (value.startsWith("-") || value.startsWith("–") ){
874 value = value.substring("-".length()).trim();
875 }
876 return value;
877 }
878
879
880 /**
881 * @param value
882 * @param replacementString
883 */
884 protected String removeTrailing(String value, String replacementString) {
885 if (value == null){
886 return null;
887 }
888 if (value.endsWith(replacementString) ){
889 value = value.substring(0, value.length() - replacementString.length()).trim();
890 }
891 return value;
892 }
893
894 /**
895 * @param state
896 * @param element
897 * @param taxon
898 * @param unhandledNomeclatureChildren
899 */
900 private void handleNomenclature(EfloraImportState state, Element elNomenclature, Taxon taxon, Set<String> unhandledChildren) {
901 verifyNoAttribute(elNomenclature);
902
903 List<Element> elements = elNomenclature.getChildren();
904 for (Element element : elements){
905 if (element.getName().equals("homotypes")){
906 handleHomotypes(state, element, taxon);
907 }else if (element.getName().equals("notes")){
908 handleNomenclatureNotes(state, element, taxon);
909 }else{
910 unhandledChildren.add(element.getName());
911 }
912 }
913
914 }
915
916
917
918 private void handleNomenclatureNotes(EfloraImportState state, Element elNotes, Taxon taxon) {
919 verifyNoAttribute(elNotes);
920 verifyNoChildren(elNotes);
921 String notesText = elNotes.getTextNormalize();
922 Annotation annotation = Annotation.NewInstance(notesText, AnnotationType.EDITORIAL(), Language.DEFAULT());
923 taxon.addAnnotation(annotation);
924 }
925
926
927
928 private static Set<String> unhandledHomotypeChildren = new HashSet<String>();
929 /**
930 * @param state
931 * @param element
932 * @param taxon
933 */
934 private void handleHomotypes(EfloraImportState state, Element elHomotypes, Taxon taxon) {
935 verifyNoAttribute(elHomotypes);
936
937 List<Element> elements = elHomotypes.getChildren();
938 HomotypicalGroup homotypicalGroup = null;
939 for (Element element : elements){
940 if (element.getName().equals("nom")){
941 homotypicalGroup = handleNom(state, element, taxon, homotypicalGroup);
942 }else{
943 unhandledHomotypeChildren.add(element.getName());
944 }
945 }
946
947 }
948
949 private static Set<String> unhandledNomChildren = new HashSet<String>();
950
951 /**
952 * @param state
953 * @param element
954 * @param taxon
955 */
956 private HomotypicalGroup handleNom(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
957 List<Attribute> attributes = elNom.getAttributes();
958
959 boolean taxonBaseClassType = false;
960 for (Attribute attribute : attributes){
961 if (! attribute.getName().equalsIgnoreCase("class")){
962 logger.warn("Nom has unhandled attribute " + attribute.getName());
963 }else{
964 String classValue = attribute.getValue();
965 if (classValue.equalsIgnoreCase("acceptedname")){
966 homotypicalGroup = handleNomTaxon(state, elNom, taxon,homotypicalGroup, false);
967 taxonBaseClassType = true;
968 }else if (classValue.equalsIgnoreCase("synonym")){
969 homotypicalGroup = handleNomTaxon(state, elNom, taxon, homotypicalGroup, true);
970 taxonBaseClassType = true;
971 }else if (classValue.equalsIgnoreCase("typeref")){
972 handleTypeRef(state, elNom, taxon, homotypicalGroup);
973 }else{
974 logger.warn("Unhandled class value for nom: " + classValue);
975 }
976
977 }
978 }
979
980 List<Element> elements = elNom.getChildren();
981 for (Element element : elements){
982 if (element.getName().equals("name") || element.getName().equals("homonym") ){
983 if (taxonBaseClassType == false){
984 logger.warn("Name or homonym tag not allowed in non taxon nom tag");
985 }
986 }else{
987 unhandledNomChildren.add(element.getName());
988 }
989 }
990
991 return homotypicalGroup;
992
993 }
994
995 /**
996 * @param state
997 * @param elNom
998 * @param taxon
999 * @param homotypicalGroup
1000 */
1001 protected void handleTypeRef(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup) {
1002 verifyNoChildren(elNom);
1003 String typeRef = elNom.getTextNormalize();
1004 typeRef = removeStartingTypeRefMinus(typeRef);
1005
1006 String[] split = typeRef.split(":");
1007 if (split.length < 2){
1008 logger.warn("typeRef has no ':' : " + typeRef);
1009 }else if (split.length > 2){
1010 logger.warn("typeRef has more than 1 ':' : " + typeRef);
1011 }else{
1012 StringBuffer typeType = new StringBuffer(split[0]);
1013 String typeText = split[1].trim();
1014 TypeDesignationBase typeDesignation = getTypeDesignationAndReference(typeType);
1015
1016 //Name Type Desitnations
1017 if (typeDesignation instanceof NameTypeDesignation){
1018 makeNameTypeDesignations(typeType, typeText, typeDesignation);
1019 }
1020 //SpecimenTypeDesignations
1021 else if (typeDesignation instanceof SpecimenTypeDesignation){
1022 makeSpecimenTypeDesignation(typeType, typeText, typeDesignation);
1023 }else{
1024 logger.error("Unhandled type designation class" + typeDesignation.getClass().getName());
1025 }
1026 for (TaxonNameBase name : homotypicalGroup.getTypifiedNames()){
1027 name.addTypeDesignation(typeDesignation, true);
1028 }
1029 }
1030 }
1031
1032
1033 /**
1034 * @param typeRef
1035 * @return
1036 */
1037 protected String removeStartingTypeRefMinus(String typeRef) {
1038 typeRef = replaceStart(typeRef, "-");
1039 typeRef = replaceStart(typeRef, "—");
1040 typeRef = replaceStart(typeRef, "\u002d");
1041 typeRef = replaceStart(typeRef, "\u2013");
1042 typeRef = replaceStart(typeRef, "--");
1043 return typeRef;
1044 }
1045
1046 /**
1047 * @param typeType
1048 * @param typeText
1049 * @param typeDesignation
1050 */
1051 private void makeNameTypeDesignations(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1052 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1053 //do nothing
1054 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1055 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1056 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype")){
1057 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1058 }else{
1059 logger.warn("Unhandled type string: " + typeType + "(" + CharUtils.unicodeEscaped(typeType.charAt(0)) + ")");
1060 }
1061 //clean
1062 typeText = cleanNameType(typeText);
1063 //create name
1064 BotanicalName nameType = (BotanicalName)parser.parseFullName(typeText, NomenclaturalCode.ICBN, Rank.SPECIES());
1065 ((NameTypeDesignation) typeDesignation).setTypeName(nameType);
1066 //TODO wie können NameTypes den Namen zugeordnet werden? - wird aber vom Portal via NameCache matching gemacht
1067 }
1068
1069
1070 private String cleanNameType(String typeText) {
1071 String result;
1072 String[] split = typeText.split("\\[.*\\].?");
1073 result = split[0];
1074 return result;
1075 }
1076
1077
1078 /**
1079 * @param typeType
1080 * @param typeText
1081 * @param typeDesignation
1082 */
1083 protected void makeSpecimenTypeDesignation(StringBuffer typeType, String typeText, TypeDesignationBase typeDesignation) {
1084 if (typeType.toString().trim().equalsIgnoreCase("Type")){
1085 //do nothing
1086 }else if (typeType.toString().trim().equalsIgnoreCase("Neotype") || typeType.toString().trim().equalsIgnoreCase("Neotypes")){
1087 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.NEOTYPE());
1088 }else if (typeType.toString().trim().equalsIgnoreCase("Syntype") || typeType.toString().trim().equalsIgnoreCase("Syntypes")){
1089 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.SYNTYPE());
1090 }else if (typeType.toString().trim().equalsIgnoreCase("Lectotype")){
1091 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.LECTOTYPE());
1092 }else if (typeType.toString().trim().equalsIgnoreCase("Paratype")){
1093 typeDesignation.setTypeStatus(SpecimenTypeDesignationStatus.PARATYPE());
1094 }else{
1095 logger.warn("Unhandled type string: " + typeType);
1096 }
1097 Specimen specimen = Specimen.NewInstance();
1098 if (typeText.length() > 255){
1099 specimen.setTitleCache(typeText.substring(0, 252) + "...", true);
1100 }else{
1101 specimen.setTitleCache(typeText, true);
1102 }
1103 specimen.addDefinition(typeText, Language.ENGLISH());
1104 ((SpecimenTypeDesignation) typeDesignation).setTypeSpecimen(specimen);
1105 }
1106
1107 private TypeDesignationBase getTypeDesignationAndReference(StringBuffer typeType) {
1108 TypeDesignationBase result;
1109 Reference ref = parseTypeDesignationReference(typeType);
1110 if (typeType.indexOf(" species")>-1 || typeType.indexOf("genus")>-1){
1111 if (typeType.indexOf(" species")>-1 ){
1112 result = NameTypeDesignation.NewInstance();
1113 int start = typeType.indexOf(" species");
1114 typeType.replace(start, start + " species".length(), "");
1115 }else {
1116 result = NameTypeDesignation.NewInstance();
1117 int start = typeType.indexOf(" genus");
1118 typeType.replace(start, start + " genus".length(), "");
1119 }
1120 }else{
1121 result = SpecimenTypeDesignation.NewInstance();
1122 }
1123 result.setCitation(ref);
1124 return result;
1125 }
1126
1127
1128 private Reference parseTypeDesignationReference(StringBuffer typeType) {
1129 Reference result = null;
1130 String reBracketReference = "\\(.*\\)";
1131 Pattern patBracketReference = Pattern.compile(reBracketReference);
1132 Matcher matcher = patBracketReference.matcher(typeType);
1133 if (matcher.find()){
1134 String refString = matcher.group();
1135 int start = typeType.indexOf(refString);
1136 typeType.replace(start, start + refString.length(), "");
1137 refString = refString.replace("(", "").replace(")", "").trim();
1138 Reference ref = ReferenceFactory.newGeneric();
1139 ref.setTitleCache(refString, true);
1140 result = ref;
1141 }
1142 return result;
1143 }
1144
1145
1146 /**
1147 * @param state
1148 * @param elNom
1149 * @param taxon
1150 */
1151 //body/taxon/
1152 private HomotypicalGroup handleNomTaxon(EfloraImportState state, Element elNom, Taxon taxon, HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1153 NonViralName name = makeName(taxon, homotypicalGroup, isSynonym);
1154 String num = null;
1155
1156 boolean hasGenusInfo = false;
1157 TeamOrPersonBase lastTeam = null;
1158
1159 //genus
1160 List<Element> elGenus = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "genus");
1161 if (elGenus.size() > 0){
1162 hasGenusInfo = true;
1163 }else{
1164 logger.debug ("No Synonym Genus");
1165 }
1166 //infra rank -> needed to handle authors correctly
1167 List<Element> elInfraRank = XmlHelp.getAttributedChildListWithValue(elNom, "name", "class", "infrank");
1168 Rank infraRank = null;
1169 infraRank = handleInfRank(name, elInfraRank, infraRank);
1170
1171 //get left over elements
1172 List<Element> elements = elNom.getChildren();
1173 elements.removeAll(elInfraRank);
1174
1175 for (Element element : elements){
1176 if (element.getName().equals("name")){
1177 String classValue = element.getAttributeValue("class");
1178 String value = element.getValue().trim();
1179 if (classValue.equalsIgnoreCase("genus") || classValue.equalsIgnoreCase("family") ){
1180 name.setGenusOrUninomial(value);
1181 }else if (classValue.equalsIgnoreCase("family") ){
1182 name.setGenusOrUninomial(value);
1183 name.setRank(Rank.FAMILY());
1184 }else if (classValue.equalsIgnoreCase("subgenus")){
1185 //name.setInfraGenericEpithet(value);
1186 name.setNameCache(value.replace(":", "").trim());
1187 name.setRank(Rank.SUBGENUS());
1188 }else if (classValue.equalsIgnoreCase("epithet") ){
1189 if (hasGenusInfo == true){
1190 name.setSpecificEpithet(value);
1191 }else{
1192 handleInfraspecificEpithet(element, classValue, name);
1193 }
1194 }else if (classValue.equalsIgnoreCase("author")){
1195 handleNameAuthors(element, name);
1196 }else if (classValue.equalsIgnoreCase("paraut")){
1197 handleBasionymAuthor(state, element, name, false);
1198 }else if (classValue.equalsIgnoreCase("infrauthor") || classValue.equalsIgnoreCase("infraut")){
1199 handleInfrAuthor(state, element, name, true);
1200 }else if (classValue.equalsIgnoreCase("infrapar") || classValue.equalsIgnoreCase("infrpar") || classValue.equalsIgnoreCase("parauthor") ){
1201 handleBasionymAuthor(state, element, name, true);
1202 }else if (classValue.equalsIgnoreCase("infrepi")){
1203 handleInfrEpi(name, infraRank, value);
1204 }else if (classValue.equalsIgnoreCase("pub")){
1205 lastTeam = handleNomenclaturalReference(name, value);
1206 }else if (classValue.equalsIgnoreCase("usage")){
1207 lastTeam = handleNameUsage(taxon, name, value, lastTeam);
1208 }else if (classValue.equalsIgnoreCase("note")){
1209 handleNameNote(name, value);
1210 }else if (classValue.equalsIgnoreCase("num")){
1211 if (num != null){
1212 logger.warn("Duplicate num: " + value);
1213 }else{
1214 num = value;
1215 }
1216 if (isSynonym == true){
1217 logger.warn("Synonym should not have a num");
1218 }
1219 }else if (classValue.equalsIgnoreCase("typification")){
1220 logger.warn("Typification should not be a nom class");
1221 }else{
1222 logger.warn("Unhandled name class: " + classValue);
1223 }
1224 }else if(element.getName().equals("homonym")){
1225 handleHomonym(state, element, name);
1226 }else{
1227 // child element is not "name"
1228 unhandledNomChildren.add(element.getName());
1229 }
1230 }
1231
1232 //handle key
1233 if (! isSynonym){
1234 String taxonString = name.getNameCache();
1235 //try to find matching lead nodes
1236 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, taxonString);
1237 Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
1238 //same without using the num
1239 if (num != null){
1240 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", taxonString);
1241 handleMatchingNodes(state, taxon, noNumLeadsKey);
1242 }
1243 if (matchingNodes.isEmpty() && num != null){
1244 logger.warn("Taxon has num but no matching nodes exist: " + num+ ", Key: " + leadsKey.toString());
1245 }
1246 }
1247
1248 //test nom element has no text
1249 if (StringUtils.isNotBlank(elNom.getTextNormalize().replace("—", "").replace("\u002d","").replace("\u2013", ""))){
1250 String strElNom = elNom.getTextNormalize();
1251 if ("?".equals(strElNom)){
1252 handleQuestionMark(name, taxon);
1253 }
1254 // Character c = strElNom.charAt(0);
1255 //System.out.println(CharUtils.unicodeEscaped(c));
1256 logger.warn("Nom tag has text: " + strElNom);
1257 }
1258
1259 return name.getHomotypicalGroup();
1260 }
1261
1262
1263 private void handleQuestionMark(NonViralName name, Taxon taxon) {
1264 int count = name.getTaxonBases().size();
1265 if (count != 1){
1266 logger.warn("Name has " + count + " taxa. This is not handled for question mark");
1267 }else{
1268 TaxonBase taxonBase = (TaxonBase)name.getTaxonBases().iterator().next();
1269 taxonBase.setDoubtful(true);
1270 }
1271 }
1272
1273
1274 //merge with handleNomTaxon
1275 private void handleHomonym(EfloraImportState state, Element elHomonym, NonViralName upperName) {
1276 verifyNoAttribute(elHomonym);
1277
1278 //hommonym name
1279 BotanicalName homonymName = BotanicalName.NewInstance(upperName.getRank());
1280 homonymName.setGenusOrUninomial(upperName.getGenusOrUninomial());
1281 homonymName.setInfraGenericEpithet(upperName.getInfraGenericEpithet());
1282 homonymName.setSpecificEpithet(upperName.getSpecificEpithet());
1283 homonymName.setInfraSpecificEpithet(upperName.getInfraSpecificEpithet());
1284
1285 for (Element elName : (List<Element>)elHomonym.getChildren("name")){
1286 String classValue = elName.getAttributeValue("class");
1287 String value = elName.getValue().trim();
1288 if (classValue.equalsIgnoreCase("genus") ){
1289 homonymName.setGenusOrUninomial(value);
1290 }else if (classValue.equalsIgnoreCase("epithet") ){
1291 homonymName.setSpecificEpithet(value);
1292 }else if (classValue.equalsIgnoreCase("author")){
1293 handleNameAuthors(elName, homonymName);
1294 }else if (classValue.equalsIgnoreCase("paraut")){
1295 handleBasionymAuthor(state, elName, homonymName, true);
1296 }else if (classValue.equalsIgnoreCase("pub")){
1297 handleNomenclaturalReference(homonymName, value);
1298 }else if (classValue.equalsIgnoreCase("note")){
1299 handleNameNote(homonymName, value);
1300 }else{
1301 logger.warn("Unhandled class value: " + classValue);
1302 }
1303 }
1304 //TODO verify other information
1305
1306
1307 //rel
1308 boolean homonymIsLater = false;
1309 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1310 if (upperName.getNomenclaturalReference() != null && homonymName.getNomenclaturalReference() != null){
1311 TimePeriod homonymYear = homonymName.getNomenclaturalReference().getDatePublished();
1312 TimePeriod nameYear = upperName.getNomenclaturalReference().getDatePublished();
1313 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1314 }else{
1315 if (upperName.getNomenclaturalReference() == null){
1316 logger.warn("Homonym parent does not have a nomenclatural reference or year: " + upperName.getTitleCache());
1317 }
1318 if (homonymName.getNomenclaturalReference() == null){
1319 logger.warn("Homonym does not have a nomenclatural reference or year: " + homonymName.getTitleCache());
1320 }
1321 }
1322 if (homonymIsLater){
1323 homonymName.addRelationshipToName(upperName, relType, null);
1324 }else{
1325 upperName.addRelationshipToName(homonymName, relType, null);
1326 }
1327
1328 }
1329
1330
1331 /**
1332 * @param state
1333 * @param taxon
1334 * @param leadsKey
1335 * @return
1336 */
1337 private Set<PolytomousKeyNode> handleMatchingNodes(EfloraImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
1338 Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
1339 for (PolytomousKeyNode matchingNode : matchingNodes){
1340 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
1341 matchingNode.setTaxon(taxon);
1342 state.getPolytomousKeyNodesToSave().add(matchingNode);
1343 }
1344 return matchingNodes;
1345 }
1346
1347
1348 private void handleNameNote(NonViralName name, String value) {
1349 logger.warn("Name note: " + value + ". Available in portal?");
1350 Annotation annotation = Annotation.NewInstance(value, AnnotationType.EDITORIAL(), Language.DEFAULT());
1351 name.addAnnotation(annotation);
1352 }
1353
1354
1355 /**
1356 * @param taxon
1357 * @param name
1358 * @param value
1359 */
1360 protected TeamOrPersonBase handleNameUsage(Taxon taxon, NonViralName name, String referenceTitle, TeamOrPersonBase lastTeam) {
1361 Reference ref = ReferenceFactory.newGeneric();
1362 referenceTitle = removeStartingSymbols(referenceTitle, ref);
1363
1364 ref.setTitleCache(referenceTitle, true);
1365 String microReference = parseReferenceYearAndDetail(ref);
1366 TeamOrPersonBase team = getReferenceAuthor(ref);
1367 parseReferenceType(ref);
1368 if (team == null){
1369 team = lastTeam;
1370 }
1371 ref.setAuthorTeam(team);
1372
1373 TaxonDescription description = getDescription(taxon);
1374 TextData textData = TextData.NewInstance(Feature.CITATION());
1375 textData.addSource(null, null, ref, microReference, name, null);
1376 description.addElement(textData);
1377 return team;
1378 }
1379
1380
1381 /**
1382 * @param referenceTitle
1383 * @param ref
1384 * @return
1385 */
1386 private String removeStartingSymbols(String referenceTitle, Reference ref) {
1387 if (referenceTitle.startsWith(";") || referenceTitle.startsWith(",") || referenceTitle.startsWith(":")){
1388 referenceTitle = referenceTitle.substring(1).trim();
1389 ref.setTitleCache(referenceTitle);
1390 }
1391 return referenceTitle;
1392 }
1393
1394
1395 private void parseReferenceType(Reference ref) {
1396 String title = ref.getTitle();
1397 if (title == null){
1398 return;
1399 }
1400 title = title.trim();
1401 //no in reference
1402 if (! title.startsWith("in ")){
1403 ref.setType(ReferenceType.Book);
1404 return;
1405 }
1406
1407 title = title.substring(3);
1408 //in reference
1409 //no ,
1410 if (title.indexOf(",") == -1){
1411 ref.setType(ReferenceType.Article);
1412 IJournal journal = ReferenceFactory.newJournal();
1413 journal.setTitle(title);
1414 ref.setTitle(null);
1415 ref.setInJournal(journal);
1416 //return;
1417 }else{
1418 //,-references
1419 ref.setType(ReferenceType.BookSection);
1420 String[] split = (title).split(",\\s*[A-Z]");
1421 if (split.length <= 1){
1422 logger.warn("Can not fully decide what reference type. Guess it is a book section: " + title );
1423 }
1424 IBook book = ReferenceFactory.newBook();
1425 Team bookTeam = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1426 try {
1427 title = title.substring(split[0].length() + 1).trim();
1428 } catch (Exception e) {
1429 logger.error("ERROR occurred when trying to split title: " + title + "; split[0]: + " + split[0]);
1430 }
1431 book.setTitle(title);
1432 book.setAuthorTeam(bookTeam);
1433 book.setDatePublished(ref.getDatePublished());
1434 ref.setTitle(null);
1435 ref.setInBook(book);
1436 }
1437 }
1438
1439
1440 protected Team getReferenceAuthor (Reference ref) {
1441 boolean isCache = false;
1442 String referenceTitle = ref.getTitle();
1443 if (referenceTitle == null){
1444 isCache = true;
1445 referenceTitle = ref.getTitleCache();
1446 }
1447 //in references
1448 String[] split = (" " + referenceTitle).split(" in ");
1449 if (split.length > 1){
1450 if (StringUtils.isNotBlank(split[0])){
1451 //' in ' is within the reference string, take the preceding string as the team
1452 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1453 if (! isCache){
1454 ref.setTitle("in " + split[1]);
1455 }
1456 return team;
1457 }else{
1458 //string starts with in therefore no author is given
1459 return null;
1460 }
1461 }
1462 //no ,-reference
1463 split = referenceTitle.split(",");
1464 if (split.length < 2){
1465 //no author is given
1466 return null;
1467 }
1468
1469 //,-references
1470 split = (referenceTitle).split(",\\s*[A-Z]");
1471 if (split.length > 1){
1472 Team team = Team.NewTitledInstance(split[0].trim(), split[0].trim());
1473 if (! isCache){
1474 ref.setTitle(referenceTitle.substring(split[0].length()+1).trim());
1475 }
1476 return team;
1477 }else{
1478 logger.warn("Can't decide if a usage has an author: " + referenceTitle );
1479 return null;
1480 }
1481 }
1482
1483
1484 /**
1485 * Replaced by <homonym> tag but still in use for exceptions
1486 * @param detail
1487 * @param name
1488 * @return
1489 */
1490 protected String parseHomonym(String detail, NonViralName name) {
1491 String result;
1492 if (detail == null){
1493 return detail;
1494 }
1495
1496
1497 //non RE
1498 String reNon = "(\\s|,)non\\s";
1499 Pattern patReference = Pattern.compile(reNon);
1500 Matcher matcher = patReference.matcher(detail);
1501 if (matcher.find()){
1502 int start = matcher.start();
1503 int end = matcher.end();
1504
1505 if (detail != null){
1506 logger.warn("Unhandled non part: " + detail.substring(start));
1507 return detail;
1508 }
1509
1510 result = detail.substring(0, start);
1511
1512 //homonym string
1513 String homonymString = detail.substring(end);
1514
1515 //hommonym name
1516 BotanicalName homonymName = BotanicalName.NewInstance(name.getRank());
1517 homonymName.setGenusOrUninomial(name.getGenusOrUninomial());
1518 homonymName.setInfraGenericEpithet(name.getInfraGenericEpithet());
1519 homonymName.setSpecificEpithet(name.getSpecificEpithet());
1520 homonymName.setInfraSpecificEpithet(name.getInfraSpecificEpithet());
1521 Reference homonymNomRef = ReferenceFactory.newGeneric();
1522 homonymNomRef.setTitleCache(homonymString);
1523 String homonymNomRefDetail = parseReferenceYearAndDetail(homonymNomRef);
1524 homonymName.setNomenclaturalMicroReference(homonymNomRefDetail);
1525 String authorTitle = homonymNomRef.getTitleCache();
1526 Team team = Team.NewTitledInstance(authorTitle, authorTitle);
1527 homonymNomRef.setAuthorTeam(team);
1528 homonymNomRef.setTitle("");
1529 homonymNomRef.setProtectedTitleCache(false);
1530
1531 //rel
1532 boolean homonymIsLater = false;
1533 NameRelationshipType relType = NameRelationshipType.LATER_HOMONYM();
1534 TimePeriod homonymYear = homonymNomRef.getDatePublished();
1535 if (name.getNomenclaturalReference() != null){
1536 TimePeriod nameYear = name.getNomenclaturalReference().getDatePublished();
1537 homonymIsLater = homonymYear.getStart().compareTo(nameYear.getStart()) > 0;
1538 }else{
1539 logger.warn("Classification name has no nomenclatural reference");
1540 }
1541 if (homonymIsLater){
1542 homonymName.addRelationshipToName(name, relType, null);
1543 }else{
1544 name.addRelationshipToName(homonymName, relType, null);
1545 }
1546
1547 }else{
1548 return detail;
1549 }
1550 return result;
1551 }
1552
1553
1554 /**
1555 * @Xpath body/taxon/nomenclature/homotypes/nom/name[@class="pub"]
1556 * @param name
1557 * @param value
1558 */
1559 protected TeamOrPersonBase handleNomenclaturalReference(NonViralName name, String value) {
1560 Reference nomRef = ReferenceFactory.newGeneric();
1561 nomRef.setTitleCache(value, true);
1562 parseNomStatus(nomRef, name);
1563 String microReference = parseReferenceYearAndDetail(nomRef);
1564 name.setNomenclaturalReference(nomRef);
1565 microReference = parseHomonym(microReference, name);
1566 name.setNomenclaturalMicroReference(microReference);
1567 TeamOrPersonBase team = (TeamOrPersonBase)name.getCombinationAuthorTeam();
1568 if (team == null){
1569 logger.warn("Name has nom. ref. but no author team. Name: " + name.getTitleCache() + ", Nom.Ref.: " + value);
1570 }else{
1571 nomRef.setAuthorTeam(team);
1572 }
1573 return team;
1574 }
1575
1576 private void handleInfrAuthor(EfloraImportState state, Element elAuthor, NonViralName name, boolean overwrite) {
1577 String strAuthor = elAuthor.getValue().trim();
1578 if (strAuthor.endsWith(",")){
1579 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1580 }
1581 TeamOrPersonBase[] team = getTeam(strAuthor);
1582 if (name.getCombinationAuthorTeam() != null && overwrite == false){
1583 logger.warn("Try to write combination author for a name that already has a combination author. Neglected.");
1584 }else{
1585 name.setCombinationAuthorTeam(team[0]);
1586 name.setExCombinationAuthorTeam(team[1]);
1587 }
1588
1589
1590 }
1591
1592
1593 /**
1594 * Sets the names rank according to the infrank value
1595 * @param name
1596 * @param elements
1597 * @param elInfraRank
1598 * @param infraRank
1599 * @return
1600 */
1601 private Rank handleInfRank(NonViralName name, List<Element> elInfraRank, Rank infraRank) {
1602 if (elInfraRank.size() == 1){
1603 String strRank = elInfraRank.get(0).getTextNormalize();
1604 try {
1605 infraRank = Rank.getRankByNameOrAbbreviation(strRank);
1606 } catch (UnknownCdmTypeException e) {
1607 try{
1608 infraRank = Rank.getRankByNameOrAbbreviation(strRank + ".");
1609 } catch (UnknownCdmTypeException e2) {
1610 logger.warn("Unknown infrank " + strRank + ". Set infraRank to (null).");
1611 }
1612 }
1613 }else if (elInfraRank.size() > 1){
1614 logger.warn ("There is more than 1 infrank");
1615 }
1616 if (infraRank != null){
1617 name.setRank(infraRank);
1618 }
1619 return infraRank;
1620 }
1621
1622
1623 private void handleInfrEpi(NonViralName name, Rank infraRank, String value) {
1624 if (infraRank != null && infraRank.isInfraSpecific()){
1625 name.setInfraSpecificEpithet(value);
1626 if (CdmUtils.isCapital(value)){
1627 logger.warn("Infraspecific epithet starts with a capital letter: " + value);
1628 }
1629 }else if (infraRank != null && infraRank.isInfraGeneric()){
1630 name.setInfraGenericEpithet(value);
1631 if (! CdmUtils.isCapital(value)){
1632 logger.warn("Infrageneric epithet does not start with a capital letter: " + value);
1633 }
1634 }else{
1635 logger.warn("Infrepi could not be handled: " + value);
1636 }
1637 }
1638
1639
1640
1641 /**
1642 * Returns the (empty) with the correct homotypical group depending on the taxon status
1643 * @param taxon
1644 * @param homotypicalGroup
1645 * @param isSynonym
1646 * @return
1647 */
1648 private NonViralName makeName(Taxon taxon,HomotypicalGroup homotypicalGroup, boolean isSynonym) {
1649 NonViralName name;
1650 if (isSynonym){
1651 name = BotanicalName.NewInstance(Rank.SPECIES(), homotypicalGroup);
1652 SynonymRelationshipType synonymType = SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF();
1653 if (taxon.getHomotypicGroup().equals(homotypicalGroup)){
1654 synonymType = SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF();
1655 }
1656 taxon.addSynonymName(name, synonymType);
1657 }else{
1658 name = (NonViralName)taxon.getName();
1659 }
1660 return name;
1661 }
1662
1663
1664 /**
1665 * @param element
1666 * @param taxon
1667 */
1668 private void handleInfraspecificEpithet(Element element, String attrValue, NonViralName name) {
1669 String value = element.getTextNormalize();
1670 if (value.indexOf("subsp.") != -1){
1671 //TODO genus and species epi
1672 String infrEpi = value.substring(value.indexOf("subsp.") + 6).trim();
1673 name.setInfraSpecificEpithet(infrEpi);
1674 name.setRank(Rank.SUBSPECIES());
1675 }else if (value.indexOf("var.") != -1){
1676 //TODO genus and species epi
1677 String infrEpi = value.substring(value.indexOf("var.") + 4).trim();
1678 name.setInfraSpecificEpithet(infrEpi);
1679 name.setRank(Rank.VARIETY());
1680 }else{
1681 logger.warn("Unhandled infraspecific type: " + value);
1682 }
1683 }
1684
1685
1686 /**
1687 * @param state
1688 * @param element
1689 * @param name
1690 */
1691 private void handleBasionymAuthor(EfloraImportState state, Element elBasionymAuthor, NonViralName name, boolean overwrite) {
1692 String strAuthor = elBasionymAuthor.getValue().trim();
1693 Pattern reBasionymAuthor = Pattern.compile("^\\(.*\\)$");
1694 if (reBasionymAuthor.matcher(strAuthor).matches()){
1695 strAuthor = strAuthor.substring(1, strAuthor.length()-1);
1696 }else{
1697 logger.warn("Brackets are missing for original combination author " + strAuthor);
1698 }
1699 TeamOrPersonBase[] basionymTeam = getTeam(strAuthor);
1700 if (name.getBasionymAuthorTeam() != null && overwrite == false){
1701 logger.warn("Try to write basionym author for a name that already has a basionym author. Neglected.");
1702 }else{
1703 name.setBasionymAuthorTeam(basionymTeam[0]);
1704 name.setExBasionymAuthorTeam(basionymTeam[1]);
1705
1706 }
1707 }
1708
1709 private Map<String, UUID> teamMap = new HashMap<String, UUID>();
1710 /**
1711 * @param elAuthors
1712 * @param name
1713 * @param elNom
1714 */
1715 private void handleNameAuthors(Element elAuthor, NonViralName name) {
1716 if (name.getCombinationAuthorTeam() != null){
1717 logger.warn("Name already has a combination author. Name: " + name.getTitleCache() + ", Author: " + elAuthor.getTextNormalize());
1718 }
1719 String strAuthor = elAuthor.getValue().trim();
1720 if (strAuthor.endsWith(",")){
1721 strAuthor = strAuthor.substring(0, strAuthor.length() -1);
1722 }
1723 if (strAuthor.indexOf("(") > -1 || strAuthor.indexOf(")") > -1){
1724 logger.warn("Author has brackets. Basionym authors should be handled in separate tags: " + strAuthor);
1725 }
1726 TeamOrPersonBase[] team = getTeam(strAuthor);
1727 name.setCombinationAuthorTeam(team[0]);
1728 name.setExCombinationAuthorTeam(team[1]);
1729 }
1730
1731
1732 /**
1733 * @param strAuthor
1734 * @return
1735 */
1736 private TeamOrPersonBase[] getTeam(String strAuthor) {
1737 TeamOrPersonBase[] result = new TeamOrPersonBase[2];
1738 String[] split = strAuthor.split(" ex ");
1739 String strBaseAuthor = null;
1740 String strExAuthor = null;
1741
1742 if (split.length == 2){
1743 strBaseAuthor = split[1];
1744 strExAuthor = split[0];
1745 }else if (split.length == 1){
1746 strBaseAuthor = split[0];
1747 }else{
1748 logger.warn("Could not parse (ex) author: " + strAuthor);
1749 }
1750 result[0] = getUuidTeam(strBaseAuthor);
1751 if (result[0] == null){
1752 result[0] = parseSingleTeam(strBaseAuthor);
1753 teamMap.put(strBaseAuthor, result[0].getUuid());
1754 }
1755 if (strExAuthor != null){
1756 result[1] = getUuidTeam(strExAuthor);
1757 if (result[1] == null){
1758 result[1] = Team.NewInstance();
1759 result[1].setTitleCache(strExAuthor, true);
1760 teamMap.put(strExAuthor, result[1].getUuid());
1761 }
1762
1763 }
1764 return result;
1765 }
1766
1767
1768 protected TeamOrPersonBase parseSingleTeam(String strBaseAuthor) {
1769 TeamOrPersonBase result;
1770 String[] split = strBaseAuthor.split("&");
1771 if (split.length > 1){
1772 result = Team.NewInstance();
1773 for (String personString : split){
1774 Person person = makePerson(personString);
1775 ((Team)result).addTeamMember(person);
1776 }
1777 }else{
1778 result = makePerson(strBaseAuthor.trim());
1779 }
1780 return result;
1781 }
1782
1783
1784 /**
1785 * @param personString
1786 * @return
1787 */
1788 private Person makePerson(String personString) {
1789 personString = personString.trim();
1790 Person person = Person.NewTitledInstance(personString);
1791 person.setNomenclaturalTitle(personString);
1792 return person;
1793 }
1794
1795
1796 /**
1797 * @param result
1798 * @param strBaseAuthor
1799 */
1800 private TeamOrPersonBase getUuidTeam(String strBaseAuthor) {
1801 UUID uuidTeam = teamMap.get(strBaseAuthor);
1802 return CdmBase.deproxy(getAgentService().find(uuidTeam), TeamOrPersonBase.class);
1803 }
1804
1805
1806 private void handleDescription(EfloraImportState state, Element elDescription, Taxon taxon, Set<String> unhandledChildren) {
1807 verifyNoAttribute(elDescription);
1808
1809 List<Element> elements = elDescription.getChildren();
1810 for (Element element : elements){
1811 if (element.getName().equalsIgnoreCase("char")){
1812 handleChar(state, element, taxon);
1813 }else{
1814 logger.warn("Unhandled description child: " + element.getName());
1815 }
1816 }
1817
1818 }
1819
1820
1821 /**
1822 * @param state
1823 * @param element
1824 * @param taxon
1825 */
1826 private void handleChar(EfloraImportState state, Element element, Taxon taxon) {
1827 List<Attribute> attributes = element.getAttributes();
1828 for (Attribute attribute : attributes){
1829 if (! attribute.getName().equalsIgnoreCase("class")){
1830 logger.warn("Char has unhandled attribute " + attribute.getName());
1831 }else{
1832 String classValue = attribute.getValue();
1833 Feature feature = getFeature(classValue, state);
1834 if (feature == null){
1835 logger.warn("Unhandled feature: " + classValue);
1836 }else{
1837 String value = element.getValue();
1838 addDescriptionElement(state, taxon, value, feature, null);
1839 }
1840
1841 }
1842 }
1843
1844 List<Element> elements = element.getChildren();
1845 if (! elements.isEmpty()){
1846 logger.warn("Char has unhandled children");
1847 }
1848 }
1849
1850
1851 /**
1852 * @param taxon
1853 * @return
1854 */
1855 protected TaxonDescription getDescription(Taxon taxon) {
1856 for (TaxonDescription description : taxon.getDescriptions()){
1857 if (! description.isImageGallery()){
1858 return description;
1859 }
1860 }
1861 TaxonDescription newDescription = TaxonDescription.NewInstance(taxon);
1862 return newDescription;
1863 }
1864
1865
1866 /**
1867 * @param classValue
1868 * @param state
1869 * @return
1870 * @throws UndefinedTransformerMethodException
1871 */
1872 private Feature getFeature(String classValue, EfloraImportState state) {
1873 UUID uuid;
1874 try {
1875 uuid = state.getTransformer().getFeatureUuid(classValue);
1876 if (uuid == null){
1877 logger.info("Uuid is null for " + classValue);
1878 }
1879 String featureText = StringUtils.capitalize(classValue);
1880 //TODO eFlora feature vocabulary
1881 Feature feature = getFeature(state, uuid, featureText, featureText, classValue, null);
1882 if (feature == null){
1883 throw new NullPointerException(classValue + " not recognized as a feature");
1884 }
1885 return feature;
1886 } catch (Exception e) {
1887 logger.warn("Could not create feature for " + classValue + ": " + e.getMessage()) ;
1888 return Feature.UNKNOWN();
1889 }
1890 }
1891
1892
1893 /**
1894 * @param state
1895 * @param element
1896 * @param taxon
1897 * @param unhandledTitleClassess
1898 */
1899 private void handleTitle(EfloraImportState state, Element element, Taxon taxon, Set<String> unhandledTitleClassess) {
1900 // attributes
1901 List<Attribute> attributes = element.getAttributes();
1902 for (Attribute attribute : attributes){
1903 if (! attribute.getName().equalsIgnoreCase("class") ){
1904 if (! attribute.getName().equalsIgnoreCase("num")){
1905 logger.warn("Title has unhandled attribute " + attribute.getName());
1906 }else{
1907 //TODO num attribute in taxon
1908 }
1909 }else{
1910 String classValue = attribute.getValue();
1911 try {
1912 Rank rank;
1913 try {
1914 rank = Rank.getRankByNameOrAbbreviation(classValue);
1915 } catch (Exception e) {
1916 //TODO nc
1917 rank = Rank.getRankByEnglishName(classValue, NomenclaturalCode.ICBN, false);
1918 }
1919 taxon.getName().setRank(rank);
1920 if (rank.equals(Rank.FAMILY()) || rank.equals(Rank.GENUS())){
1921 handleGenus(element.getValue(), taxon.getName());
1922 }else if (rank.equals(Rank.SUBGENUS())){
1923 handleSubGenus(element.getValue(), taxon.getName());
1924 }else if (rank.equals(Rank.SECTION_BOTANY())){
1925 handleSection(element.getValue(), taxon.getName());
1926 }else if (rank.equals(Rank.SPECIES())){
1927 handleSpecies(element.getValue(), taxon.getName());
1928 }else if (rank.equals(Rank.SUBSPECIES())){
1929 handleSubSpecies(element.getValue(), taxon.getName());
1930 }else if (rank.equals(Rank.VARIETY())){
1931 handleVariety(element.getValue(), taxon.getName());
1932 }else{
1933 logger.warn("Unhandled rank: " + rank.getLabel());
1934 }
1935 } catch (UnknownCdmTypeException e) {
1936 logger.warn("Unknown rank " + classValue);
1937 unhandledTitleClassess.add(classValue);
1938 }
1939 }
1940 }
1941 List<Element> elements = element.getChildren();
1942 if (! elements.isEmpty()){
1943 logger.warn("Title has unexpected children");
1944 }
1945 UUID uuidTitle = EfloraTransformer.uuidTitle;
1946 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "title", "title", "title");
1947 taxon.addExtension(element.getTextNormalize(), titleExtension);
1948
1949 }
1950
1951
1952 /**
1953 * @param value
1954 * @param taxonNameBase
1955 */
1956 private void handleSubGenus(String value, TaxonNameBase taxonNameBase) {
1957 String name = value.replace("Subgenus", "").trim();
1958 ((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1959 }
1960
1961 /**
1962 * @param value
1963 * @param taxonNameBase
1964 */
1965 private void handleSection(String value, TaxonNameBase taxonNameBase) {
1966 String name = value.replace("Section", "").trim();
1967 ((NonViralName)taxonNameBase).setInfraGenericEpithet(name);
1968 }
1969
1970 /**
1971 * @param value
1972 * @param taxonNameBase
1973 */
1974 private void handleSpecies(String value, TaxonNameBase taxonNameBase) {
1975 //do nothing
1976 }
1977
1978 /**
1979 * @param value
1980 * @param taxonNameBase
1981 */
1982 private void handleVariety(String value, TaxonNameBase taxonNameBase) {
1983 //do nothing
1984 }
1985
1986 /**
1987 * @param value
1988 * @param taxonNameBase
1989 */
1990 private void handleSubSpecies(String value, TaxonNameBase taxonNameBase) {
1991 //do nothing
1992 }
1993
1994
1995 private Pattern rexGenusAuthor = Pattern.compile("(\\[|\\().*(\\]|\\))");
1996
1997 /**
1998 * @param value
1999 * @param taxonNameBase
2000 */
2001 protected void handleGenus(String value, TaxonNameBase taxonName) {
2002 Matcher matcher = rexGenusAuthor.matcher(value);
2003 if (matcher.find()){
2004 String author = matcher.group();
2005 // String genus = value.replace(author, "");
2006 author = author.substring(1, author.length() - 1);
2007 Team team = Team.NewInstance();
2008 team.setTitleCache(author, true);
2009 Credit credit = Credit.NewInstance(team, null);
2010 taxonName.addCredit(credit);
2011 // NonViralName nvn = (NonViralName)taxonName;
2012 // nvn.setCombinationAuthorTeam(team);
2013 // nvn.setGenusOrUninomial(genus);
2014 }else{
2015 logger.info("No Author match for " + value);
2016 }
2017 }
2018
2019
2020 /**
2021 * @param taxon
2022 * @param lastTaxon
2023 */
2024 private void handleTaxonRelation(EfloraImportState state, Taxon taxon, Taxon lastTaxon) {
2025
2026 Classification tree = getTree(state);
2027 if (lastTaxon == null){
2028 tree.addChildTaxon(taxon, null, null, null);
2029 return;
2030 }
2031 Rank thisRank = taxon.getName().getRank();
2032 Rank lastRank = lastTaxon.getName().getRank();
2033 if (lastTaxon.getTaxonNodes().size() > 0){
2034 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
2035 if (thisRank.isLower(lastRank ) ){
2036 lastNode.addChildTaxon(taxon, null, null, null);
2037 fillMissingEpithetsForTaxa(lastTaxon, taxon);
2038 }else if (thisRank.equals(lastRank)){
2039 TaxonNode parent = lastNode.getParent();
2040 if (parent != null){
2041 parent.addChildTaxon(taxon, null, null, null);
2042 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
2043 }else{
2044 tree.addChildTaxon(taxon, null, null, null);
2045 }
2046 }else if (thisRank.isHigher(lastRank)){
2047 handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2048 // TaxonNode parentNode = handleTaxonRelation(state, taxon, lastNode.getParent().getTaxon());
2049 // parentNode.addChildTaxon(taxon, null, null, null);
2050 }
2051 }else{
2052 logger.warn("Last taxon has no node");
2053 }
2054 }
2055
2056
2057
2058 /**
2059 * @param state
2060 * @return
2061 */
2062 private Classification getTree(EfloraImportState state) {
2063 Classification result = state.getTree(null);
2064 if (result == null){
2065 UUID uuid = state.getConfig().getClassificationUuid();
2066 if (uuid == null){
2067 logger.warn("No classification uuid is defined");
2068 result = getNewClassification(state);
2069 }else{
2070 result = getClassificationService().find(uuid);
2071 if (result == null){
2072 result = getNewClassification(state);
2073 result.setUuid(uuid);
2074 }
2075 }
2076 state.putTree(null, result);
2077 }
2078 return result;
2079 }
2080
2081
2082 private Classification getNewClassification(EfloraImportState state) {
2083 Classification result;
2084 result = Classification.NewInstance(state.getConfig().getClassificationTitle());
2085 state.putTree(null, result);
2086 return result;
2087 }
2088
2089
2090 /**
2091 * @param state
2092 * @param taxon
2093 * @param value
2094 * @param feature
2095 * @return
2096 */
2097 private TextData addDescriptionElement(EfloraImportState state, Taxon taxon, String value, Feature feature, String references) {
2098 TextData textData = TextData.NewInstance(feature);
2099 Language textLanguage = getDefaultLanguage(state);
2100 textData.putText(textLanguage, value);
2101 TaxonDescription description = getDescription(taxon);
2102 description.addElement(textData);
2103 if (references != null){
2104 makeOriginalSourceReferences(textData, ";", references);
2105 }
2106 return textData;
2107 }
2108
2109 private Language getDefaultLanguage(EfloraImportState state) {
2110 UUID defaultLanguageUuid = state.getConfig().getDefaultLanguageUuid();
2111 if (defaultLanguageUuid != null){
2112 Language result = state.getDefaultLanguage();
2113 if (result == null || ! result.getUuid().equals(defaultLanguageUuid)){
2114 result = (Language)getTermService().find(defaultLanguageUuid);
2115 state.setDefaultLanguage(result);
2116 if (result == null){
2117 logger.warn("Default language for " + defaultLanguageUuid + " does not exist.");
2118 }
2119 }
2120 return result;
2121 }else{
2122 return Language.DEFAULT();
2123 }
2124 }
2125
2126
2127 /**
2128 * @param elNomenclature
2129 */
2130 private void verifyNoAttribute(Element element) {
2131 List<Attribute> attributes = element.getAttributes();
2132 if (! attributes.isEmpty()){
2133 logger.warn(element.getName() + " has unhandled attributes: " + attributes.get(0).getValue() + "..." );
2134 }
2135 }
2136
2137 /**
2138 * @param elNomenclature
2139 */
2140 protected void verifyNoChildren(Element element) {
2141 verifyNoChildren(element, false);
2142 }
2143
2144 /**
2145 * @param elNomenclature
2146 */
2147 private void verifyNoChildren(Element element, boolean ignoreLineBreak) {
2148 List<Element> children = element.getChildren();
2149 if (! children.isEmpty()){
2150 if (ignoreLineBreak == true){
2151 for (Element child : children){
2152 if (! child.getName().equalsIgnoreCase("BR")){
2153 logger.warn(element.getName() + " has unhandled child: " + child.getName());
2154 }
2155 }
2156 }else{
2157 logger.warn(element.getName() + " has unhandled children");
2158 }
2159 }
2160 }
2161
2162
2163
2164 /**
2165 * Parses the nomenclatural status from the references titleCache. If a nomenclatural status
2166 * exists it is added to the name and the nom. status part of the references title cache is
2167 * removed. Requires protected title cache.
2168 * @param ref
2169 * @param nonViralName
2170 */
2171 protected void parseNomStatus(Reference ref, NonViralName nonViralName) {
2172 String titleToParse = ref.getTitleCache();
2173
2174 String noStatusTitle = parser.parseNomStatus(titleToParse, nonViralName);
2175 if (! noStatusTitle.equals(titleToParse)){
2176 ref.setTitleCache(noStatusTitle, true);
2177 }
2178 }
2179
2180
2181 /**
2182 * Extracts the date published part and returns micro reference
2183 * @param ref
2184 * @return
2185 */
2186 private String parseReferenceYearAndDetail(Reference ref){
2187 String detailResult = null;
2188 String titleToParse = ref.getTitleCache();
2189 titleToParse = removeStartingSymbols(titleToParse, ref);
2190 String reReference = "^\\.{1,}";
2191 // String reYear = "\\([1-2]{1}[0-9]{3}\\)";
2192 String oneMonth = "(Feb.|Dec.|March|June|July)";
2193 String reYear = oneMonth + "?\\s?[1-2]\\s?[0-9]\\s?[0-9]\\s?[0-9]\\s?";
2194 String secondYear = "(\\s?[1-2]\\s?[0-9])?\\s?[0-9]\\s?[0-9]\\s?";
2195
2196 String reYearPeriod = "\\(" + reYear + "(\\-" + secondYear + ")?\\)";
2197 String reDetail = "\\.{1,10}$";
2198
2199 //pattern for the whole string
2200 Pattern patReference = Pattern.compile(/*reReference +*/ reYearPeriod /*+ reDetail */);
2201 Matcher matcher = patReference.matcher(titleToParse);
2202 if (matcher.find()){
2203 int start = matcher.start();
2204 int end = matcher.end();
2205
2206 //title and other information precedes the year part
2207 String title = titleToParse.substring(0, start).trim();
2208 //detail follows the year part
2209 String detail = titleToParse.substring(end).trim();
2210
2211 //time period
2212 String strPeriod = matcher.group().trim();
2213 strPeriod = strPeriod.substring(1, strPeriod.length()-1); //remove brackets
2214 Pattern patStartMonth = Pattern.compile("^" + oneMonth);
2215 matcher = patStartMonth.matcher(strPeriod);
2216 strPeriod = strPeriod.replace(" ", "");
2217 Integer startMonth = null;
2218 if (matcher.find()){
2219 end = matcher.end();
2220 strPeriod = strPeriod.substring(0, end) + " " + strPeriod.substring(end);
2221 startMonth = getMonth(strPeriod.substring(0, end));
2222 }
2223
2224 TimePeriod datePublished = TimePeriod.parseString(strPeriod);
2225 if (startMonth != null){
2226 datePublished.setStartMonth(startMonth);
2227 }
2228 ref.setDatePublished(datePublished);
2229 ref.setTitle(title);
2230 detailResult = CdmUtils.removeTrailingDot(detail);
2231 if (detailResult.endsWith(".") || detailResult.endsWith(";") || detailResult.endsWith(",") ){
2232 detailResult = detailResult.substring(0, detailResult.length() -1);
2233 }
2234 ref.setProtectedTitleCache(false);
2235 }else{
2236 logger.warn("Could not parse reference: " + titleToParse);
2237 }
2238 return detailResult;
2239
2240 }
2241
2242
2243
2244 private Integer getMonth(String month) {
2245 if (month.startsWith("Jan")){
2246 return 1;
2247 }else if (month.startsWith("Feb")){
2248 return 2;
2249 }else if (month.startsWith("Mar")){
2250 return 3;
2251 }else if (month.startsWith("Apr")){
2252 return 4;
2253 }else if (month.startsWith("May")){
2254 return 5;
2255 }else if (month.startsWith("Jun")){
2256 return 6;
2257 }else if (month.startsWith("Jul")){
2258 return 7;
2259 }else if (month.startsWith("Aug")){
2260 return 8;
2261 }else if (month.startsWith("Sep")){
2262 return 9;
2263 }else if (month.startsWith("Oct")){
2264 return 10;
2265 }else if (month.startsWith("Nov")){
2266 return 11;
2267 }else if (month.startsWith("Dec")){
2268 return 12;
2269 }else{
2270 logger.warn("Month not yet supported: " + month);
2271 return null;
2272 }
2273 }
2274
2275
2276 /* (non-Javadoc)
2277 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
2278 */
2279 protected boolean isIgnore(EfloraImportState state){
2280 return ! state.getConfig().isDoTaxa();
2281 }
2282
2283 }