fix #6799: add totalCount of ticks as totalwork and start the progress monitor when...
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupFeatureImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.util.ArrayList;
13 import java.util.HashSet;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.UUID;
18
19 import javax.xml.stream.XMLEventReader;
20 import javax.xml.stream.XMLStreamException;
21 import javax.xml.stream.events.Attribute;
22 import javax.xml.stream.events.XMLEvent;
23
24 import org.apache.commons.lang.StringUtils;
25 import org.apache.log4j.Logger;
26
27 import eu.etaxonomy.cdm.common.CdmUtils;
28 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
29 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
30 import eu.etaxonomy.cdm.model.common.Annotation;
31 import eu.etaxonomy.cdm.model.common.AnnotationType;
32 import eu.etaxonomy.cdm.model.common.IntextReference;
33 import eu.etaxonomy.cdm.model.common.Language;
34 import eu.etaxonomy.cdm.model.common.LanguageString;
35 import eu.etaxonomy.cdm.model.common.MarkerType;
36 import eu.etaxonomy.cdm.model.common.TermVocabulary;
37 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
38 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
39 import eu.etaxonomy.cdm.model.description.Feature;
40 import eu.etaxonomy.cdm.model.description.TaxonDescription;
41 import eu.etaxonomy.cdm.model.description.TextData;
42 import eu.etaxonomy.cdm.model.location.Country;
43 import eu.etaxonomy.cdm.model.location.NamedArea;
44 import eu.etaxonomy.cdm.model.media.Media;
45 import eu.etaxonomy.cdm.model.reference.Reference;
46 import eu.etaxonomy.cdm.model.taxon.Taxon;
47
48 /**
49 * @author a.mueller
50 * @created 30.05.2012
51 *
52 */
53 public class MarkupFeatureImport extends MarkupImportBase {
54 @SuppressWarnings("unused")
55 private static final Logger logger = Logger.getLogger(MarkupFeatureImport.class);
56
57 protected static final String MODS_TITLEINFO = "titleInfo";
58
59 private final MarkupSpecimenImport specimenImport;
60 private final MarkupNomenclatureImport nomenclatureImport;
61 private final MarkupKeyImport keyImport;
62
/**
 * Creates the feature importer and wires it to its sibling importers.
 *
 * @param docImport the document import, handed to the base class constructor
 * @param specimenImport importer used for specimen related content
 * @param nomenclatureImport importer passed on to figure and footnote handling
 * @param keyImport importer to which key elements are delegated
 */
public MarkupFeatureImport(MarkupDocumentImport docImport, MarkupSpecimenImport specimenImport,
        MarkupNomenclatureImport nomenclatureImport, MarkupKeyImport keyImport) {
    super(docImport);
    // this instance acts as its own feature importer
    this.featureImport = this;
    this.keyImport = keyImport;
    this.nomenclatureImport = nomenclatureImport;
    this.specimenImport = specimenImport;
}
71
72 public void handleFeature(MarkupImportState state, XMLEventReader readerOrig, XMLEvent parentEvent) throws XMLStreamException {
73 Map<String, Attribute> attrs = getAttributes(parentEvent);
74 Boolean isFreetext = getAndRemoveBooleanAttributeValue(parentEvent, attrs, IS_FREETEXT, false);
75 String classValue =getAndRemoveRequiredAttributeValue(parentEvent, attrs, CLASS);
76 checkNoAttributes(attrs, parentEvent);
77
78 Reference sourceReference = state.getConfig().getSourceReference();
79 Feature feature = makeFeature(classValue, state, parentEvent, null);
80 Taxon taxon = state.getCurrentTaxon();
81 TaxonDescription taxonDescription = getDefaultTaxonDescription(taxon, NO_IMAGE_GALLERY, CREATE_NEW, sourceReference);
82 if (!taxonDescription.isDefault()){
83 taxonDescription.setDefault(true);
84 }
85 // TextData figureHolderTextData = null; //for use with one TextData for
86 // all figure only
87
88
89 TaxonDescription structuredDescription = null;
90
91 boolean isDescription = feature.equals(Feature.DESCRIPTION());
92
93 XMLEventReader reader;
94 if (isDescription){
95 LookAheadEventReader lookAhead = new LookAheadEventReader(parentEvent.asStartElement(), readerOrig);
96 String descriptionText = makeFullDescriptionText(lookAhead.getCachedEvents(true));
97 TextData descriptionTextData = TextData.NewInstance(Feature.DESCRIPTION(), descriptionText, getDefaultLanguage(state),null);
98 descriptionTextData.addPrimaryTaxonomicSource(sourceReference);
99 taxonDescription.addElement(descriptionTextData);
100 reader = lookAhead;
101 }else{
102 reader = readerOrig;
103 }
104
105 DescriptionElementBase lastDescriptionElement = null;
106
107 CharOrder charOrder= new CharOrder();
108 while (reader.hasNext()) {
109 XMLEvent next = readNoWhitespace(reader);
110 if (isMyEndingElement(next, parentEvent)) {
111 state.putFeatureToGeneralSorterList(feature);
112 return;
113 } else if (isEndingElement(next, DISTRIBUTION_LIST) || isEndingElement(next, HABITAT_LIST)) {
114 // only handle list elements
115 } else if (isStartingElement(next, HEADING)) {
116 makeFeatureHeading(state, reader, classValue, feature, next);
117 } else if (isStartingElement(next, WRITER)) {
118 makeFeatureWriter(state, reader, feature, taxon, next);
119 // } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
120 // if (!feature.equals(Feature.DISTRIBUTION())) {
121 // String message = "Distribution locality only allowed for feature of type 'distribution'";
122 // fireWarningEvent(message, next, 4);
123 // }
124 // handleDistributionLocality(state, reader, next);
125 } else if (isStartingElement(next, DISTRIBUTION_LIST) || isStartingElement(next, HABITAT_LIST)) {
126 // only handle single list elements
127 } else if (isStartingElement(next, HABITAT)) {
128 if (!(feature.equals(Feature.HABITAT())
129 || feature.equals(Feature.HABITAT_ECOLOGY())
130 || feature.equals(Feature.ECOLOGY()))) {
131 String message = "Habitat only allowed for feature of type 'habitat','habitat ecology' or 'ecology'";
132 fireWarningEvent(message, next, 4);
133 }
134 String habitatString = handleHabitat(state, reader, next);
135 fireWarningEvent("Return value from habitat tag not yet handled: " + habitatString, next, 4);
136 } else if (isStartingElement(next, CHAR)) {
137 if (structuredDescription == null){
138 MarkerType descriptionMarker;
139 try {
140 descriptionMarker = getMarkerType(state, state.getTransformer().getMarkerTypeUuid("structured description"),
141 "Structured Descriptions", "Marker to mark descriptions used for more structured descriptions", null, null);
142 } catch (UndefinedTransformerMethodException e) {
143 throw new RuntimeException(e);
144 }
145 String title = "Structured descriptive data for " + taxon.getName().getTitleCache();
146 structuredDescription = getMarkedTaxonDescription(taxon, descriptionMarker, NO_IMAGE_GALLERY, CREATE_NEW, state.getConfig().getSourceReference(), title);
147 }
148 List<TextData> textDataList = handleChar(state, reader, next, null, charOrder);
149 charOrder = charOrder.next();
150 for (TextData textData : textDataList){
151 structuredDescription.addElement(textData);
152 }
153 } else if (isStartingElement(next, STRING)) {
154 lastDescriptionElement = makeFeatureString(state, reader, feature,
155 taxonDescription, lastDescriptionElement, next, isFreetext);
156 } else if (isStartingElement(next, FIGURE_REF)) {
157 lastDescriptionElement = makeFeatureFigureRef(state, reader,
158 taxonDescription, isDescription, lastDescriptionElement, sourceReference, next);
159 } else if (isStartingElement(next, REFERENCES)) {
160 fireWarningEvent("Check correct handling of feature references", next, 4);
161 List<Reference> refs = handleReferences(state, reader, next);
162 if (!refs.isEmpty()) {
163 // TODO
164 Reference descriptionRef = state.getConfig().getSourceReference();
165 TaxonDescription description = getDefaultTaxonDescription(taxon, false, true, descriptionRef);
166 TextData featurePlaceholder = docImport.getFeaturePlaceholder(state, description, feature, true);
167 for (Reference citation : refs) {
168 featurePlaceholder.addPrimaryTaxonomicSource(citation);
169 }
170 } else {
171 String message = "No reference found in references";
172 fireWarningEvent(message, next, 6);
173 }
174 } else if (isStartingElement(next, NUM)) {
175 //TODO
176 handleNotYetImplementedElement(next);
177 } else if (isStartingElement(next, KEY)) {
178 keyImport.handleKey(state, reader, next);
179 } else {
180 handleUnexpectedElement(next);
181 }
182 }
183 throw new IllegalStateException("<Feature> has no closing tag");
184 }
185
186
187 /**
188 * Creates a full description text from the mark
189 * @param cachedEvents
190 * @return
191 */
192 private String makeFullDescriptionText(List<XMLEvent> events) {
193 String result = "";
194 for (XMLEvent event : events){
195 String text = normalize(event.asCharacters().getData());
196 result = CdmUtils.concat(" ", result, text);
197 }
198 return result;
199 }
200
201 /**
202 * @param state
203 * @param reader
204 * @param taxonDescription
205 * @param isDescription
206 * @param lastDescriptionElement
207 * @param next
208 * @return
209 * @throws XMLStreamException
210 */
211 public DescriptionElementBase makeFeatureFigureRef(MarkupImportState state, XMLEventReader reader,TaxonDescription taxonDescription,
212 boolean isDescription, DescriptionElementBase lastDescriptionElement, Reference sourceReference, XMLEvent next) throws XMLStreamException {
213 FigureDataHolder figureHolder = handleFigureRef(state, reader, next);
214 Feature figureFeature = getFeature(state, MarkupTransformer.uuidFigures, "Figures", "Figures", "Fig.",null);
215 if (isDescription) {
216 TextData figureHolderTextData = null;
217 // if (figureHolderTextData == null){
218 figureHolderTextData = TextData.NewInstance(figureFeature);
219 figureHolderTextData.addPrimaryTaxonomicSource(sourceReference);
220
221 if (StringUtils.isNotBlank(figureHolder.num)) {
222 String annotationText = "<num>" + figureHolder.num.trim() + "</num>";
223 Annotation annotation = Annotation.NewInstance(annotationText, AnnotationType.TECHNICAL(), getDefaultLanguage(state));
224 figureHolderTextData.addAnnotation(annotation);
225 }
226 if (StringUtils.isNotBlank(figureHolder.figurePart)) {
227 String annotationText = "<figurePart>"+ figureHolder.figurePart.trim() + "</figurePart>";
228 Annotation annotation = Annotation.NewInstance(annotationText,AnnotationType.EDITORIAL(), getDefaultLanguage(state));
229 figureHolderTextData.addAnnotation(annotation);
230 }
231 // if (StringUtils.isNotBlank(figureText)){
232 // figureHolderTextData.putText(language, figureText);
233 // }
234 taxonDescription.addElement(figureHolderTextData);
235 // }
236 registerFigureDemand(state, next, figureHolderTextData, figureHolder.ref);
237 } else {
238 if (lastDescriptionElement == null) {
239 String message = "No description element created yet that can be referred by figure. Create new TextData instead";
240 fireWarningEvent(message, next, 4);
241 lastDescriptionElement = TextData.NewInstance(figureFeature);
242 lastDescriptionElement.addPrimaryTaxonomicSource(sourceReference);
243 taxonDescription.addElement(lastDescriptionElement);
244 }
245 registerFigureDemand(state, next, lastDescriptionElement, figureHolder.ref);
246 }
247 return lastDescriptionElement;
248 }
249
250 /**
251 * @param state
252 * @param reader
253 * @param feature
254 * @param taxonDescription
255 * @param lastDescriptionElement
256 * @param distributionList
257 * @param next
258 * @return
259 * @throws XMLStreamException
260 * @throws
261 */
262 private DescriptionElementBase makeFeatureString(MarkupImportState state,XMLEventReader reader, Feature feature,
263 TaxonDescription taxonDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next, Boolean isFreetext) throws XMLStreamException {
264
265 //for specimen only
266 if (feature.equals(Feature.SPECIMEN()) || feature.equals(Feature.MATERIALS_EXAMINED())
267 || feature.getUuid().equals(MarkupTransformer.uuidWoodSpecimens)){
268
269 List<DescriptionElementBase> specimens = specimenImport.handleMaterialsExamined(state, reader, next, feature, taxonDescription);
270 for (DescriptionElementBase specimen : specimens){
271 if (specimen.getInDescription() == null){
272 taxonDescription.addElement(specimen);
273 }
274 lastDescriptionElement = specimen;
275 }
276 state.setCurrentCollector(null);
277
278 return lastDescriptionElement;
279 }else if (feature.equals(Feature.COMMON_NAME()) && (isFreetext == null || !isFreetext)){
280 List<DescriptionElementBase> commonNames = makeCommonNameString(state, reader, next);
281 //NOTE: we do also have the old version makeVernacular, which was called from "others" below
282 for (DescriptionElementBase commonName : commonNames){
283 taxonDescription.addElement(commonName);
284 lastDescriptionElement = commonName;
285 }
286 return lastDescriptionElement;
287 }
288 else{
289
290 //others
291 Map<String, SubheadingResult> subheadingMap = handleString(state, reader, next, feature);
292 for (String subheading : subheadingMap.keySet()) {
293 Feature subheadingFeature = feature;
294 if (StringUtils.isNotBlank(subheading) && subheadingMap.size() > 1) {
295 subheadingFeature = makeFeature(subheading, state, next, null);
296 }
297 if (feature.equals(Feature.COMMON_NAME()) && (isFreetext == null || !isFreetext)){
298 //NOTE: see above
299 // List<DescriptionElementBase> commonNames = makeVernacular(state, subheading, subheadingMap.get(subheading));
300 // for (DescriptionElementBase commonName : commonNames){
301 // taxonDescription.addElement(commonName);
302 // lastDescriptionElement = commonName;
303 // }
304 }else {
305 TextData textData = TextData.NewInstance(subheadingFeature);
306 SubheadingResult subHeadingResult = subheadingMap.get(subheading);
307 LanguageString languageString = textData.putText(getDefaultLanguage(state), subHeadingResult.text);
308 if (isNotEmptyCollection(subHeadingResult.references.getReferences())){
309 for (LabeledReference reference : subHeadingResult.references.getReferences()){
310 textData.addPrimaryTaxonomicSource(reference.ref, reference.detail);
311 }
312 textData.addImportSource(null, null, state.getConfig().getSourceReference(), null);
313 }else{
314 textData.addPrimaryTaxonomicSource(state.getConfig().getSourceReference());
315 }
316 //intext references
317 for (IntextReference intext : subHeadingResult.inlineReferences){
318 languageString.addIntextReference(intext);
319 }
320 taxonDescription.addElement(textData);
321 lastDescriptionElement = textData;
322 // TODO how to handle figures when these data are split in
323 // subheadings
324 }
325 }
326 return lastDescriptionElement;
327 }
328 }
329
330 /**
331 * @param classValue
332 * @param state
333 * @param parentEvent
334 * @param parentFeature
335 * @return
336 * @throws UndefinedTransformerMethodException
337 */
338 private Feature makeFeature(String classValue, MarkupImportState state, XMLEvent parentEvent, Feature parentFeature) {
339 UUID uuid;
340 try {
341 String featureText = StringUtils.capitalize(classValue);
342 if (parentFeature != null){
343 featureText = "<%s>" + featureText;
344 featureText = String.format(featureText, parentFeature.getTitleCache());
345 classValue = "<%s>" + classValue;
346 classValue = String.format(classValue, parentFeature.getTitleCache());
347 }
348
349
350 //get existing feature
351 if (classValue.endsWith(".")){
352 classValue = classValue.substring(0, classValue.length() - 1);
353 }
354 Feature feature = state.getTransformer().getFeatureByKey(classValue);
355 if (feature != null) {
356 return feature;
357 }
358 uuid = state.getTransformer().getFeatureUuid(classValue);
359
360 if (uuid == null){
361 uuid = state.getUnknownFeatureUuid(classValue);
362 }
363
364 if (uuid == null) {
365 // TODO
366 String message = "Uuid is not defined for '%s'";
367 message = String.format(message, classValue);
368 if (! message.contains("<")){
369 //log only top level features
370 fireWarningEvent(message, parentEvent, 8);
371 }
372 uuid = UUID.randomUUID();
373 state.putUnknownFeatureUuid(classValue, uuid);
374 }
375
376 // TODO eFlora vocabulary
377 TermVocabulary<Feature> voc = null;
378 feature = getFeature(state, uuid, featureText, featureText, classValue, voc);
379 if (parentFeature != null){
380 parentFeature.addIncludes(feature);
381 save(parentFeature, state);
382 }
383 save(feature, state);
384
385 if (feature == null) {
386 throw new NullPointerException(classValue + " not recognized as a feature");
387 }
388 // state.putFeatureToCurrentList(feature);
389 return feature;
390 } catch (Exception e) {
391 String message = "Could not create feature for %s: %s";
392 message = String.format(message, classValue, e.getMessage());
393 fireWarningEvent(message, parentEvent, 4);
394 state.putUnknownFeatureUuid(classValue, null);
395 // e.printStackTrace();
396 return Feature.UNKNOWN();
397 }
398 }
399
400 public class CharOrder{
401 static final int strlength = 3;
402 private int order = 1;
403 private CharOrder parent;
404 private final List<CharOrder> children = new ArrayList<CharOrder>();
405
406 public CharOrder nextChild(){
407 CharOrder result = new CharOrder();
408 if (! children.isEmpty()) {
409 result.order = children.get(children.size() - 1).order + 1;
410 }
411 result.parent = this;
412 children.add(result);
413 return result;
414 }
415
416 public CharOrder next(){
417 CharOrder result = new CharOrder();
418 result.order = order + 1;
419 result.parent = parent;
420 if (parent != null){
421 parent.children.add(result);
422 }
423 return result;
424 }
425
426 public String orderString(){
427 String parentString = parent == null ? "" : parent.orderString();
428 String result = CdmUtils.concat("-", parentString, StringUtils.leftPad(String.valueOf(order), strlength, '0'));
429 return result;
430 }
431
432 @Override
433 public String toString(){
434 return orderString();
435 }
436 }
437
438
439 /**
440 * Handle the char or subchar element. As
441 * @param state the import state
442 * @param reader
443 * @param parentEvent
444 * @param parentFeature in case of subchars we need to attache the newly created feature to a parent feature, should be <code>null</code>
445 * for top level chars.
446 * @return List of TextData. Not a single one as the recursive TextData will also be returned
447 * @throws XMLStreamException
448 */
449 private List<TextData> handleChar(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature parentFeature, CharOrder myCharOrder) throws XMLStreamException {
450 List<TextData> result = new ArrayList<>();
451 String classValue = getClassOnlyAttribute(parentEvent);
452 Feature feature = makeFeature(classValue, state, parentEvent, parentFeature);
453 if(parentFeature == null){
454 state.putFeatureToCharSorterList(feature);
455 }else{
456 FeatureSorterInfo parentInfo = state.getLatestCharFeatureSorterInfo();
457 // if (! parentInfo.getUuid().equals(parentFeature.getUuid())){
458 // String message = "The parent char feature is not the same as the latest feature. This is the case for char hierarchies with > 2 levels, which is not yet handled by the import";
459 // fireWarningEvent(message, parentEvent, 6);
460 // }else{
461 state.getLatestCharFeatureSorterInfo().addSubFeature(new FeatureSorterInfo(feature));
462 // }
463 }
464
465 TextData textData = TextData.NewInstance(feature);
466 textData.addPrimaryTaxonomicSource(state.getConfig().getSourceReference());
467 result.add(textData);
468
469 AnnotationType annType = getAnnotationType(state, MarkupTransformer.uuidOriginalOrder, "Original order", "Order in original treatment", null, AnnotationType.TECHNICAL().getVocabulary());
470 textData.addAnnotation(Annotation.NewInstance(myCharOrder.orderString(), annType, Language.ENGLISH()));
471
472 boolean isTextMode = true;
473 String text = "";
474 while (reader.hasNext()) {
475 XMLEvent next = readNoWhitespace(reader);
476 if (isMyEndingElement(next, parentEvent)) {
477 text = text.trim();
478 textData.putText(getDefaultLanguage(state), text);
479 return result;
480 } else if (isStartingElement(next, FIGURE_REF)) {
481 //TODO
482 handleNotYetImplementedElement(next);
483 } else if (isStartingElement(next, FOOTNOTE_REF)) {
484 //TODO
485 handleNotYetImplementedElement(next);
486 } else if (isStartingElement(next, BR)) {
487 text += "<br/>";
488 isTextMode = false;
489 } else if (isEndingElement(next, BR)) {
490 isTextMode = true;
491 } else if (isHtml(next)) {
492 text += getXmlTag(next);
493 } else if (next.isStartElement()) {
494 if (isStartingElement(next, ANNOTATION)) {
495 handleNotYetImplementedElement(next); //TODO test handleSimpleAnnotation
496 } else if (isStartingElement(next, ITALICS)) {
497 handleNotYetImplementedElement(next);
498 } else if (isStartingElement(next, BOLD)) {
499 handleNotYetImplementedElement(next);
500 } else if (isStartingElement(next, FIGURE)) {
501 handleFigure(state, reader, next, specimenImport, nomenclatureImport);
502 } else if (isStartingElement(next, SUB_CHAR)) {
503 List<TextData> subTextData = handleChar(state, reader, next, feature, myCharOrder.nextChild());
504 result.addAll(subTextData);
505 } else if (isStartingElement(next, FOOTNOTE)) {
506 FootnoteDataHolder footnote = handleFootnote(state, reader, next, specimenImport, nomenclatureImport);
507 if (footnote.isRef()) {
508 String message = "Ref footnote not implemented here";
509 fireWarningEvent(message, next, 4);
510 } else {
511 registerGivenFootnote(state, footnote);
512 }
513 } else {
514 handleUnexpectedStartElement(next.asStartElement());
515 }
516 } else if (next.isCharacters()) {
517 if (!isTextMode) {
518 String message = "String is not in text mode";
519 fireWarningEvent(message, next, 6);
520 } else {
521 text += next.asCharacters().getData();
522 }
523 } else {
524 handleUnexpectedEndElement(next.asEndElement());
525 }
526 }
527 throw new IllegalStateException("RefPart has no closing tag");
528 }
529
530
531 /**
532 * @param state
533 * @param reader
534 * @param classValue
535 * @param feature
536 * @param next
537 * @throws XMLStreamException
538 */
539 private void makeFeatureHeading(MarkupImportState state, XMLEventReader reader, String classValue, Feature feature, XMLEvent next) throws XMLStreamException {
540 String heading = handleHeading(state, reader, next);
541 if (StringUtils.isNotBlank(heading)) {
542 if (!heading.equalsIgnoreCase(classValue)) {
543 try {
544 if (!feature.equals(state.getTransformer().getFeatureByKey(heading))) {
545 UUID headerFeatureUuid = state.getTransformer().getFeatureUuid(heading);
546 if (!feature.getUuid().equals(headerFeatureUuid)) {
547 String message = "Feature heading '%s' differs from feature class '%s' and can not be transformed to feature";
548 message = String.format(message, heading, classValue);
549 fireWarningEvent(message, next, 1);
550 }
551 }
552 } catch (UndefinedTransformerMethodException e) {
553 throw new RuntimeException(e);
554 }
555 } else {
556 // do nothing
557 }
558 }
559 }
560
561
562 /**
563 * @param state
564 * @param reader
565 * @param feature
566 * @param taxon
567 * @param next
568 * @throws XMLStreamException
569 */
570 private void makeFeatureWriter(MarkupImportState state,XMLEventReader reader, Feature feature, Taxon taxon, XMLEvent next) throws XMLStreamException {
571 WriterDataHolder writer = handleWriter(state, reader, next);
572 if (isNotBlank(writer.writer)) {
573 // TODO
574 Reference ref = state.getConfig().getSourceReference();
575 TaxonDescription description = getDefaultTaxonDescription(taxon, false, true, ref);
576 TextData featurePlaceholder = docImport.getFeaturePlaceholder(state,
577 description, feature, true);
578 featurePlaceholder.addAnnotation(writer.annotation);
579 registerFootnotes(state, featurePlaceholder, writer.footnotes);
580 } else {
581 String message = "Writer element is empty";
582 fireWarningEvent(message, next, 4);
583 }
584 }
585
586
587 protected String handleHabitat(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
588 checkNoAttributes(parentEvent);
589 Taxon taxon = state.getCurrentTaxon();
590 // TODO which ref to take?
591 Reference sourceReference = state.getConfig().getSourceReference();
592
593
594 boolean isTextMode = true;
595 String text = "";
596 while (reader.hasNext()) {
597 XMLEvent next = readNoWhitespace(reader);
598 if (isMyEndingElement(next, parentEvent)) {
599 Feature feature = getFeature(
600 state,
601 MarkupTransformer.uuidExtractedHabitat,
602 "Extracted Habitat",
603 "An structured habitat that was extracted from a habitat text",
604 "extr. habit.", null);
605 TextData habitat = TextData.NewInstance(feature);
606 habitat.addPrimaryTaxonomicSource(sourceReference);
607 habitat.putText(getDefaultLanguage(state), text);
608 TaxonDescription description = getExtractedMarkupMarkedDescription(state, taxon, sourceReference);
609
610 description.addElement(habitat);
611
612 return text;
613 } else if (isStartingElement(next, ALTITUDE)) {
614 // OLD: text = text.trim() + getTaggedCData(state, reader, next);
615 text += handleAltitude(state, reader, next);
616 } else if (isStartingElement(next, LIFE_CYCLE_PERIODS)) {
617 handleNotYetImplementedElement(next);
618 } else if (next.isCharacters()) {
619 if (! isTextMode) {
620 String message = "String is not in text mode";
621 fireWarningEvent(message, next, 6);
622 } else {
623 text += next.asCharacters().getData();
624 }
625 } else if (isStartingElement(next, BR)) {
626 text += "<br/>";
627 isTextMode = false;
628 } else if (isEndingElement(next, BR)) {
629 isTextMode = true;
630 } else if (isStartingElement(next, REFERENCES)) {
631 handleNotYetImplementedElement(next);
632 } else if (isStartingElement(next, FIGURE_REF)) {
633 handleNotYetImplementedElement(next);
634 } else {
635 String type = next.toString();
636 String location = String.valueOf(next.getLocation().getLineNumber());
637 System.out.println("MarkupFeature.handleHabitat: Unexpected element in habitat: " + type + ": " + location);
638 handleUnexpectedElement(next);
639 }
640 }
641 throw new IllegalStateException("<Habitat> has no closing tag");
642 }
643
644 /**
645 * Creates "Extracted factual data" with feature altitude and returns the original text as string
646 * to be used in parent element.
647 * @see #handleHabitat(MarkupImportState, XMLEventReader, XMLEvent)
648 */
649 private String handleAltitude(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
650 checkNoAttributes(parentEvent);
651 Taxon taxon = state.getCurrentTaxon();
652 // TODO which ref to take?
653 Reference sourceReference = state.getConfig().getSourceReference();
654
655 boolean isTextMode = true;
656 String text = "";
657 while (reader.hasNext()) {
658 XMLEvent next = readNoWhitespace(reader);
659 if (isMyEndingElement(next, parentEvent)) {
660 Feature feature = getFeature(
661 state,
662 MarkupTransformer.uuidExtractedAltitude,
663 "Extracted Altitude",
664 "An altitude that was extracted from a habitat text",
665 "extr. alt.", null);
666 //TODO try to make quantitative data
667 TextData altitude = TextData.NewInstance(feature);
668 altitude.putText(getDefaultLanguage(state), text);
669 altitude.addPrimaryTaxonomicSource(sourceReference);
670 TaxonDescription description = getExtractedMarkupMarkedDescription(state, taxon, sourceReference);
671
672 description.addElement(altitude);
673
674 return text;
675 } else if (next.isCharacters()) {
676 if (! isTextMode) {
677 String message = "String is not in text mode";
678 fireWarningEvent(message, next, 6);
679 } else {
680 text += next.asCharacters().getData();
681 }
682 } else if (isStartingElement(next, BR)) {
683 text += "<br/>";
684 isTextMode = false;
685 } else if (isEndingElement(next, BR)) {
686 isTextMode = true;
687 } else {
688 String type = next.toString();
689 String location = String.valueOf(next.getLocation().getLineNumber());
690 System.out.println("MarkupFeatureImport.handleAltitude: Unexpected element in habitat: " + type + ": " + location);
691 handleUnexpectedElement(next);
692 }
693 }
694 throw new IllegalStateException("<Habitat> has no closing tag");
695 }
696
697
698
699
700 private FigureDataHolder handleFigureRef(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent)
701 throws XMLStreamException {
702 FigureDataHolder result = new FigureDataHolder();
703 Map<String, Attribute> attributes = getAttributes(parentEvent);
704 result.ref = getAndRemoveAttributeValue(attributes, REF);
705 checkNoAttributes(attributes, parentEvent);
706
707 // text is not handled, needed only for debugging purposes
708 String text = "";
709 while (reader.hasNext()) {
710 XMLEvent next = readNoWhitespace(reader);
711 if (isMyEndingElement(next, parentEvent)) {
712 return result;
713 } else if (isStartingElement(next, NUM)) {
714 String num = getCData(state, reader, next);
715 result.num = num; // num is not handled during import
716 } else if (isStartingElement(next, FIGURE_PART)) {
717 result.figurePart = getCData(state, reader, next);
718 } else if (next.isCharacters()) {
719 text += next.asCharacters().getData();
720 } else {
721 fireUnexpectedEvent(next, 0);
722 }
723 }
724 throw new IllegalStateException("<figureRef> has no end tag");
725 }
726
727
728 private void registerFigureDemand(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, String figureRef) {
729 Media existingFigure = state.getFigure(figureRef);
730 if (existingFigure != null) {
731 attachFigure(state, next, entity, existingFigure);
732 } else {
733 Set<AnnotatableEntity> demands = state.getFigureDemands(figureRef);
734 if (demands == null) {
735 demands = new HashSet<AnnotatableEntity>();
736 state.putFigureDemands(figureRef, demands);
737 }
738 demands.add(entity);
739 }
740 }
741
742 private List<DescriptionElementBase> makeCommonNameString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException{
743
744 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
745
746 checkNoAttributes(parentEvent);
747
748 while (reader.hasNext()) {
749 XMLEvent next = readNoWhitespace(reader);
750 if (isMyEndingElement(next, parentEvent)) {
751 if (result.isEmpty()){
752 fireWarningEvent("Common name was not created", next, 4);
753 }
754 return result;
755 } else if (isStartingElement(next, VERNACULAR_NAMES)) {
756 result = makeVernacularNames(state, reader, next);
757 } else if (isStartingElement(next, SUB_HEADING)) {
758 String subheading = getCData(state, reader, next);
759 if (! subheading.matches("(Nom(s)? vernaculaire(s)?\\:|Vern.)")){
760 fireWarningEvent("Subheading for vernacular name not recognized: " + subheading, next, 4);
761 }
762 } else if (next.isCharacters()) {
763 String chars = next.asCharacters().toString().trim();
764 if (chars.equals(".")){
765 //do nothing
766 }else{
767 fireWarningEvent("Character not handled in vernacular name: " + chars, next, 4);
768 }
769 } else if (isStartingElement(next, REFERENCES)) {
770 handleNotYetImplementedElement(next);
771 }else {
772 handleUnexpectedElement(next);
773 }
774 }
775 throw new IllegalStateException("closing tag is missing");
776
777
778 }
779
780 private List<DescriptionElementBase> makeVernacularNames(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException{
781 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
782 checkNoAttributes(parentEvent);
783
784 while (reader.hasNext()) {
785 XMLEvent next = readNoWhitespace(reader);
786 if (isMyEndingElement(next, parentEvent)) {
787 state.removeCurrentAreas();
788 return result;
789 } else if (isStartingElement(next, VERNACULAR_NAME)) {
790 List<CommonTaxonName> names = makeSingleVernacularName(state, reader, next);
791 result.addAll(names);
792 } else if (isStartingElement(next, SUB_HEADING)) {
793 makeVernacularNamesSubHeading(state, reader, next);
794 } else {
795 handleUnexpectedElement(next);
796 }
797 }
798 throw new IllegalStateException("closing tag is missing");
799
800 }
801
802 private void makeVernacularNamesSubHeading(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
803 checkNoAttributes(parentEvent);
804
805 String text = "";
806 while (reader.hasNext()) {
807 XMLEvent next = readNoWhitespace(reader);
808 if (isMyEndingElement(next, parentEvent)) {
809 if (StringUtils.isNotBlank(text)){
810 NamedArea area = getCommonNameArea(text);
811 if (area != null){
812 state.removeCurrentAreas();
813 state.addCurrentArea(area);
814 }else{
815 fireWarningEvent("Vernacular subheading not recognized", next, 8);
816 }
817 }
818
819 return ;
820 } else if (next.isCharacters()) {
821 text += next.asCharacters().getData();
822 } else {
823 handleUnexpectedElement(next);
824 }
825 }
826 throw new IllegalStateException("closing tag is missing");
827
828 }
829
830 private NamedArea getCommonNameArea(String text) {
831 if (text.endsWith(":")){
832 text = text.substring(0, text.length()-1);
833 }
834
835 // for now we do it hardcoded
836 if (text.equalsIgnoreCase("Guyana")){
837 return Country.GUYANAREPUBLICOF();
838 }else if (text.equalsIgnoreCase("Suriname")){
839 return Country.SURINAMEREPUBLICOF();
840 }else if (text.equalsIgnoreCase("French Guiana")){
841 return Country.FRENCHGUIANA();
842 }
843 return null;
844 }
845
846 private List<CommonTaxonName> makeSingleVernacularName(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException{
847 checkNoAttributes(parentEvent);
848 List<CommonTaxonName> result = new ArrayList<CommonTaxonName>();
849
850 Language language = state.getDefaultLanguage();
851 while (reader.hasNext()) {
852 XMLEvent next = readNoWhitespace(reader);
853 if (isMyEndingElement(next, parentEvent)) {
854 for (CommonTaxonName commonName : result){
855 commonName.setLanguage(language);
856 }
857 // if (isNotBlank(name)){
858 // result.setName(name);
859 // }else{
860 // fireWarningEvent("No name string for common name", parentEvent, 4);
861 // }
862
863 return result;
864 } else if (isStartingElement(next, NAME)) {
865 //TODO test
866 CommonTaxonName name = handleVernacularNameName(state, reader, next);
867 if (name != null){
868 result.add(name);
869 }
870 } else if (isStartingElement(next, LOCAL_LANGUAGE)) {
871 Language localLanguage = handleLocalLanguage(state, reader, next);
872 if (localLanguage != null){
873 language = localLanguage;
874 }
875 } else if (isStartingElement(next, TRANSLATION)) {
876 //TODO
877 handleNotYetImplementedElement(next);
878 } else if (isStartingElement(next, LOCALITY)) {
879 //TODO
880 handleNotYetImplementedElement(next);
881 } else if (isStartingElement(next, ANNOTATION)){
882 //TODO
883 handleNotYetImplementedElement(next);
884 } else if (isStartingElement(next, FOOTNOTE_REF)) {
885 //TODO
886 handleNotYetImplementedElement(next);
887 } else if (next.isCharacters()) {
888 String chars = next.asCharacters().toString().trim();
889 if (chars.equals("(") || chars.equals(")") || chars.equals(",")){
890 //do nothing
891 }else{
892 fireWarningEvent("Character not handled in vernacular name: " + chars, next, 4);
893 }
894 } else {
895 handleUnexpectedElement(next);
896 }
897 }
898 throw new IllegalStateException("closing tag is missing");
899 }
900
901 private CommonTaxonName handleVernacularNameName(MarkupImportState state, XMLEventReader reader,
902 XMLEvent parentEvent) throws XMLStreamException {
903 //attributes
904 Map<String, Attribute> attributes = getAttributes(parentEvent);
905 this.checkAndRemoveAttributeValue(attributes, CLASS, "vernacular");
906 this.checkNoAttributes(attributes, parentEvent);
907
908 //
909 String text = getCData(state, reader, parentEvent, false);
910 CommonTaxonName name = CommonTaxonName.NewInstance(text, null);
911 if (! state.getCurrentAreas().isEmpty()){
912 if (state.getCurrentAreas().size() > 1){
913 fireWarningEvent("Multiple areas for common name not yet covered by CDM", parentEvent , 8);
914 }else{
915 name.setArea(state.getCurrentAreas().iterator().next());
916 }
917 }
918 return name;
919 }
920
921 private Language handleLocalLanguage(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
922 //attributes
923 Map<String, Attribute> attributes = getAttributes(parentEvent);
924 boolean doubtful = getAndRemoveBooleanAttributeValue(parentEvent, attributes, DOUBTFUL, false);
925 boolean unknown = getAndRemoveBooleanAttributeValue(parentEvent, attributes, UNKNOWN, false);
926 this.checkNoAttributes(attributes, parentEvent);
927
928 if (doubtful == true){
929 fireWarningEvent("Doubtful not yet implemented for local language", parentEvent, 2);
930 }
931 if (unknown == true){
932 fireWarningEvent("Unknown not yet implemented for local language ", parentEvent, 2);
933 }
934
935 //
936 String text = getCData(state, reader, parentEvent);
937 Language lang = makeLanguageByLangStr(state, text);
938 return lang;
939
940 }
941
942 private List<DescriptionElementBase> makeVernacular(MarkupImportState state, String subheading, String commonNameString) throws XMLStreamException {
943 List<DescriptionElementBase> result = new ArrayList<>();
944 Reference sourceReference = state.getConfig().getSourceReference();
945 String[] splits = commonNameString.split(",");
946 for (String split : splits){
947 split = split.trim();
948 if (! split.matches(".*\\(.*\\)\\.?")){
949 fireWarningEvent("Common name string '"+split+"' does not match given pattern", state.getReader().peek(), 4);
950 }
951
952 String name = split.replaceAll("\\(.*\\)", "").replace(".", "").trim();
953 String languageStr = split.replaceFirst(".*\\(", "").replaceAll("\\)\\.?", "").trim();
954
955 Language language = null;
956 if (StringUtils.isNotBlank(languageStr)){
957 language = makeLanguageByLangStr(state, languageStr);
958 }
959 DescriptionElementBase commonName;
960 if (name != null && name.length() < 255 ){
961 NamedArea area = null;
962 commonName = CommonTaxonName.NewInstance(name, language, area);
963 commonName.addPrimaryTaxonomicSource(sourceReference);
964 }else{
965 if (language == null){
966 language = getDefaultLanguage(state);
967 }
968 commonName = TextData.NewInstance(Feature.COMMON_NAME(), name, language, null);
969 commonName.addPrimaryTaxonomicSource(sourceReference);
970 String warning = "Vernacular feature is >255 size. Therefore it is handled as TextData, not CommonTaxonName: " + name;
971 fireWarningEvent(warning, state.getReader().peek(), 1);
972 }
973 result.add(commonName);
974 }
975
976 return result;
977 }
978
979 private Language makeLanguageByLangStr(MarkupImportState state, String languageStr) throws XMLStreamException {
980 try {
981 Language language = state.getTransformer().getLanguageByKey(languageStr);
982 if (language == null){
983 UUID langUuid = state.getTransformer().getLanguageUuid(languageStr);
984 TermVocabulary<?> voc = null;
985 language = getLanguage(state, langUuid, languageStr, languageStr, null, voc);
986 }
987 if (language == null){
988 String warning = "Language " + languageStr + " not recognized by transformer";
989 fireWarningEvent(warning, state.getReader().peek(), 4);
990 }
991 return language;
992 } catch (UndefinedTransformerMethodException e) {
993 throw new RuntimeException(e);
994 }
995 }
996
997
998 private String handleHeading(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
999 checkNoAttributes(parentEvent);
1000
1001 String text = "";
1002 while (reader.hasNext()) {
1003 XMLEvent next = readNoWhitespace(reader);
1004 if (isMyEndingElement(next, parentEvent)) {
1005 return text;
1006 } else if (next.isStartElement()) {
1007 if (isStartingElement(next, FOOTNOTE)) {
1008 handleNotYetImplementedElement(next);
1009 } else {
1010 handleUnexpectedStartElement(next.asStartElement());
1011 }
1012 } else if (next.isCharacters()) {
1013 text += next.asCharacters().getData();
1014 } else {
1015 handleUnexpectedEndElement(next.asEndElement());
1016 }
1017 }
1018 throw new IllegalStateException("<String> has no closing tag");
1019
1020 }
1021
1022
1023 private List<Reference> handleReferences(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1024 // attributes
1025 Map<String, Attribute> attributes = getAttributes(parentEvent);
1026 String bibliography = getAndRemoveAttributeValue(attributes,
1027 BIBLIOGRAPHY);
1028 String serialsAbbreviations = getAndRemoveAttributeValue(attributes,
1029 SERIALS_ABBREVIATIONS);
1030 if (isNotBlank(bibliography) || isNotBlank(serialsAbbreviations)) {
1031 String message = "Attributes not yet implemented for <references>";
1032 fireWarningEvent(message, parentEvent, 4);
1033 }
1034
1035 List<Reference> result = new ArrayList<>();
1036
1037 // elements
1038 while (reader.hasNext()) {
1039 XMLEvent next = readNoWhitespace(reader);
1040 if (next.isEndElement()) {
1041 if (isMyEndingElement(next, parentEvent)) {
1042 return result;
1043 } else {
1044 if (isEndingElement(next, HEADING)) {
1045 // NOT YET IMPLEMENTED
1046 popUnimplemented(next.asEndElement());
1047 } else if (isEndingElement(next, WRITER)) {
1048 // NOT YET IMPLEMENTED
1049 popUnimplemented(next.asEndElement());
1050 } else if (isEndingElement(next, FOOTNOTE)) {
1051 // NOT YET IMPLEMENTED
1052 popUnimplemented(next.asEndElement());
1053 } else if (isEndingElement(next, STRING)) {
1054 // NOT YET IMPLEMENTED
1055 popUnimplemented(next.asEndElement());
1056 } else if (isEndingElement(next, REF_NUM)) {
1057 // NOT YET IMPLEMENTED
1058 popUnimplemented(next.asEndElement());
1059 } else {
1060 handleUnexpectedEndElement(next.asEndElement());
1061 }
1062 }
1063 } else if (next.isStartElement()) {
1064 if (isStartingElement(next, HEADING)) {
1065 handleNotYetImplementedElement(next);
1066 } else if (isStartingElement(next, SUB_HEADING)) {
1067 String subheading = getCData(state, reader, next).trim();
1068 String excludePattern = "(i?)(References?|Literature):?";
1069 if (!subheading.matches(excludePattern)) {
1070 fireNotYetImplementedElement(next.getLocation(), next.asStartElement().getName(), 0);
1071 }
1072 } else if (isStartingElement(next, WRITER)) {
1073 handleNotYetImplementedElement(next);
1074 } else if (isStartingElement(next, FOOTNOTE)) {
1075 handleNotYetImplementedElement(next);
1076 } else if (isStartingElement(next, STRING)) {
1077 handleNotYetImplementedElement(next);
1078 } else if (isStartingElement(next, REF_NUM)) {
1079 handleNotYetImplementedElement(next);
1080 } else if (isStartingElement(next, REFERENCE)) {
1081 Reference ref = nomenclatureImport.handleReference(state, reader, next);
1082 result.add(ref);
1083 } else {
1084 handleUnexpectedStartElement(next);
1085 }
1086 } else {
1087 handleUnexpectedElement(next);
1088 }
1089 }
1090 throw new IllegalStateException("<References> has no closing tag");
1091 }
1092
1093
1094 /**
1095 * Returns all the included text and tags as string. The result should look
1096 * similar to the original xml part.
1097 */
1098 private String getTaggedCData(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1099 checkNoAttributes(parentEvent);
1100
1101 String text = getXmlTag(parentEvent);
1102 while (reader.hasNext()) {
1103 XMLEvent next = readNoWhitespace(reader);
1104 if (isMyEndingElement(next, parentEvent)) {
1105 text += getXmlTag(next);
1106 return text;
1107 } else if (next.isStartElement()) {
1108 text += getTaggedCData(state, reader, next);
1109 } else if (next.isEndElement()) {
1110 //is this needed?
1111 text += getTaggedCData(state, reader, next);
1112 } else if (next.isCharacters()) {
1113 text += next.asCharacters().getData();
1114 } else {
1115 handleUnexpectedEndElement(next.asEndElement());
1116 }
1117 }
1118 throw new IllegalStateException("Some tag has no closing tag");
1119 }
1120 }