/**
 * Copyright (C) 2009 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.markup
;
12 import java
.util
.ArrayList
;
13 import java
.util
.HashSet
;
14 import java
.util
.List
;
17 import java
.util
.UUID
;
19 import javax
.xml
.stream
.XMLEventReader
;
20 import javax
.xml
.stream
.XMLStreamException
;
21 import javax
.xml
.stream
.events
.Attribute
;
22 import javax
.xml
.stream
.events
.XMLEvent
;
24 import org
.apache
.commons
.lang
.StringUtils
;
25 import org
.apache
.log4j
.Logger
;
27 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
28 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
29 import eu
.etaxonomy
.cdm
.model
.common
.AnnotatableEntity
;
30 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
31 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
32 import eu
.etaxonomy
.cdm
.model
.common
.IntextReference
;
33 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
34 import eu
.etaxonomy
.cdm
.model
.common
.LanguageString
;
35 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
36 import eu
.etaxonomy
.cdm
.model
.common
.TermVocabulary
;
37 import eu
.etaxonomy
.cdm
.model
.description
.CommonTaxonName
;
38 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
39 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
40 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
41 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
42 import eu
.etaxonomy
.cdm
.model
.location
.Country
;
43 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
44 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
45 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
46 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
53 public class MarkupFeatureImport
extends MarkupImportBase
{
54 @SuppressWarnings("unused")
55 private static final Logger logger
= Logger
.getLogger(MarkupFeatureImport
.class);
57 protected static final String MODS_TITLEINFO
= "titleInfo";
59 private final MarkupSpecimenImport specimenImport
;
60 private final MarkupNomenclatureImport nomenclatureImport
;
61 private final MarkupKeyImport keyImport
;
63 public MarkupFeatureImport(MarkupDocumentImport docImport
, MarkupSpecimenImport specimenImport
,
64 MarkupNomenclatureImport nomenclatureImport
, MarkupKeyImport keyImport
) {
66 this.specimenImport
= specimenImport
;
67 this.nomenclatureImport
= nomenclatureImport
;
68 this.keyImport
= keyImport
;
69 this.featureImport
= this;
72 public void handleFeature(MarkupImportState state
, XMLEventReader readerOrig
, XMLEvent parentEvent
) throws XMLStreamException
{
73 Map
<String
, Attribute
> attrs
= getAttributes(parentEvent
);
74 Boolean isFreetext
= getAndRemoveBooleanAttributeValue(parentEvent
, attrs
, IS_FREETEXT
, false);
75 String classValue
=getAndRemoveRequiredAttributeValue(parentEvent
, attrs
, CLASS
);
76 checkNoAttributes(attrs
, parentEvent
);
78 Reference sourceReference
= state
.getConfig().getSourceReference();
79 Feature feature
= makeFeature(classValue
, state
, parentEvent
, null);
80 Taxon taxon
= state
.getCurrentTaxon();
81 TaxonDescription taxonDescription
= getDefaultTaxonDescription(taxon
, NO_IMAGE_GALLERY
, CREATE_NEW
, sourceReference
);
82 if (!taxonDescription
.isDefault()){
83 taxonDescription
.setDefault(true);
85 // TextData figureHolderTextData = null; //for use with one TextData for
89 TaxonDescription structuredDescription
= null;
91 boolean isDescription
= feature
.equals(Feature
.DESCRIPTION());
93 XMLEventReader reader
;
95 LookAheadEventReader lookAhead
= new LookAheadEventReader(parentEvent
.asStartElement(), readerOrig
);
96 String descriptionText
= makeFullDescriptionText(lookAhead
.getCachedEvents(true));
97 TextData descriptionTextData
= TextData
.NewInstance(Feature
.DESCRIPTION(), descriptionText
, getDefaultLanguage(state
),null);
98 descriptionTextData
.addPrimaryTaxonomicSource(sourceReference
);
99 taxonDescription
.addElement(descriptionTextData
);
105 DescriptionElementBase lastDescriptionElement
= null;
107 CharOrder charOrder
= new CharOrder();
108 while (reader
.hasNext()) {
109 XMLEvent next
= readNoWhitespace(reader
);
110 if (isMyEndingElement(next
, parentEvent
)) {
111 state
.putFeatureToGeneralSorterList(feature
);
113 } else if (isEndingElement(next
, DISTRIBUTION_LIST
) || isEndingElement(next
, HABITAT_LIST
)) {
114 // only handle list elements
115 } else if (isStartingElement(next
, HEADING
)) {
116 makeFeatureHeading(state
, reader
, classValue
, feature
, next
);
117 } else if (isStartingElement(next
, WRITER
)) {
118 makeFeatureWriter(state
, reader
, feature
, taxon
, next
);
119 // } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
120 // if (!feature.equals(Feature.DISTRIBUTION())) {
121 // String message = "Distribution locality only allowed for feature of type 'distribution'";
122 // fireWarningEvent(message, next, 4);
124 // handleDistributionLocality(state, reader, next);
125 } else if (isStartingElement(next
, DISTRIBUTION_LIST
) || isStartingElement(next
, HABITAT_LIST
)) {
126 // only handle single list elements
127 } else if (isStartingElement(next
, HABITAT
)) {
128 if (!(feature
.equals(Feature
.HABITAT())
129 || feature
.equals(Feature
.HABITAT_ECOLOGY())
130 || feature
.equals(Feature
.ECOLOGY()))) {
131 String message
= "Habitat only allowed for feature of type 'habitat','habitat ecology' or 'ecology'";
132 fireWarningEvent(message
, next
, 4);
134 String habitatString
= handleHabitat(state
, reader
, next
);
135 fireWarningEvent("Return value from habitat tag not yet handled: " + habitatString
, next
, 4);
136 } else if (isStartingElement(next
, CHAR
)) {
137 if (structuredDescription
== null){
138 MarkerType descriptionMarker
;
140 descriptionMarker
= getMarkerType(state
, state
.getTransformer().getMarkerTypeUuid("structured description"),
141 "Structured Descriptions", "Marker to mark descriptions used for more structured descriptions", null, null);
142 } catch (UndefinedTransformerMethodException e
) {
143 throw new RuntimeException(e
);
145 String title
= "Structured descriptive data for " + taxon
.getName().getTitleCache();
146 structuredDescription
= getMarkedTaxonDescription(taxon
, descriptionMarker
, NO_IMAGE_GALLERY
, CREATE_NEW
, state
.getConfig().getSourceReference(), title
);
148 List
<TextData
> textDataList
= handleChar(state
, reader
, next
, null, charOrder
);
149 charOrder
= charOrder
.next();
150 for (TextData textData
: textDataList
){
151 structuredDescription
.addElement(textData
);
153 } else if (isStartingElement(next
, STRING
)) {
154 lastDescriptionElement
= makeFeatureString(state
, reader
, feature
,
155 taxonDescription
, lastDescriptionElement
, next
, isFreetext
);
156 } else if (isStartingElement(next
, FIGURE_REF
)) {
157 lastDescriptionElement
= makeFeatureFigureRef(state
, reader
,
158 taxonDescription
, isDescription
, lastDescriptionElement
, sourceReference
, next
);
159 } else if (isStartingElement(next
, REFERENCES
)) {
160 fireWarningEvent("Check correct handling of feature references", next
, 4);
161 List
<Reference
> refs
= handleReferences(state
, reader
, next
);
162 if (!refs
.isEmpty()) {
164 Reference descriptionRef
= state
.getConfig().getSourceReference();
165 TaxonDescription description
= getDefaultTaxonDescription(taxon
, false, true, descriptionRef
);
166 TextData featurePlaceholder
= docImport
.getFeaturePlaceholder(state
, description
, feature
, true);
167 for (Reference citation
: refs
) {
168 featurePlaceholder
.addPrimaryTaxonomicSource(citation
);
171 String message
= "No reference found in references";
172 fireWarningEvent(message
, next
, 6);
174 } else if (isStartingElement(next
, NUM
)) {
176 handleNotYetImplementedElement(next
);
177 } else if (isStartingElement(next
, KEY
)) {
178 keyImport
.handleKey(state
, reader
, next
);
180 handleUnexpectedElement(next
);
183 throw new IllegalStateException("<Feature> has no closing tag");
188 * Creates a full description text from the mark
189 * @param cachedEvents
192 private String
makeFullDescriptionText(List
<XMLEvent
> events
) {
194 for (XMLEvent event
: events
){
195 String text
= normalize(event
.asCharacters().getData());
196 result
= CdmUtils
.concat(" ", result
, text
);
204 * @param taxonDescription
205 * @param isDescription
206 * @param lastDescriptionElement
209 * @throws XMLStreamException
211 public DescriptionElementBase
makeFeatureFigureRef(MarkupImportState state
, XMLEventReader reader
,TaxonDescription taxonDescription
,
212 boolean isDescription
, DescriptionElementBase lastDescriptionElement
, Reference sourceReference
, XMLEvent next
) throws XMLStreamException
{
213 FigureDataHolder figureHolder
= handleFigureRef(state
, reader
, next
);
214 Feature figureFeature
= getFeature(state
, MarkupTransformer
.uuidFigures
, "Figures", "Figures", "Fig.",null);
216 TextData figureHolderTextData
= null;
217 // if (figureHolderTextData == null){
218 figureHolderTextData
= TextData
.NewInstance(figureFeature
);
219 figureHolderTextData
.addPrimaryTaxonomicSource(sourceReference
);
221 if (StringUtils
.isNotBlank(figureHolder
.num
)) {
222 String annotationText
= "<num>" + figureHolder
.num
.trim() + "</num>";
223 Annotation annotation
= Annotation
.NewInstance(annotationText
, AnnotationType
.TECHNICAL(), getDefaultLanguage(state
));
224 figureHolderTextData
.addAnnotation(annotation
);
226 if (StringUtils
.isNotBlank(figureHolder
.figurePart
)) {
227 String annotationText
= "<figurePart>"+ figureHolder
.figurePart
.trim() + "</figurePart>";
228 Annotation annotation
= Annotation
.NewInstance(annotationText
,AnnotationType
.EDITORIAL(), getDefaultLanguage(state
));
229 figureHolderTextData
.addAnnotation(annotation
);
231 // if (StringUtils.isNotBlank(figureText)){
232 // figureHolderTextData.putText(language, figureText);
234 taxonDescription
.addElement(figureHolderTextData
);
236 registerFigureDemand(state
, next
, figureHolderTextData
, figureHolder
.ref
);
238 if (lastDescriptionElement
== null) {
239 String message
= "No description element created yet that can be referred by figure. Create new TextData instead";
240 fireWarningEvent(message
, next
, 4);
241 lastDescriptionElement
= TextData
.NewInstance(figureFeature
);
242 lastDescriptionElement
.addPrimaryTaxonomicSource(sourceReference
);
243 taxonDescription
.addElement(lastDescriptionElement
);
245 registerFigureDemand(state
, next
, lastDescriptionElement
, figureHolder
.ref
);
247 return lastDescriptionElement
;
254 * @param taxonDescription
255 * @param lastDescriptionElement
256 * @param distributionList
259 * @throws XMLStreamException
262 private DescriptionElementBase
makeFeatureString(MarkupImportState state
,XMLEventReader reader
, Feature feature
,
263 TaxonDescription taxonDescription
, DescriptionElementBase lastDescriptionElement
, XMLEvent next
, Boolean isFreetext
) throws XMLStreamException
{
266 if (feature
.equals(Feature
.SPECIMEN()) || feature
.equals(Feature
.MATERIALS_EXAMINED())
267 || feature
.getUuid().equals(MarkupTransformer
.uuidWoodSpecimens
)){
269 List
<DescriptionElementBase
> specimens
= specimenImport
.handleMaterialsExamined(state
, reader
, next
, feature
, taxonDescription
);
270 for (DescriptionElementBase specimen
: specimens
){
271 if (specimen
.getInDescription() == null){
272 taxonDescription
.addElement(specimen
);
274 lastDescriptionElement
= specimen
;
276 state
.setCurrentCollector(null);
278 return lastDescriptionElement
;
279 }else if (feature
.equals(Feature
.COMMON_NAME()) && (isFreetext
== null || !isFreetext
)){
280 List
<DescriptionElementBase
> commonNames
= makeCommonNameString(state
, reader
, next
);
281 //NOTE: we do also have the old version makeVernacular, which was called from "others" below
282 for (DescriptionElementBase commonName
: commonNames
){
283 taxonDescription
.addElement(commonName
);
284 lastDescriptionElement
= commonName
;
286 return lastDescriptionElement
;
291 Map
<String
, SubheadingResult
> subheadingMap
= handleString(state
, reader
, next
, feature
);
292 for (String subheading
: subheadingMap
.keySet()) {
293 Feature subheadingFeature
= feature
;
294 if (StringUtils
.isNotBlank(subheading
) && subheadingMap
.size() > 1) {
295 subheadingFeature
= makeFeature(subheading
, state
, next
, null);
297 if (feature
.equals(Feature
.COMMON_NAME()) && (isFreetext
== null || !isFreetext
)){
299 // List<DescriptionElementBase> commonNames = makeVernacular(state, subheading, subheadingMap.get(subheading));
300 // for (DescriptionElementBase commonName : commonNames){
301 // taxonDescription.addElement(commonName);
302 // lastDescriptionElement = commonName;
305 TextData textData
= TextData
.NewInstance(subheadingFeature
);
306 SubheadingResult subHeadingResult
= subheadingMap
.get(subheading
);
307 LanguageString languageString
= textData
.putText(getDefaultLanguage(state
), subHeadingResult
.text
);
308 if (isNotEmptyCollection(subHeadingResult
.references
.getReferences())){
309 for (LabeledReference reference
: subHeadingResult
.references
.getReferences()){
310 textData
.addPrimaryTaxonomicSource(reference
.ref
, reference
.detail
);
312 textData
.addImportSource(null, null, state
.getConfig().getSourceReference(), null);
314 textData
.addPrimaryTaxonomicSource(state
.getConfig().getSourceReference());
317 for (IntextReference intext
: subHeadingResult
.inlineReferences
){
318 languageString
.addIntextReference(intext
);
320 taxonDescription
.addElement(textData
);
321 lastDescriptionElement
= textData
;
322 // TODO how to handle figures when these data are split in
326 return lastDescriptionElement
;
334 * @param parentFeature
336 * @throws UndefinedTransformerMethodException
338 private Feature
makeFeature(String classValue
, MarkupImportState state
, XMLEvent parentEvent
, Feature parentFeature
) {
341 String featureText
= StringUtils
.capitalize(classValue
);
342 if (parentFeature
!= null){
343 featureText
= "<%s>" + featureText
;
344 featureText
= String
.format(featureText
, parentFeature
.getTitleCache());
345 classValue
= "<%s>" + classValue
;
346 classValue
= String
.format(classValue
, parentFeature
.getTitleCache());
350 //get existing feature
351 if (classValue
.endsWith(".")){
352 classValue
= classValue
.substring(0, classValue
.length() - 1);
354 Feature feature
= state
.getTransformer().getFeatureByKey(classValue
);
355 if (feature
!= null) {
358 uuid
= state
.getTransformer().getFeatureUuid(classValue
);
361 uuid
= state
.getUnknownFeatureUuid(classValue
);
366 String message
= "Uuid is not defined for '%s'";
367 message
= String
.format(message
, classValue
);
368 if (! message
.contains("<")){
369 //log only top level features
370 fireWarningEvent(message
, parentEvent
, 8);
372 uuid
= UUID
.randomUUID();
373 state
.putUnknownFeatureUuid(classValue
, uuid
);
376 // TODO eFlora vocabulary
377 TermVocabulary
<Feature
> voc
= null;
378 feature
= getFeature(state
, uuid
, featureText
, featureText
, classValue
, voc
);
379 if (parentFeature
!= null){
380 parentFeature
.addIncludes(feature
);
381 save(parentFeature
, state
);
383 save(feature
, state
);
385 if (feature
== null) {
386 throw new NullPointerException(classValue
+ " not recognized as a feature");
388 // state.putFeatureToCurrentList(feature);
390 } catch (Exception e
) {
391 String message
= "Could not create feature for %s: %s";
392 message
= String
.format(message
, classValue
, e
.getMessage());
393 fireWarningEvent(message
, parentEvent
, 4);
394 state
.putUnknownFeatureUuid(classValue
, null);
395 // e.printStackTrace();
396 return Feature
.UNKNOWN();
/**
 * Position of a char (or subchar) element within its treatment.
 * <p>
 * Every instance carries a 1-based order among its siblings and a link to its parent,
 * so that {@link #orderString()} can render the complete path, e.g. <code>002-001</code>
 * for the first subchar of the second top level char.
 */
public class CharOrder {
    /** Minimum number of digits each path segment is padded to. */
    static final int strlength = 3;
    private int order = 1;
    private CharOrder parent;
    private final List<CharOrder> children = new ArrayList<>();

    /**
     * Creates the next child of this instance. Its order is one higher than the order of
     * the child registered last, or 1 if there is no child yet.
     *
     * @return the new child, already registered with this instance
     */
    public CharOrder nextChild() {
        CharOrder child = new CharOrder();
        if (!children.isEmpty()) {
            child.order = children.get(children.size() - 1).order + 1;
        }
        child.parent = this;
        children.add(child);
        return child;
    }

    /**
     * Creates the following sibling of this instance, with an order one higher than this one.
     *
     * @return the new sibling; it is registered with the parent unless this is a top level instance
     */
    public CharOrder next() {
        CharOrder sibling = new CharOrder();
        sibling.order = order + 1;
        sibling.parent = parent;
        // Top level instances have no parent to register with.
        if (parent != null) {
            parent.children.add(sibling);
        }
        return sibling;
    }

    /**
     * Renders the path from the top level instance down to this one. Each segment is the
     * zero padded order, segments are separated by "-".
     *
     * @return the order path, e.g. <code>001</code> or <code>002-003</code>
     */
    public String orderString() {
        // Locale.ROOT keeps the digits ASCII independent of the default locale.
        String own = String.format(java.util.Locale.ROOT, "%0" + strlength + "d", order);
        return parent == null ? own : parent.orderString() + "-" + own;
    }

    @Override
    public String toString() {
        return orderString();
    }
}
440 * Handle the char or subchar element. As
441 * @param state the import state
444 * @param parentFeature in case of subchars we need to attache the newly created feature to a parent feature, should be <code>null</code>
445 * for top level chars.
446 * @return List of TextData. Not a single one as the recursive TextData will also be returned
447 * @throws XMLStreamException
449 private List
<TextData
> handleChar(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, Feature parentFeature
, CharOrder myCharOrder
) throws XMLStreamException
{
450 List
<TextData
> result
= new ArrayList
<>();
451 String classValue
= getClassOnlyAttribute(parentEvent
);
452 Feature feature
= makeFeature(classValue
, state
, parentEvent
, parentFeature
);
453 if(parentFeature
== null){
454 state
.putFeatureToCharSorterList(feature
);
456 FeatureSorterInfo parentInfo
= state
.getLatestCharFeatureSorterInfo();
457 // if (! parentInfo.getUuid().equals(parentFeature.getUuid())){
458 // String message = "The parent char feature is not the same as the latest feature. This is the case for char hierarchies with > 2 levels, which is not yet handled by the import";
459 // fireWarningEvent(message, parentEvent, 6);
461 state
.getLatestCharFeatureSorterInfo().addSubFeature(new FeatureSorterInfo(feature
));
465 TextData textData
= TextData
.NewInstance(feature
);
466 textData
.addPrimaryTaxonomicSource(state
.getConfig().getSourceReference());
467 result
.add(textData
);
469 AnnotationType annType
= getAnnotationType(state
, MarkupTransformer
.uuidOriginalOrder
, "Original order", "Order in original treatment", null, AnnotationType
.TECHNICAL().getVocabulary());
470 textData
.addAnnotation(Annotation
.NewInstance(myCharOrder
.orderString(), annType
, Language
.ENGLISH()));
472 boolean isTextMode
= true;
474 while (reader
.hasNext()) {
475 XMLEvent next
= readNoWhitespace(reader
);
476 if (isMyEndingElement(next
, parentEvent
)) {
478 textData
.putText(getDefaultLanguage(state
), text
);
480 } else if (isStartingElement(next
, FIGURE_REF
)) {
482 handleNotYetImplementedElement(next
);
483 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
485 handleNotYetImplementedElement(next
);
486 } else if (isStartingElement(next
, BR
)) {
489 } else if (isEndingElement(next
, BR
)) {
491 } else if (isHtml(next
)) {
492 text
+= getXmlTag(next
);
493 } else if (next
.isStartElement()) {
494 if (isStartingElement(next
, ANNOTATION
)) {
495 handleNotYetImplementedElement(next
); //TODO test handleSimpleAnnotation
496 } else if (isStartingElement(next
, ITALICS
)) {
497 handleNotYetImplementedElement(next
);
498 } else if (isStartingElement(next
, BOLD
)) {
499 handleNotYetImplementedElement(next
);
500 } else if (isStartingElement(next
, FIGURE
)) {
501 handleFigure(state
, reader
, next
, specimenImport
, nomenclatureImport
);
502 } else if (isStartingElement(next
, SUB_CHAR
)) {
503 List
<TextData
> subTextData
= handleChar(state
, reader
, next
, feature
, myCharOrder
.nextChild());
504 result
.addAll(subTextData
);
505 } else if (isStartingElement(next
, FOOTNOTE
)) {
506 FootnoteDataHolder footnote
= handleFootnote(state
, reader
, next
, specimenImport
, nomenclatureImport
);
507 if (footnote
.isRef()) {
508 String message
= "Ref footnote not implemented here";
509 fireWarningEvent(message
, next
, 4);
511 registerGivenFootnote(state
, footnote
);
514 handleUnexpectedStartElement(next
.asStartElement());
516 } else if (next
.isCharacters()) {
518 String message
= "String is not in text mode";
519 fireWarningEvent(message
, next
, 6);
521 text
+= next
.asCharacters().getData();
524 handleUnexpectedEndElement(next
.asEndElement());
527 throw new IllegalStateException("RefPart has no closing tag");
537 * @throws XMLStreamException
539 private void makeFeatureHeading(MarkupImportState state
, XMLEventReader reader
, String classValue
, Feature feature
, XMLEvent next
) throws XMLStreamException
{
540 String heading
= handleHeading(state
, reader
, next
);
541 if (StringUtils
.isNotBlank(heading
)) {
542 if (!heading
.equalsIgnoreCase(classValue
)) {
544 if (!feature
.equals(state
.getTransformer().getFeatureByKey(heading
))) {
545 UUID headerFeatureUuid
= state
.getTransformer().getFeatureUuid(heading
);
546 if (!feature
.getUuid().equals(headerFeatureUuid
)) {
547 String message
= "Feature heading '%s' differs from feature class '%s' and can not be transformed to feature";
548 message
= String
.format(message
, heading
, classValue
);
549 fireWarningEvent(message
, next
, 1);
552 } catch (UndefinedTransformerMethodException e
) {
553 throw new RuntimeException(e
);
568 * @throws XMLStreamException
570 private void makeFeatureWriter(MarkupImportState state
,XMLEventReader reader
, Feature feature
, Taxon taxon
, XMLEvent next
) throws XMLStreamException
{
571 WriterDataHolder writer
= handleWriter(state
, reader
, next
);
572 if (isNotBlank(writer
.writer
)) {
574 Reference ref
= state
.getConfig().getSourceReference();
575 TaxonDescription description
= getDefaultTaxonDescription(taxon
, false, true, ref
);
576 TextData featurePlaceholder
= docImport
.getFeaturePlaceholder(state
,
577 description
, feature
, true);
578 featurePlaceholder
.addAnnotation(writer
.annotation
);
579 registerFootnotes(state
, featurePlaceholder
, writer
.footnotes
);
581 String message
= "Writer element is empty";
582 fireWarningEvent(message
, next
, 4);
587 protected String
handleHabitat(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
588 checkNoAttributes(parentEvent
);
589 Taxon taxon
= state
.getCurrentTaxon();
590 // TODO which ref to take?
591 Reference sourceReference
= state
.getConfig().getSourceReference();
594 boolean isTextMode
= true;
596 while (reader
.hasNext()) {
597 XMLEvent next
= readNoWhitespace(reader
);
598 if (isMyEndingElement(next
, parentEvent
)) {
599 Feature feature
= getFeature(
601 MarkupTransformer
.uuidExtractedHabitat
,
603 "An structured habitat that was extracted from a habitat text",
604 "extr. habit.", null);
605 TextData habitat
= TextData
.NewInstance(feature
);
606 habitat
.addPrimaryTaxonomicSource(sourceReference
);
607 habitat
.putText(getDefaultLanguage(state
), text
);
608 TaxonDescription description
= getExtractedMarkupMarkedDescription(state
, taxon
, sourceReference
);
610 description
.addElement(habitat
);
613 } else if (isStartingElement(next
, ALTITUDE
)) {
614 // OLD: text = text.trim() + getTaggedCData(state, reader, next);
615 text
+= handleAltitude(state
, reader
, next
);
616 } else if (isStartingElement(next
, LIFE_CYCLE_PERIODS
)) {
617 handleNotYetImplementedElement(next
);
618 } else if (next
.isCharacters()) {
620 String message
= "String is not in text mode";
621 fireWarningEvent(message
, next
, 6);
623 text
+= next
.asCharacters().getData();
625 } else if (isStartingElement(next
, BR
)) {
628 } else if (isEndingElement(next
, BR
)) {
630 } else if (isStartingElement(next
, REFERENCES
)) {
631 handleNotYetImplementedElement(next
);
632 } else if (isStartingElement(next
, FIGURE_REF
)) {
633 handleNotYetImplementedElement(next
);
635 String type
= next
.toString();
636 String location
= String
.valueOf(next
.getLocation().getLineNumber());
637 System
.out
.println("MarkupFeature.handleHabitat: Unexpected element in habitat: " + type
+ ": " + location
);
638 handleUnexpectedElement(next
);
641 throw new IllegalStateException("<Habitat> has no closing tag");
645 * Creates "Extracted factual data" with feature altitude and returns the original text as string
646 * to be used in parent element.
647 * @see #handleHabitat(MarkupImportState, XMLEventReader, XMLEvent)
649 private String
handleAltitude(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
650 checkNoAttributes(parentEvent
);
651 Taxon taxon
= state
.getCurrentTaxon();
652 // TODO which ref to take?
653 Reference sourceReference
= state
.getConfig().getSourceReference();
655 boolean isTextMode
= true;
657 while (reader
.hasNext()) {
658 XMLEvent next
= readNoWhitespace(reader
);
659 if (isMyEndingElement(next
, parentEvent
)) {
660 Feature feature
= getFeature(
662 MarkupTransformer
.uuidExtractedAltitude
,
663 "Extracted Altitude",
664 "An altitude that was extracted from a habitat text",
666 //TODO try to make quantitative data
667 TextData altitude
= TextData
.NewInstance(feature
);
668 altitude
.putText(getDefaultLanguage(state
), text
);
669 altitude
.addPrimaryTaxonomicSource(sourceReference
);
670 TaxonDescription description
= getExtractedMarkupMarkedDescription(state
, taxon
, sourceReference
);
672 description
.addElement(altitude
);
675 } else if (next
.isCharacters()) {
677 String message
= "String is not in text mode";
678 fireWarningEvent(message
, next
, 6);
680 text
+= next
.asCharacters().getData();
682 } else if (isStartingElement(next
, BR
)) {
685 } else if (isEndingElement(next
, BR
)) {
688 String type
= next
.toString();
689 String location
= String
.valueOf(next
.getLocation().getLineNumber());
690 System
.out
.println("MarkupFeatureImport.handleAltitude: Unexpected element in habitat: " + type
+ ": " + location
);
691 handleUnexpectedElement(next
);
694 throw new IllegalStateException("<Habitat> has no closing tag");
700 private FigureDataHolder
handleFigureRef(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
)
701 throws XMLStreamException
{
702 FigureDataHolder result
= new FigureDataHolder();
703 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
704 result
.ref
= getAndRemoveAttributeValue(attributes
, REF
);
705 checkNoAttributes(attributes
, parentEvent
);
707 // text is not handled, needed only for debugging purposes
709 while (reader
.hasNext()) {
710 XMLEvent next
= readNoWhitespace(reader
);
711 if (isMyEndingElement(next
, parentEvent
)) {
713 } else if (isStartingElement(next
, NUM
)) {
714 String num
= getCData(state
, reader
, next
);
715 result
.num
= num
; // num is not handled during import
716 } else if (isStartingElement(next
, FIGURE_PART
)) {
717 result
.figurePart
= getCData(state
, reader
, next
);
718 } else if (next
.isCharacters()) {
719 text
+= next
.asCharacters().getData();
721 fireUnexpectedEvent(next
, 0);
724 throw new IllegalStateException("<figureRef> has no end tag");
728 private void registerFigureDemand(MarkupImportState state
, XMLEvent next
, AnnotatableEntity entity
, String figureRef
) {
729 Media existingFigure
= state
.getFigure(figureRef
);
730 if (existingFigure
!= null) {
731 attachFigure(state
, next
, entity
, existingFigure
);
733 Set
<AnnotatableEntity
> demands
= state
.getFigureDemands(figureRef
);
734 if (demands
== null) {
735 demands
= new HashSet
<AnnotatableEntity
>();
736 state
.putFigureDemands(figureRef
, demands
);
742 private List
<DescriptionElementBase
> makeCommonNameString(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
744 List
<DescriptionElementBase
> result
= new ArrayList
<DescriptionElementBase
>();
746 checkNoAttributes(parentEvent
);
748 while (reader
.hasNext()) {
749 XMLEvent next
= readNoWhitespace(reader
);
750 if (isMyEndingElement(next
, parentEvent
)) {
751 if (result
.isEmpty()){
752 fireWarningEvent("Common name was not created", next
, 4);
755 } else if (isStartingElement(next
, VERNACULAR_NAMES
)) {
756 result
= makeVernacularNames(state
, reader
, next
);
757 } else if (isStartingElement(next
, SUB_HEADING
)) {
758 String subheading
= getCData(state
, reader
, next
);
759 if (! subheading
.matches("(Nom(s)? vernaculaire(s)?\\:|Vern.)")){
760 fireWarningEvent("Subheading for vernacular name not recognized: " + subheading
, next
, 4);
762 } else if (next
.isCharacters()) {
763 String chars
= next
.asCharacters().toString().trim();
764 if (chars
.equals(".")){
767 fireWarningEvent("Character not handled in vernacular name: " + chars
, next
, 4);
769 } else if (isStartingElement(next
, REFERENCES
)) {
770 handleNotYetImplementedElement(next
);
772 handleUnexpectedElement(next
);
775 throw new IllegalStateException("closing tag is missing");
780 private List
<DescriptionElementBase
> makeVernacularNames(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
781 List
<DescriptionElementBase
> result
= new ArrayList
<DescriptionElementBase
>();
782 checkNoAttributes(parentEvent
);
784 while (reader
.hasNext()) {
785 XMLEvent next
= readNoWhitespace(reader
);
786 if (isMyEndingElement(next
, parentEvent
)) {
787 state
.removeCurrentAreas();
789 } else if (isStartingElement(next
, VERNACULAR_NAME
)) {
790 List
<CommonTaxonName
> names
= makeSingleVernacularName(state
, reader
, next
);
791 result
.addAll(names
);
792 } else if (isStartingElement(next
, SUB_HEADING
)) {
793 makeVernacularNamesSubHeading(state
, reader
, next
);
795 handleUnexpectedElement(next
);
798 throw new IllegalStateException("closing tag is missing");
802 private void makeVernacularNamesSubHeading(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
803 checkNoAttributes(parentEvent
);
806 while (reader
.hasNext()) {
807 XMLEvent next
= readNoWhitespace(reader
);
808 if (isMyEndingElement(next
, parentEvent
)) {
809 if (StringUtils
.isNotBlank(text
)){
810 NamedArea area
= getCommonNameArea(text
);
812 state
.removeCurrentAreas();
813 state
.addCurrentArea(area
);
815 fireWarningEvent("Vernacular subheading not recognized", next
, 8);
820 } else if (next
.isCharacters()) {
821 text
+= next
.asCharacters().getData();
823 handleUnexpectedElement(next
);
826 throw new IllegalStateException("closing tag is missing");
830 private NamedArea
getCommonNameArea(String text
) {
831 if (text
.endsWith(":")){
832 text
= text
.substring(0, text
.length()-1);
835 // for now we do it hardcoded
836 if (text
.equalsIgnoreCase("Guyana")){
837 return Country
.GUYANAREPUBLICOF();
838 }else if (text
.equalsIgnoreCase("Suriname")){
839 return Country
.SURINAMEREPUBLICOF();
840 }else if (text
.equalsIgnoreCase("French Guiana")){
841 return Country
.FRENCHGUIANA();
/**
 * Reads one vernacular name element and collects the common names found in it.
 * <p>
 * The language starts out as the default language of the state and is replaced
 * by the result of a LOCAL_LANGUAGE child whenever that result is not
 * <code>null</code>. Once the closing tag of <code>parentEvent</code> is seen,
 * that language is set on every entry of the result list.
 *
 * @param state the import state; supplies the default language
 * @param reader the event reader the child events are taken from
 * @param parentEvent the event of the element being read; checked to carry no attributes
 * @return presumably the collected names (the return statement is not part of this excerpt)
 * @throws XMLStreamException declared by this method; the child handlers called here declare it as well
 * @throws IllegalStateException if the reader runs out of events inside the loop
 */
846 private List
<CommonTaxonName
> makeSingleVernacularName(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
847 checkNoAttributes(parentEvent
);
848 List
<CommonTaxonName
> result
= new ArrayList
<CommonTaxonName
>();
// default language; may be overridden by a LOCAL_LANGUAGE child below
850 Language language
= state
.getDefaultLanguage();
// consume child events until the closing tag of parentEvent shows up
851 while (reader
.hasNext()) {
852 XMLEvent next
= readNoWhitespace(reader
);
853 if (isMyEndingElement(next
, parentEvent
)) {
// the language is applied only now, so a LOCAL_LANGUAGE child that follows
// the names still affects all of them
854 for (CommonTaxonName commonName
: result
){
855 commonName
.setLanguage(language
);
857 // if (isNotBlank(name)){
858 // result.setName(name);
860 // fireWarningEvent("No name string for common name", parentEvent, 4);
864 } else if (isStartingElement(next
, NAME
)) {
// NOTE(review): what happens with the created name is not visible in this
// excerpt; presumably it is added to result - confirm
866 CommonTaxonName name
= handleVernacularNameName(state
, reader
, next
);
870 } else if (isStartingElement(next
, LOCAL_LANGUAGE
)) {
871 Language localLanguage
= handleLocalLanguage(state
, reader
, next
);
// keep the current language if the local language could not be resolved
872 if (localLanguage
!= null){
873 language
= localLanguage
;
// TRANSLATION, LOCALITY, ANNOTATION and FOOTNOTE_REF are not evaluated here,
// they are only passed to handleNotYetImplementedElement
875 } else if (isStartingElement(next
, TRANSLATION
)) {
877 handleNotYetImplementedElement(next
);
878 } else if (isStartingElement(next
, LOCALITY
)) {
880 handleNotYetImplementedElement(next
);
881 } else if (isStartingElement(next
, ANNOTATION
)){
883 handleNotYetImplementedElement(next
);
884 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
886 handleNotYetImplementedElement(next
);
887 } else if (next
.isCharacters()) {
// NOTE(review): uses toString() of the characters event, other methods of
// this file use getData() - confirm both yield the plain text
888 String chars
= next
.asCharacters().toString().trim();
// text is compared against the separators "(", ")" and ","; presumably these
// are skipped silently and only other text leads to the warning below (the
// body of this branch is not visible here)
889 if (chars
.equals("(") || chars
.equals(")") || chars
.equals(",")){
892 fireWarningEvent("Character not handled in vernacular name: " + chars
, next
, 4);
895 handleUnexpectedElement(next
);
// only reached when the reader has no more events
898 throw new IllegalStateException("closing tag is missing");
/**
 * Creates a {@link CommonTaxonName} from the text content of a name element
 * inside a vernacular name.
 * <p>
 * The name is created without a language (<code>null</code>). If the state
 * holds current areas, the first area delivered by their iterator is used as
 * the area of the name.
 *
 * @param state the import state; supplies the current areas
 * @param reader the event reader the text content is read from
 * @param parentEvent the event of the name element
 * @return presumably the created name (the return statement is not part of this excerpt)
 * @throws XMLStreamException declared by this method
 */
901 private CommonTaxonName
handleVernacularNameName(MarkupImportState state
, XMLEventReader reader
,
902 XMLEvent parentEvent
) throws XMLStreamException
{
// the CLASS attribute is checked against "vernacular" and removed, afterwards
// the remaining attributes are handed to checkNoAttributes
904 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
905 this.checkAndRemoveAttributeValue(attributes
, CLASS
, "vernacular");
906 this.checkNoAttributes(attributes
, parentEvent
);
909 String text
= getCData(state
, reader
, parentEvent
, false);
// language is passed as null here
910 CommonTaxonName name
= CommonTaxonName
.NewInstance(text
, null);
911 if (! state
.getCurrentAreas().isEmpty()){
// a common name takes a single area only, so more than one current area
// is reported
912 if (state
.getCurrentAreas().size() > 1){
913 fireWarningEvent("Multiple areas for common name not yet covered by CDM", parentEvent
, 8);
// NOTE(review): whether this line also runs in the multiple-area case depends
// on a closing line that is not visible in this excerpt - confirm
915 name
.setArea(state
.getCurrentAreas().iterator().next());
921 private Language
handleLocalLanguage(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
923 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
924 boolean doubtful
= getAndRemoveBooleanAttributeValue(parentEvent
, attributes
, DOUBTFUL
, false);
925 boolean unknown
= getAndRemoveBooleanAttributeValue(parentEvent
, attributes
, UNKNOWN
, false);
926 this.checkNoAttributes(attributes
, parentEvent
);
928 if (doubtful
== true){
929 fireWarningEvent("Doubtful not yet implemented for local language", parentEvent
, 2);
931 if (unknown
== true){
932 fireWarningEvent("Unknown not yet implemented for local language ", parentEvent
, 2);
936 String text
= getCData(state
, reader
, parentEvent
);
937 Language lang
= makeLanguageByLangStr(state
, text
);
/**
 * Splits a comma separated string of vernacular names into single description
 * elements.
 * <p>
 * Each entry is expected to look like <code>name (language)</code>, optionally
 * followed by a dot. Entries whose name is shorter than 255 characters become a
 * {@link CommonTaxonName} without area, all others become a {@link TextData}
 * for {@link Feature#COMMON_NAME()}. The source reference of the configuration
 * is added as primary taxonomic source to every element.
 *
 * @param state the import state; supplies configuration and reader
 * @param subheading not used in the visible lines of this method
 * @param commonNameString the comma separated vernacular names
 * @return presumably the created elements (the return statement is not part of this excerpt)
 * @throws XMLStreamException declared by this method
 */
942 private List
<DescriptionElementBase
> makeVernacular(MarkupImportState state
, String subheading
, String commonNameString
) throws XMLStreamException
{
943 List
<DescriptionElementBase
> result
= new ArrayList
<>();
944 Reference sourceReference
= state
.getConfig().getSourceReference();
945 String
[] splits
= commonNameString
.split(",");
946 for (String split
: splits
){
947 split
= split
.trim();
// expected shape: anything, a bracketed part, optional trailing dot
948 if (! split
.matches(".*\\(.*\\)\\.?")){
949 fireWarningEvent("Common name string '"+split
+"' does not match given pattern", state
.getReader().peek(), 4);
// NOTE(review): it is not visible here whether a non-matching entry is
// skipped; if it is not, languageStr below equals the whole entry because
// neither replacement finds a bracket - confirm
// name: the entry without its bracketed part and without any dots
952 String name
= split
.replaceAll("\\(.*\\)", "").replace(".", "").trim();
// language: what follows the last opening bracket, closing bracket and dot removed
953 String languageStr
= split
.replaceFirst(".*\\(", "").replaceAll("\\)\\.?", "").trim();
955 Language language
= null;
956 if (StringUtils
.isNotBlank(languageStr
)){
957 language
= makeLanguageByLangStr(state
, languageStr
);
959 DescriptionElementBase commonName
;
// name results from trim() and is never null here, so only the length decides
960 if (name
!= null && name
.length() < 255 ){
961 NamedArea area
= null;
962 commonName
= CommonTaxonName
.NewInstance(name
, language
, area
);
963 commonName
.addPrimaryTaxonomicSource(sourceReference
);
// names of 255 characters or more are stored as TextData instead; the default
// language is used when no language was resolved
965 if (language
== null){
966 language
= getDefaultLanguage(state
);
968 commonName
= TextData
.NewInstance(Feature
.COMMON_NAME(), name
, language
, null);
969 commonName
.addPrimaryTaxonomicSource(sourceReference
);
970 String warning
= "Vernacular feature is >255 size. Therefore it is handled as TextData, not CommonTaxonName: " + name
;
971 fireWarningEvent(warning
, state
.getReader().peek(), 1);
973 result
.add(commonName
);
979 private Language
makeLanguageByLangStr(MarkupImportState state
, String languageStr
) throws XMLStreamException
{
981 Language language
= state
.getTransformer().getLanguageByKey(languageStr
);
982 if (language
== null){
983 UUID langUuid
= state
.getTransformer().getLanguageUuid(languageStr
);
984 TermVocabulary
<?
> voc
= null;
985 language
= getLanguage(state
, langUuid
, languageStr
, languageStr
, null, voc
);
987 if (language
== null){
988 String warning
= "Language " + languageStr
+ " not recognized by transformer";
989 fireWarningEvent(warning
, state
.getReader().peek(), 4);
992 } catch (UndefinedTransformerMethodException e
) {
993 throw new RuntimeException(e
);
998 private String
handleHeading(MarkupImportState state
,XMLEventReader reader
, XMLEvent parentEvent
)throws XMLStreamException
{
999 checkNoAttributes(parentEvent
);
1002 while (reader
.hasNext()) {
1003 XMLEvent next
= readNoWhitespace(reader
);
1004 if (isMyEndingElement(next
, parentEvent
)) {
1006 } else if (next
.isStartElement()) {
1007 if (isStartingElement(next
, FOOTNOTE
)) {
1008 handleNotYetImplementedElement(next
);
1010 handleUnexpectedStartElement(next
.asStartElement());
1012 } else if (next
.isCharacters()) {
1013 text
+= next
.asCharacters().getData();
1015 handleUnexpectedEndElement(next
.asEndElement());
1018 throw new IllegalStateException("<String> has no closing tag");
1023 private List
<Reference
> handleReferences(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
1025 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
1026 String bibliography
= getAndRemoveAttributeValue(attributes
,
1028 String serialsAbbreviations
= getAndRemoveAttributeValue(attributes
,
1029 SERIALS_ABBREVIATIONS
);
1030 if (isNotBlank(bibliography
) || isNotBlank(serialsAbbreviations
)) {
1031 String message
= "Attributes not yet implemented for <references>";
1032 fireWarningEvent(message
, parentEvent
, 4);
1035 List
<Reference
> result
= new ArrayList
<>();
1038 while (reader
.hasNext()) {
1039 XMLEvent next
= readNoWhitespace(reader
);
1040 if (next
.isEndElement()) {
1041 if (isMyEndingElement(next
, parentEvent
)) {
1044 if (isEndingElement(next
, HEADING
)) {
1045 // NOT YET IMPLEMENTED
1046 popUnimplemented(next
.asEndElement());
1047 } else if (isEndingElement(next
, WRITER
)) {
1048 // NOT YET IMPLEMENTED
1049 popUnimplemented(next
.asEndElement());
1050 } else if (isEndingElement(next
, FOOTNOTE
)) {
1051 // NOT YET IMPLEMENTED
1052 popUnimplemented(next
.asEndElement());
1053 } else if (isEndingElement(next
, STRING
)) {
1054 // NOT YET IMPLEMENTED
1055 popUnimplemented(next
.asEndElement());
1056 } else if (isEndingElement(next
, REF_NUM
)) {
1057 // NOT YET IMPLEMENTED
1058 popUnimplemented(next
.asEndElement());
1060 handleUnexpectedEndElement(next
.asEndElement());
1063 } else if (next
.isStartElement()) {
1064 if (isStartingElement(next
, HEADING
)) {
1065 handleNotYetImplementedElement(next
);
1066 } else if (isStartingElement(next
, SUB_HEADING
)) {
1067 String subheading
= getCData(state
, reader
, next
).trim();
1068 String excludePattern
= "(i?)(References?|Literature):?";
1069 if (!subheading
.matches(excludePattern
)) {
1070 fireNotYetImplementedElement(next
.getLocation(), next
.asStartElement().getName(), 0);
1072 } else if (isStartingElement(next
, WRITER
)) {
1073 handleNotYetImplementedElement(next
);
1074 } else if (isStartingElement(next
, FOOTNOTE
)) {
1075 handleNotYetImplementedElement(next
);
1076 } else if (isStartingElement(next
, STRING
)) {
1077 handleNotYetImplementedElement(next
);
1078 } else if (isStartingElement(next
, REF_NUM
)) {
1079 handleNotYetImplementedElement(next
);
1080 } else if (isStartingElement(next
, REFERENCE
)) {
1081 Reference ref
= nomenclatureImport
.handleReference(state
, reader
, next
);
1084 handleUnexpectedStartElement(next
);
1087 handleUnexpectedElement(next
);
1090 throw new IllegalStateException("<References> has no closing tag");
1095 * Returns all the included text and tags as string. The result should look
1096 * similar to the original xml part.
1098 private String
getTaggedCData(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
1099 checkNoAttributes(parentEvent
);
1101 String text
= getXmlTag(parentEvent
);
1102 while (reader
.hasNext()) {
1103 XMLEvent next
= readNoWhitespace(reader
);
1104 if (isMyEndingElement(next
, parentEvent
)) {
1105 text
+= getXmlTag(next
);
1107 } else if (next
.isStartElement()) {
1108 text
+= getTaggedCData(state
, reader
, next
);
1109 } else if (next
.isEndElement()) {
1111 text
+= getTaggedCData(state
, reader
, next
);
1112 } else if (next
.isCharacters()) {
1113 text
+= next
.asCharacters().getData();
1115 handleUnexpectedEndElement(next
.asEndElement());
1118 throw new IllegalStateException("Some tag has no closing tag");