2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.markup
;
12 import java
.net
.MalformedURLException
;
13 import java
.util
.ArrayList
;
14 import java
.util
.Arrays
;
15 import java
.util
.Collection
;
16 import java
.util
.HashMap
;
17 import java
.util
.HashSet
;
18 import java
.util
.Iterator
;
19 import java
.util
.List
;
22 import java
.util
.Stack
;
23 import java
.util
.UUID
;
24 import java
.util
.regex
.Matcher
;
25 import java
.util
.regex
.Pattern
;
27 import javax
.xml
.namespace
.QName
;
28 import javax
.xml
.stream
.Location
;
29 import javax
.xml
.stream
.XMLEventReader
;
30 import javax
.xml
.stream
.XMLStreamConstants
;
31 import javax
.xml
.stream
.XMLStreamException
;
32 import javax
.xml
.stream
.events
.Attribute
;
33 import javax
.xml
.stream
.events
.Characters
;
34 import javax
.xml
.stream
.events
.EndElement
;
35 import javax
.xml
.stream
.events
.StartElement
;
36 import javax
.xml
.stream
.events
.XMLEvent
;
38 import org
.apache
.commons
.lang
.StringUtils
;
39 import org
.apache
.commons
.lang
.WordUtils
;
40 import org
.apache
.log4j
.Logger
;
42 import eu
.etaxonomy
.cdm
.api
.service
.IClassificationService
;
43 import eu
.etaxonomy
.cdm
.api
.service
.ITermService
;
44 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
45 import eu
.etaxonomy
.cdm
.ext
.geo
.GeoServiceArea
;
46 import eu
.etaxonomy
.cdm
.ext
.geo
.IEditGeoService
;
47 import eu
.etaxonomy
.cdm
.io
.common
.CdmImportBase
;
48 import eu
.etaxonomy
.cdm
.io
.common
.CdmImportBase
.TermMatchMode
;
49 import eu
.etaxonomy
.cdm
.io
.common
.events
.IIoEvent
;
50 import eu
.etaxonomy
.cdm
.io
.common
.events
.IoProblemEvent
;
51 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
52 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
53 import eu
.etaxonomy
.cdm
.model
.common
.AnnotatableEntity
;
54 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
55 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
56 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
57 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTerm
;
58 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
59 import eu
.etaxonomy
.cdm
.model
.common
.Extension
;
60 import eu
.etaxonomy
.cdm
.model
.common
.ExtensionType
;
61 import eu
.etaxonomy
.cdm
.model
.common
.IntextReference
;
62 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
63 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
64 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
65 import eu
.etaxonomy
.cdm
.model
.common
.TermVocabulary
;
66 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
67 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
68 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
69 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
70 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
71 import eu
.etaxonomy
.cdm
.model
.description
.PresenceAbsenceTerm
;
72 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
73 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
74 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
75 import eu
.etaxonomy
.cdm
.model
.location
.NamedAreaLevel
;
76 import eu
.etaxonomy
.cdm
.model
.location
.NamedAreaType
;
77 import eu
.etaxonomy
.cdm
.model
.media
.IdentifiableMediaEntity
;
78 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
79 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
80 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
81 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
82 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
83 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
84 import eu
.etaxonomy
.cdm
.model
.reference
.IArticle
;
85 import eu
.etaxonomy
.cdm
.model
.reference
.IBook
;
86 import eu
.etaxonomy
.cdm
.model
.reference
.IBookSection
;
87 import eu
.etaxonomy
.cdm
.model
.reference
.IJournal
;
88 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
89 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
90 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceType
;
91 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
92 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
93 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
94 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
95 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
96 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
100 * @created 04.08.2008
102 public abstract class MarkupImportBase
{
103 private static final Logger logger
= Logger
.getLogger(MarkupImportBase
.class);
106 protected static final String ALTITUDE
= "altitude";
107 protected static final String ANNOTATION
= "annotation";
108 protected static final String BOLD
= "bold";
109 protected static final String BR
= "br";
110 protected static final String DOUBTFUL
= "doubtful";
111 protected static final String CITATION
= "citation";
112 protected static final String CLASS
= "class";
113 protected static final String COORDINATES
= "coordinates";
114 protected static final String DATES
= "dates";
115 protected static final String GATHERING
= "gathering";
116 protected static final String GATHERING_GROUP
= "gatheringGroup";
117 protected static final String GENUS_ABBREVIATION
= "genus abbreviation";
118 protected static final String FOOTNOTE
= "footnote";
119 protected static final String FOOTNOTE_REF
= "footnoteRef";
120 protected static final String FULL_NAME
= "fullName";
121 protected static final String ITALICS
= "italics";
122 protected static final String NUM
= "num";
123 protected static final String NOTES
= "notes";
124 protected static final String PUBLICATION
= "publication";
125 protected static final String SPECIMEN_TYPE
= "specimenType";
126 protected static final String STATUS
= "status";
127 protected static final String SUB_HEADING
= "subHeading";
128 protected static final String TYPE
= "type";
129 protected static final String TYPE_STATUS
= "typeStatus";
130 protected static final String UNKNOWN
= "unknown";
133 protected static final boolean CREATE_NEW
= true;
134 protected static final boolean NO_IMAGE_GALLERY
= false;
135 protected static final boolean IMAGE_GALLERY
= true;
137 protected static final String ADDENDA
= "addenda";
138 protected static final String BIBLIOGRAPHY
= "bibliography";
139 protected static final String BIOGRAPHIES
= "biographies";
140 protected static final String CHAR
= "char";
141 protected static final String DEDICATION
= "dedication";
142 protected static final String DEFAULT_MEDIA_URL
= "defaultMediaUrl";
143 protected static final String DISTRIBUTION_LIST
= "distributionList";
144 protected static final String DISTRIBUTION_LOCALITY
= "distributionLocality";
145 protected static final String FEATURE
= "feature";
146 protected static final String FIGURE
= "figure";
147 protected static final String FIGURE_LEGEND
= "figureLegend";
148 protected static final String FIGURE_PART
= "figurePart";
149 protected static final String FIGURE_REF
= "figureRef";
150 protected static final String FIGURE_TITLE
= "figureTitle";
151 protected static final String FOOTNOTE_STRING
= "footnoteString";
152 protected static final String FREQUENCY
= "frequency";
153 protected static final String HEADING
= "heading";
154 protected static final String HABITAT
= "habitat";
155 protected static final String HABITAT_LIST
= "habitatList";
156 protected static final String IS_FREETEXT
= "isFreetext";
157 protected static final String ID
= "id";
158 protected static final String KEY
= "key";
159 protected static final String LIFE_CYCLE_PERIODS
= "lifeCyclePeriods";
160 protected static final String META_DATA
= "metaData";
161 protected static final String MODS
= "mods";
163 protected static final String NOMENCLATURE
= "nomenclature";
164 protected static final String QUOTE
= "quote";
165 protected static final String RANK
= "rank";
166 protected static final String REF
= "ref";
167 protected static final String REF_NUM
= "refNum";
168 protected static final String REFERENCE
= "reference";
169 protected static final String REFERENCES
= "references";
170 protected static final String SUB_CHAR
= "subChar";
171 protected static final String TAXON
= "taxon";
172 protected static final String TAXONTITLE
= "taxontitle";
173 protected static final String TAXONTYPE
= "taxontype";
174 protected static final String TEXT_SECTION
= "textSection";
175 protected static final String TREATMENT
= "treatment";
176 protected static final String SERIALS_ABBREVIATIONS
= "serialsAbbreviations";
177 protected static final String STRING
= "string";
178 protected static final String URL
= "url";
179 protected static final String WRITER
= "writer";
181 protected static final String LOCALITY
= "locality";
186 protected static final String ACCEPTED
= "accepted";
187 protected static final String ACCEPTED_NAME
= "acceptedName";
188 protected static final String ALTERNATEPUBTITLE
= "alternatepubtitle";
189 protected static final String APPENDIX
= "appendix";
190 protected static final String AUTHOR
= "author";
191 protected static final String DETAILS
= "details";
192 protected static final String EDITION
= "edition";
193 protected static final String EDITORS
= "editors";
194 protected static final String HOMONYM
= "homonym";
195 protected static final String HOMOTYPES
= "homotypes";
196 protected static final String NOMENCLATURAL_NOTES
= "nomenclaturalNotes";
197 protected static final String INFRANK
= "infrank";
198 protected static final String INFRAUT
= "infraut";
199 protected static final String INFRPARAUT
= "infrparaut";
200 protected static final String ISSUE
= "issue";
201 protected static final String NAME_STATUS
= "namestatus";
202 protected static final String NAME
= "name";
203 protected static final String NAME_TYPE
= "nameType";
204 protected static final String NOM
= "nom";
205 protected static final String PAGES
= "pages";
206 protected static final String PARAUT
= "paraut";
207 protected static final String PUBFULLNAME
= "pubfullname";
208 protected static final String PUBLOCATION
= "publocation";
209 protected static final String PUBLISHER
= "publisher";
210 protected static final String PUBNAME
= "pubname";
211 protected static final String PUBTITLE
= "pubtitle";
212 protected static final String PUBTYPE
= "pubtype";
213 protected static final String REF_PART
= "refPart";
214 protected static final String SYNONYM
= "synonym";
215 protected static final String USAGE
= "usage";
216 protected static final String VOLUME
= "volume";
217 protected static final String YEAR
= "year";
221 protected static final String COUPLET
= "couplet";
222 protected static final String IS_SPOTCHARACTERS
= "isSpotcharacters";
223 protected static final String ONLY_NUMBERED_TAXA_EXIST
= "onlyNumberedTaxaExist";
224 protected static final String EXISTS
= "exists";
225 protected static final String KEYNOTES
= "keynotes";
226 protected static final String KEY_TITLE
= "keyTitle";
227 protected static final String QUESTION
= "question";
228 protected static final String TEXT
= "text";
229 protected static final String TO_COUPLET
= "toCouplet";
230 protected static final String TO_KEY
= "toKey";
231 protected static final String TO_TAXON
= "toTaxon";
235 protected static final String VERNACULAR_NAMES
= "vernacularNames";
236 protected static final String VERNACULAR_NAME
= "vernacularName";
237 protected static final String TRANSLATION
= "translation";
238 protected static final String LOCAL_LANGUAGE
= "localLanguage";
242 protected MarkupDocumentImport docImport
;
244 private final IEditGeoService editGeoService
;
245 protected MarkupFeatureImport featureImport
;
247 public MarkupImportBase(MarkupDocumentImport docImport
) {
249 this.docImport
= docImport
;
250 this.editGeoService
= docImport
.getEditGeoService();
253 private final Stack
<QName
> unhandledElements
= new Stack
<QName
>();
254 private final Stack
<QName
> handledElements
= new Stack
<QName
>();
257 protected <T
extends CdmBase
> void save(Collection
<T
> collection
, MarkupImportState state
) {
258 if (state
.isCheck() || collection
.isEmpty()){
261 T example
= collection
.iterator().next();
262 if (example
.isInstanceOf(TaxonBase
.class)){
263 Collection
<TaxonBase
> typedCollection
= (Collection
<TaxonBase
>)collection
;
264 docImport
.getTaxonService().saveOrUpdate(typedCollection
);
265 }else if (example
.isInstanceOf(Classification
.class)){
266 Collection
<Classification
> typedCollection
= (Collection
<Classification
>)collection
;
267 docImport
.getClassificationService().saveOrUpdate(typedCollection
);
268 }else if (example
.isInstanceOf(PolytomousKey
.class)){
269 Collection
<PolytomousKey
> typedCollection
= (Collection
<PolytomousKey
>)collection
;
270 docImport
.getPolytomousKeyService().saveOrUpdate(typedCollection
);
271 }else if (example
.isInstanceOf(DefinedTermBase
.class)){
272 Collection
<DefinedTermBase
> typedCollection
= (Collection
<DefinedTermBase
>)collection
;
273 getTermService().saveOrUpdate(typedCollection
);
279 //TODO move to service layer for all IdentifiableEntities
280 protected void save(CdmBase cdmBase
, MarkupImportState state
) {
281 if (state
.isCheck()){
284 cdmBase
= CdmBase
.deproxy(cdmBase
, CdmBase
.class);
285 if (cdmBase
== null){
286 String message
= "Tried to save a null object.";
287 fireWarningEvent(message
, "--location ?? --", 6,1);
288 } else if (cdmBase
.isInstanceOf(TaxonBase
.class)){
289 docImport
.getTaxonService().saveOrUpdate((TaxonBase
<?
>)cdmBase
);
290 }else if (cdmBase
.isInstanceOf(Classification
.class)){
291 docImport
.getClassificationService().saveOrUpdate((Classification
)cdmBase
);
292 }else if (cdmBase
.isInstanceOf(PolytomousKey
.class)){
293 docImport
.getPolytomousKeyService().saveOrUpdate((PolytomousKey
)cdmBase
);
294 }else if (cdmBase
.isInstanceOf(DefinedTermBase
.class)){
295 docImport
.getTermService().saveOrUpdate((DefinedTermBase
<?
>)cdmBase
);
296 }else if (cdmBase
.isInstanceOf(Media
.class)){
297 docImport
.getMediaService().saveOrUpdate((Media
)cdmBase
);
298 }else if (cdmBase
.isInstanceOf(SpecimenOrObservationBase
.class)){
299 docImport
.getOccurrenceService().saveOrUpdate((SpecimenOrObservationBase
<?
>)cdmBase
);
300 }else if (cdmBase
.isInstanceOf(DescriptionElementBase
.class)){
301 docImport
.getDescriptionService().saveDescriptionElement((DescriptionElementBase
)cdmBase
);
302 }else if (cdmBase
.isInstanceOf(Reference
.class)){
303 docImport
.getReferenceService().saveOrUpdate((Reference
)cdmBase
);
305 String message
= "Unknown cdmBase type to save: " + cdmBase
.getClass();
306 fireWarningEvent(message
, "Unknown location", 8);
308 //logger.warn("Saved " + cdmBase);
312 protected ITermService
getTermService() {
313 return docImport
.getTermService();
316 protected IClassificationService
getClassificationService() {
317 return docImport
.getClassificationService();
320 //*********************** Attribute methods *************************************/
323 * Returns a map for all attributes of an start element
327 protected Map
<String
, Attribute
> getAttributes(XMLEvent event
) {
328 Map
<String
, Attribute
> result
= new HashMap
<>();
329 if (!event
.isStartElement()){
330 fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event
.getLocation()), 1, 1);
333 StartElement element
= event
.asStartElement();
334 @SuppressWarnings("unchecked")
335 Iterator
<Attribute
> attributes
= element
.getAttributes();
336 while (attributes
.hasNext()){
337 Attribute attribute
= attributes
.next();
339 result
.put(attribute
.getName().getLocalPart(), attribute
);
345 * Throws an unexpected attributes event if the event has any attributes.
348 protected void checkNoAttributes(Map
<String
, Attribute
> attributes
, XMLEvent event
) {
349 String
[] exceptions
= new String
[]{};
350 handleUnexpectedAttributes(event
.getLocation(), attributes
, 1, exceptions
);
356 * Throws an unexpected attributes event if the event has any attributes.
359 protected void checkNoAttributes(XMLEvent event
) {
360 String
[] exceptions
= new String
[]{};
361 checkNoAttributes(event
, 1, exceptions
);
365 * Throws an unexpected attributes event if the event has any attributes except those mentioned in "exceptions".
369 protected void checkNoAttributes(XMLEvent event
, int stackDepth
, String
... exceptions
) {
370 if (! event
.isStartElement()){
371 fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event
.getLocation()), 1, 1);
374 StartElement startElement
= event
.asStartElement();
375 Map
<String
, Attribute
> attributes
= getAttributes(startElement
);
376 handleUnexpectedAttributes(startElement
.getLocation(), attributes
, stackDepth
+1, exceptions
);
381 * Checks if the given attribute exists and has the given value.
382 * If yes, true is returned and the attribute is removed from the attributes map.
383 * Otherwise false is returned.
387 * @return <code>true</code> if attribute has given value, <code>false</code> otherwise
389 protected boolean checkAndRemoveAttributeValue( Map
<String
, Attribute
> attributes
, String attrName
, String value
) {
390 Attribute attr
= attributes
.get(attrName
);
391 if (attr
== null ||value
== null ){
394 if (value
.equals(attr
.getValue())){
395 attributes
.remove(attrName
);
405 * Returns the value of a given attribute name and removes the attribute from the attributes map.
406 * Returns <code>null</code> if attribute does not exist.
407 * @param attributes the list of all attributes
408 * @param attrName the requested attribute name
409 * @return the value for the attribute
411 protected String
getAndRemoveAttributeValue(Map
<String
, Attribute
> attributes
, String attrName
) {
412 return getAndRemoveAttributeValue(null, attributes
, attrName
, false, 1);
416 * Returns the value of a boolean attribute with the given name and removes the attribute from the attributes map.
417 * Returns <code>defaultValue</code> if the attribute does not exist. ALso returns <code>defaultValue</code> and throws a warning if the
418 * attribute has no boolean value (true, false).
420 * @param attributes the
421 * @param attrName the name of the attribute
422 * @param defaultValue the default value to return if attribute does not exist or can not be defined
425 protected Boolean
getAndRemoveBooleanAttributeValue(XMLEvent event
, Map
<String
, Attribute
> attributes
, String attrName
, Boolean defaultValue
) {
426 String value
= getAndRemoveAttributeValue(null, attributes
, attrName
, false, 1);
427 Boolean result
= defaultValue
;
429 if (value
.equalsIgnoreCase("true")){
431 }else if (value
.equalsIgnoreCase("false")){
434 String message
= "Boolean attribute has no boolean value ('true', 'false') but '%s'";
435 fireWarningEvent(String
.format(message
, value
), makeLocationStr(event
.getLocation()), 6, 1);
443 * Returns the value of a given attribute name and returns the attribute from the attributes map.
444 * Fires a mandatory field is missing event if the attribute does not exist.
450 protected String
getAndRemoveRequiredAttributeValue(XMLEvent xmlEvent
, Map
<String
, Attribute
> attributes
, String attrName
) {
451 return getAndRemoveAttributeValue(xmlEvent
, attributes
, attrName
, true, 1);
455 * Returns the value of a given attribute name and returns the attribute from the attributes map.
456 * If required is <code>true</code> and the attribute does not exist a mandatory field is missing event is fired.
463 private String
getAndRemoveAttributeValue(XMLEvent xmlEvent
, Map
<String
, Attribute
> attributes
, String attrName
, boolean isRequired
, int stackDepth
) {
464 Attribute attr
= attributes
.get(attrName
);
467 fireMandatoryElementIsMissing(xmlEvent
, attrName
, 8, stackDepth
+1);
471 attributes
.remove(attrName
);
472 return attr
.getValue();
477 * Fires an not yet implemented event if the given attribute exists in attributes.
482 protected void handleNotYetImplementedAttribute(Map
<String
, Attribute
> attributes
,
483 String attrName
, XMLEvent event
) {
484 Attribute attr
= attributes
.get(attrName
);
486 attributes
.remove(attrName
);
487 QName qName
= attr
.getName();
488 fireNotYetImplementedAttribute(event
.getLocation(), qName
, attr
.getValue(), 1);
493 * Fires an unhandled attributes event, if attributes exist in attributes map not covered by the exceptions.
494 * No event is fired if the unhandled elements stack is not empty.
499 protected void handleUnexpectedAttributes(Location location
,Map
<String
, Attribute
> attributes
, String
... exceptions
) {
500 handleUnexpectedAttributes(location
, attributes
, 1, exceptions
);
504 * see {@link #handleUnexpectedAttributes(Location, Map, String...)}
508 * @param stackDepth the stack trace depth
511 private void handleUnexpectedAttributes(Location location
,Map
<String
, Attribute
> attributes
, int stackDepth
, String
... exceptions
) {
512 if (attributes
.size() > 0){
513 if (this.unhandledElements
.size() == 0 ){
514 boolean hasUnhandledAttributes
= false;
515 for (String key
: attributes
.keySet()){
516 boolean isException
= false;
517 for (String exception
: exceptions
){
518 if(key
.equals(exception
)){
523 hasUnhandledAttributes
= true;
526 if (hasUnhandledAttributes
){
527 fireUnexpectedAttributes(location
, attributes
, stackDepth
+1);
534 private void fireUnexpectedAttributes(Location location
, Map
<String
, Attribute
> attributes
, int stackDepth
) {
535 String attributesString
= "";
536 for (String key
: attributes
.keySet()){
537 Attribute attribute
= attributes
.get(key
);
538 attributesString
= CdmUtils
.concat(",", attributesString
, attribute
.getName().getLocalPart() + ":" + attribute
.getValue());
540 String message
= "Unexpected attributes: %s";
541 IoProblemEvent event
= makeProblemEvent(location
, String
.format(message
, attributesString
), 1 , stackDepth
+1 );
546 protected void fireUnexpectedAttributeValue(XMLEvent parentEvent
, String attrName
, String attrValue
) {
547 String message
= "Unexpected attribute value %s='%s'";
548 message
= String
.format(message
, attrName
, attrValue
);
549 IoProblemEvent event
= makeProblemEvent(parentEvent
.getLocation(), message
, 1 , 1 );
553 protected void handleNotYetImplementedAttributeValue(XMLEvent xmlEvent
, String attrName
, String attrValue
) {
554 String message
= "Attribute %s not yet implemented for value '%s'";
555 message
= String
.format(message
, attrName
, attrValue
);
556 IIoEvent event
= makeProblemEvent(xmlEvent
.getLocation(), message
, 1, 1 );
560 protected void fireNotYetImplementedAttribute(Location location
, QName qName
,
561 String value
, int stackDepth
) {
562 String message
= "Attribute not yet implemented: %s (%s)";
563 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, qName
.getLocalPart(), value
), 1, stackDepth
+1 );
568 protected void fireUnexpectedEvent(XMLEvent xmlEvent
, int stackDepth
) {
569 Location location
= xmlEvent
.getLocation();
570 String message
= "Unexpected event: %s";
571 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, xmlEvent
.toString()), 2, stackDepth
+1);
575 protected void fireUnexpectedStartElement(Location location
, StartElement startElement
, int stackDepth
) {
576 QName qName
= startElement
.getName();
577 String message
= "Unexpected start element: %s";
578 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, qName
.getLocalPart()), 2, stackDepth
+1);
583 protected void fireUnexpectedEndElement(Location location
, EndElement endElement
, int stackDepth
) {
584 QName qName
= endElement
.getName();
585 String message
= "Unexpected end element: %s";
586 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, qName
.getLocalPart()), 16, stackDepth
+1);
590 protected void fireNotYetImplementedElement(Location location
, QName qName
, int stackDepth
) {
591 String message
= "Element not yet implemented: %s";
592 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, qName
.getLocalPart()), 1, stackDepth
+1 );
596 protected void fireNotYetImplementedCharacters(Location location
, Characters chars
, int stackDepth
) {
597 String message
= "Characters not yet handled: %s";
598 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, chars
.getData()), 1, stackDepth
+1 );
603 * Creates a problem event.
604 * Be aware of the right depths of the stack trace !
610 private IoProblemEvent
makeProblemEvent(Location location
, String message
, int severity
, int stackDepth
) {
612 StackTraceElement
[] stackTrace
= new Exception().getStackTrace();
613 int lineNumber
= stackTrace
[stackDepth
].getLineNumber();
614 String methodName
= stackTrace
[stackDepth
].getMethodName();
615 String locationStr
= makeLocationStr(location
);
616 String className
= stackTrace
[stackDepth
].getClassName();
617 Class
<?
> declaringClass
;
619 declaringClass
= Class
.forName(className
);
620 } catch (ClassNotFoundException e
) {
621 declaringClass
= this.getClass();
623 IoProblemEvent event
= IoProblemEvent
.NewInstance(declaringClass
, message
,
624 locationStr
, lineNumber
, severity
, methodName
);
629 * Creates a string from a location
633 protected String
makeLocationStr(Location location
) {
634 String locationStr
= location
== null ?
" - no location - " : "l." + location
.getLineNumber() + "/c."+ location
.getColumnNumber();
640 * Fires an unexpected element event if the unhandled elements stack is empty.
641 * Otherwise adds the element to the stack.
644 protected void handleUnexpectedStartElement(XMLEvent event
) {
645 handleUnexpectedStartElement(event
, 1);
649 * Fires an unexpected element event if the unhandled elements stack is empty.
650 * Otherwise adds the element to the stack.
653 protected void handleUnexpectedStartElement(XMLEvent event
, int stackDepth
) {
654 QName qName
= event
.asStartElement().getName();
655 if (! unhandledElements
.empty()){
656 unhandledElements
.push(qName
);
658 fireUnexpectedStartElement(event
.getLocation(), event
.asStartElement(), stackDepth
+ 1);
663 protected void handleUnexpectedEndElement(EndElement event
) {
664 handleUnexpectedEndElement(event
, 1);
668 * Fires an unexpected element event if the event is not the last on the stack.
669 * Otherwise removes last stack element.
672 protected void handleUnexpectedEndElement(EndElement event
, int stackDepth
) {
673 QName qName
= event
.asEndElement().getName();
674 if (!unhandledElements
.isEmpty() && unhandledElements
.peek().equals(qName
)){
675 unhandledElements
.pop();
677 fireUnexpectedEndElement(event
.getLocation(), event
.asEndElement(), stackDepth
+ 1);
685 protected void popUnimplemented(EndElement endElement
) {
686 QName qName
= endElement
.asEndElement().getName();
687 if (unhandledElements
.peek().equals(qName
)){
688 unhandledElements
.pop();
690 String message
= "End element is not last on stack: %s";
691 message
= String
.format(message
, qName
.getLocalPart());
692 IIoEvent event
= makeProblemEvent(endElement
.getLocation(), message
, 16, 1);
700 * Fires an unexpected element event if the unhandled element stack is empty.
703 protected void handleUnexpectedElement(XMLEvent event
) {
704 if (event
.isStartElement()){
705 handleUnexpectedStartElement(event
, 2);
706 }else if (event
.isEndElement()){
707 handleUnexpectedEndElement(event
.asEndElement(), 2);
708 }else if (event
.getEventType() == XMLStreamConstants
.COMMENT
){
710 }else if (! unhandledElements
.empty()){
713 fireUnexpectedEvent(event
, 1);
718 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
721 protected void handleNotYetImplementedCharacters(XMLEvent event
) {
722 Characters chars
= event
.asCharacters();
723 fireNotYetImplementedCharacters(event
.getLocation(), chars
, 1);
727 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
730 protected void handleNotYetImplementedElement(XMLEvent event
) {
731 QName qName
= event
.asStartElement().getName();
732 boolean isTopLevel
= unhandledElements
.isEmpty();
733 unhandledElements
.push(qName
);
735 fireNotYetImplementedElement(event
.getLocation(), qName
, 1);
740 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
743 protected void handleIgnoreElement(XMLEvent event
) {
744 QName qName
= event
.asStartElement().getName();
745 unhandledElements
.push(qName
);
748 protected void handleAmbigousManually(MarkupImportState state
,
749 XMLEventReader reader
, StartElement startElement
) {
750 QName qName
= startElement
.getName();
751 unhandledElements
.push(qName
);
753 "Handle manually: " + qName
.getLocalPart() + " is ambigous and should therefore be handled manually",
754 makeLocationStr(startElement
.getLocation()), 2, 2);
758 * Checks if a mandatory text is not empty or null.
759 * Returns true if text is given.
760 * Fires an mandatory element is missing event otherwise and returns <code>null</code>.
765 protected boolean checkMandatoryText(String text
, XMLEvent parentEvent
) {
766 if (! StringUtils
.isNotBlank(text
)){
767 fireMandatoryElementIsMissing(parentEvent
, "CData", 4, 1);
774 * Fires an mandatory element is missing event if exists is <code>false</code>.
775 * @param hasMandatory
779 protected void checkMandatoryElement(boolean exists
, StartElement parentEvent
, String attrName
) {
781 fireMandatoryElementIsMissing(parentEvent
, attrName
, 5, 1);
787 * Fires an element is missing event.
792 * @throws IllegalStateException if xmlEvent is not a StartElement and not an Attribute
794 private void fireMandatoryElementIsMissing(XMLEvent xmlEvent
, String missingEventName
, int severity
, int stackDepth
) throws IllegalStateException
{
795 Location location
= xmlEvent
.getLocation();
798 if (xmlEvent
.isAttribute()){
799 Attribute attribute
= ((Attribute
)xmlEvent
);
800 typeName
= "attribute";
801 qName
= attribute
.getName();
802 }else if (xmlEvent
.isStartElement()){
803 typeName
= "element";
804 qName
= xmlEvent
.asStartElement().getName();
806 throw new IllegalStateException("mandatory element only allowed for attributes and start tags in " + makeLocationStr(location
));
808 String message
= "Mandatory %s '%s' is missing in %s";
809 message
= String
.format(message
, typeName
, missingEventName
, qName
.getLocalPart());
810 IIoEvent event
= makeProblemEvent(location
, message
, severity
, stackDepth
+1);
818 * Returns <code>true</code> if the "next" event is the ending tag for the "parent" event.
819 * @param next end element to test, must not be null
820 * @param parentEvent start element to test
821 * @return true if the "next" event is the ending tag for the "parent" event.
822 * @throws XMLStreamException
824 protected boolean isMyEndingElement(XMLEvent next
, XMLEvent parentEvent
) throws XMLStreamException
{
825 if (! parentEvent
.isStartElement()){
826 String message
= "Parent event should be start tag";
827 fireWarningEvent(message
, makeLocationStr(next
.getLocation()), 6);
830 return isEndingElement(next
, parentEvent
.asStartElement().getName().getLocalPart());
834 * Trims the text and removes turns all whitespaces into single empty space.
838 protected String
normalize(String text
) {
839 text
= StringUtils
.trimToEmpty(text
);
840 text
= text
.replaceAll("\\s+", " ");
847 * Removes whitespaces at beginning and end and makes the first letter
848 * a capital letter and all other letters small letters.
852 protected String
toFirstCapital(String value
) {
853 if (StringUtils
.isBlank(value
)){
857 value
= value
.trim();
858 result
+= value
.trim().substring(0,1).toUpperCase();
859 if (value
.length()>1){
860 result
+= value
.substring(1).toLowerCase();
867 * Currently not used.
869 * @param allowedNumberOfCharacters
870 * @param onlyFirstCapital
// Checks whether str looks like an abbreviation. Visible steps: the string must end
// with "."; the trailing dot is cut off; the remainder must not be longer than
// allowedNumberOfCharacters and is then matched against "one upper case letter followed
// by lower case letters only".
// NOTE(review): the parameter onlyFirstCapital is not referenced in the visible lines,
// and the statements between the signature and the endsWith check are not visible here.
873 protected boolean isAbbreviation(String str
, int allowedNumberOfCharacters
, boolean onlyFirstCapital
){
878 if (! str
.endsWith(".")){
881 str
= str
.substring(0, str
.length() -1);
882 if (str
.length() > allowedNumberOfCharacters
){
885 final String re
= "^\\p{javaUpperCase}\\p{javaLowerCase}*$";
886 if (str
.matches(re
)){
894 * Checks if <code>abbrev</code> is the short form for the genus name (strGenusName).
895 * Usually this is the case if <code>abbrev</code> is the first letter (optional with ".")
896 * of strGenusName. But in older floras it may also be the first 2 or 3 letters (optional with dot).
897 * However, we allow only a maximum of 2 letters to be anambigous. In cases with 3 letters better
898 * change the original markup data.
900 * @param strGenusName
903 protected boolean isGenusAbbrev(String abbrev
, String strGenusName
) {
904 if (! abbrev
.matches("[A-Z][a-z]?\\.?")) {
906 }else if (abbrev
.length() == 0 || strGenusName
== null || strGenusName
.length() == 0){
909 abbrev
= abbrev
.replace(".", "");
910 return strGenusName
.startsWith(abbrev
);
911 // boolean result = true;
912 // for (int i = 0 ; i < abbrev.length(); i++){
913 // result &= ( abbrev.charAt(i) == strGenusName.charAt(i));
921 * Checks if all words in the given string start with a capital letter but do not have any further capital letter.
922 * @param word the string to be checekd. Usually should be a single word.
923 * @return true if the above is the case, false otherwise
925 protected boolean isFirstCapitalWord(String word
) {
926 if (WordUtils
.capitalizeFully(word
).equals(word
)){
928 }else if (WordUtils
.capitalizeFully(word
,new char[]{'-'}).equals(word
)){
929 //for words like Le-Testui (which is a species epithet)
938 * Read next event. Ignore whitespace events.
941 * @throws XMLStreamException
943 protected XMLEvent
readNoWhitespace(XMLEventReader reader
) throws XMLStreamException
{
944 XMLEvent event
= reader
.nextEvent();
945 while (!unhandledElements
.isEmpty()){
946 if (event
.isStartElement()){
947 handleNotYetImplementedElement(event
);
948 }else if (event
.isEndElement()){
949 popUnimplemented(event
.asEndElement());
951 event
= reader
.nextEvent();
953 while (event
.isCharacters() && event
.asCharacters().isWhiteSpace()){
954 event
= reader
.nextEvent();
960 * Returns the REQUIRED "class" attribute for a given event and checks that it is the only attribute.
964 protected String
getClassOnlyAttribute(XMLEvent parentEvent
) {
965 return getClassOnlyAttribute(parentEvent
, true);
970 * Returns the "class" attribute for a given event and checks that it is the only attribute.
974 protected String
getClassOnlyAttribute(XMLEvent parentEvent
, boolean required
) {
975 return getOnlyAttribute(parentEvent
, CLASS
, required
);
979 * Returns the value for the only attribute for a given event and checks that it is the only attribute.
983 protected String
getOnlyAttribute(XMLEvent parentEvent
, String attrName
, boolean required
) {
984 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
985 String classValue
=getAndRemoveAttributeValue(parentEvent
, attributes
, attrName
, required
, 1);
986 checkNoAttributes(attributes
, parentEvent
);
991 protected void fireWarningEvent(String message
, String locationStr
, Integer severity
, Integer depth
) {
992 docImport
.fireWarningEvent(message
, locationStr
, severity
, depth
);
995 protected void fireWarningEvent(String message
, XMLEvent event
, Integer severity
) {
996 docImport
.fireWarningEvent(message
, makeLocationStr(event
.getLocation()), severity
, 1);
999 protected void fireSchemaConflictEventExpectedStartTag(String elName
, XMLEventReader reader
) throws XMLStreamException
{
1000 docImport
.fireSchemaConflictEventExpectedStartTag(elName
, reader
);
1004 protected void fireWarningEvent(String message
, String locationStr
, int severity
) {
1005 docImport
.fireWarningEvent(message
, locationStr
, severity
, 1);
1008 protected void fire(IIoEvent event
) {
1009 docImport
.fire(event
);
1012 protected boolean isNotBlank(String str
){
1013 return StringUtils
.isNotBlank(str
);
1016 protected boolean isBlank(String str
){
1017 return StringUtils
.isBlank(str
);
1020 protected TaxonDescription
getTaxonDescription(Taxon taxon
, Reference ref
, boolean isImageGallery
, boolean createNewIfNotExists
) {
1021 return docImport
.getTaxonDescription(taxon
, ref
, isImageGallery
, createNewIfNotExists
);
1024 protected TaxonDescription
getDefaultTaxonDescription(Taxon taxon
, boolean isImageGallery
, boolean createNewIfNotExists
, Reference source
) {
1025 return docImport
.getDefaultTaxonDescription(taxon
, isImageGallery
, createNewIfNotExists
, source
);
1029 * Returns the taxon description with marked as <code>true</code> with the given marker type.
1030 * If createNewIfNotExists a new description is created if it does not yet exist.
1031 * For the new description the source and the title are set if not <code>null</code>.
1034 * @param isImageGallery
1035 * @param createNewIfNotExists
1038 * @return the existing or new taxon description
1040 protected TaxonDescription
getMarkedTaxonDescription(Taxon taxon
, MarkerType markerType
, boolean isImageGallery
, boolean createNewIfNotExists
, Reference source
, String title
) {
1041 return docImport
.getMarkedTaxonDescription(taxon
, markerType
, isImageGallery
, createNewIfNotExists
, source
, title
);
1046 * Returns the default language defined in the state. If no default language is defined in the state,
1047 * the CDM default language is returned.
1051 protected Language
getDefaultLanguage(MarkupImportState state
) {
1052 Language result
= state
.getDefaultLanguage();
1053 if (result
== null){
1054 result
= Language
.DEFAULT();
1060 //*********************** FROM XML IMPORT BASE ****************************************
1061 protected boolean isEndingElement(XMLEvent event
, String elName
) throws XMLStreamException
{
1062 return docImport
.isEndingElement(event
, elName
);
1065 protected boolean isStartingElement(XMLEvent event
, String elName
) throws XMLStreamException
{
1066 return docImport
.isStartingElement(event
, elName
);
1070 protected void fillMissingEpithetsForTaxa(Taxon parentTaxon
, Taxon childTaxon
) {
1071 docImport
.fillMissingEpithetsForTaxa(parentTaxon
, childTaxon
);
1074 protected Feature
getFeature(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<Feature
> voc
){
1075 return docImport
.getFeature(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1078 protected PresenceAbsenceTerm
getPresenceAbsenceTerm(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, boolean isAbsenceTerm
, TermVocabulary
<PresenceAbsenceTerm
> voc
){
1079 return docImport
.getPresenceTerm(state
, uuid
, label
, text
, labelAbbrev
, isAbsenceTerm
, voc
);
1082 protected ExtensionType
getExtensionType(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
){
1083 return docImport
.getExtensionType(state
, uuid
, label
, text
, labelAbbrev
);
1086 protected DefinedTerm
getIdentifierType(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<DefinedTerm
> voc
){
1087 return docImport
.getIdentifierType(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1090 protected AnnotationType
getAnnotationType(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<AnnotationType
> voc
){
1091 return docImport
.getAnnotationType(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1094 protected MarkerType
getMarkerType(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<MarkerType
> voc
){
1095 return docImport
.getMarkerType(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1098 protected NamedAreaLevel
getNamedAreaLevel(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<NamedAreaLevel
> voc
){
1099 return docImport
.getNamedAreaLevel(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1102 protected NamedArea
getNamedArea(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, NamedAreaType areaType
, NamedAreaLevel level
, TermVocabulary voc
, TermMatchMode matchMode
){
1103 return docImport
.getNamedArea(state
, uuid
, label
, text
, labelAbbrev
, areaType
, level
, voc
, matchMode
);
1106 protected Language
getLanguage(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<?
> voc
){
1107 return docImport
.getLanguage(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1110 // *************************************** Concrete methods **********************************************/
// Computes the Rank for the given rank string. Visible behavior: a blank value is
// handled separately first; GENUS_ABBREVIATION maps to Rank.GENUS(); with byAbbrev the
// rank is looked up via Rank.getRankByIdInVoc (lower cased value), otherwise via
// Rank.getRankByEnglishName. The values "forma", (sub)section/genus/series/tribe and
// "§" are special cased, and "sous-genre" falls back to Rank.SUBGENUS() when no rank
// was found. UnknownCdmTypeException is caught.
// NOTE(review): several lines (declarations, returns, closing braces) are not visible
// in this view, so the exact nesting of the special cases must be confirmed.
1119 protected Rank
makeRank(MarkupImportState state
, String value
, boolean byAbbrev
) {
1121 if (StringUtils
.isBlank(value
)) {
1125 boolean useUnknown
= true;
1126 NomenclaturalCode nc
= makeNomenclaturalCode(state
);
1127 if (value
.equals(GENUS_ABBREVIATION
)){
1128 rank
= Rank
.GENUS();
1129 }else if (byAbbrev
) {
1130 rank
= Rank
.getRankByIdInVoc(value
.toLowerCase(), nc
, useUnknown
);
1131 if (value
.equalsIgnoreCase("forma")){
1133 }else if (value
.toLowerCase().matches("(sub)?(section|genus|series|tribe)")){
1134 return Rank
.getRankByEnglishName(value
, nc
, useUnknown
);
1135 }else if (value
.equals("§")){
1136 return Rank
.SECTION_BOTANY(); //Special case in Flora Malesiana
1139 rank
= Rank
.getRankByEnglishName(value
, nc
, useUnknown
);
1141 if (rank
.equals(Rank
.UNKNOWN_RANK())) {
1144 if (rank
== null && "sous-genre".equalsIgnoreCase(value
)){
1145 rank
= Rank
.SUBGENUS();
1147 } catch (UnknownCdmTypeException e
) {
1153 NonViralNameParserImpl parser
= NonViralNameParserImpl
.NewInstance();
1154 protected TeamOrPersonBase
<?
> createAuthor(MarkupImportState state
, String authorTitle
) {
1155 TeamOrPersonBase
<?
> result
= parser
.author(authorTitle
);
1156 return state
.getDeduplicationHelper(docImport
).getExistingAuthor(state
, result
);
1159 protected String
getAndRemoveMapKey(Map
<String
, String
> map
, String key
) {
1160 String result
= map
.get(key
);
1162 if (result
!= null) {
1163 result
= normalize(result
);
1165 return StringUtils
.stripToNull(result
);
1170 * Creates a {@link NonViralName} object depending on the defined {@link NomenclaturalCode}
1171 * and the given parameters.
1176 protected INonViralName
createNameByCode(MarkupImportState state
, Rank rank
) {
1177 NomenclaturalCode nc
= makeNomenclaturalCode(state
);
1178 INonViralName name
= nc
.getNewTaxonNameInstance(rank
);
// Handles a full name element: reads the required "rank" attribute and the optional
// "hybridClass" attribute, computes the rank via makeRank, reads the element text with
// getCData (without attribute check) and parses it into the given name with
// NonViralNameParserImpl.parseFullName. Afterwards the hybridClass value is compared
// with the hybrid status of the parsed name and warnings are fired on mismatch.
// NOTE(review): the message "Hybrid name is recognized: " is fired when
// name.isHybridName() is false - the wording probably should be "not recognized".
// NOTE(review): some lines (e.g. around the null rank check) are not visible in this view.
1182 protected void handleFullName(MarkupImportState state
, XMLEventReader reader
,
1183 INonViralName name
, XMLEvent event
) throws XMLStreamException
{
1185 Map
<String
, Attribute
> attrs
= getAttributes(event
);
1186 String rankStr
= getAndRemoveRequiredAttributeValue(event
, attrs
, "rank");
1187 String hybridClass
= getAndRemoveAttributeValue(attrs
, "hybridClass");
1189 Rank rank
= makeRank(state
, rankStr
, false);
1192 String message
= "Rank was computed as null. This must not be.";
1193 fireWarningEvent(message
, event
, 6);
1194 name
.setRank(Rank
.UNKNOWN_RANK());
1196 if (!attrs
.isEmpty()) {
1197 handleUnexpectedAttributes(event
.getLocation(), attrs
);
1199 fullNameStr
= getCData(state
, reader
, event
, false);
1200 NonViralNameParserImpl
.NewInstance().parseFullName(name
, fullNameStr
, rank
, false);
1201 if (hybridClass
!= null ){
1202 if ("hybrid formula".equals(hybridClass
)){
1203 if (!name
.isHybridFormula()){
1204 fireWarningEvent("Hybrid formula is not set though requested: " + fullNameStr
, event
, 4);
1206 }else if ("hybrid".equals(hybridClass
)){
1207 if (!name
.isHybridName()){
1208 fireWarningEvent("Hybrid name is recognized: " + fullNameStr
, event
, 4);
1211 handleNotYetImplementedAttributeValue(event
, "hybridClass", hybridClass
);
1218 * Returns the {@link NomenclaturalCode} for this import. Default is {@link NomenclaturalCode#ICBN} if
1219 * no code is defined.
1223 protected NomenclaturalCode
makeNomenclaturalCode(MarkupImportState state
) {
1224 NomenclaturalCode nc
= state
.getConfig().getNomenclaturalCode();
1226 nc
= NomenclaturalCode
.ICNAFP
; // default;
1234 * @param levelString
1238 protected NamedAreaLevel
makeNamedAreaLevel(MarkupImportState state
, String levelString
, XMLEvent next
) {
1239 NamedAreaLevel level
;
1241 level
= state
.getTransformer().getNamedAreaLevelByKey(levelString
);
1242 if (level
== null) {
1243 UUID levelUuid
= state
.getTransformer().getNamedAreaLevelUuid(levelString
);
1244 if (levelUuid
== null) {
1245 String message
= "Unknown distribution locality class (named area level): %s. Create new level instead.";
1246 message
= String
.format(message
, levelString
);
1247 fireWarningEvent(message
, next
, 6);
1249 level
= getNamedAreaLevel(state
, levelUuid
, levelString
, levelString
, levelString
, null);
1251 } catch (UndefinedTransformerMethodException e
) {
1252 throw new RuntimeException(e
);
// Returns the NamedArea for the given area name. Visible behavior: the transformer of
// the state is asked for the area by key; a uuid is taken from the state, from the
// transformer or created randomly and registered in the state; the area is then
// retrieved via getNamedArea with match mode UUID_LABEL and its uuid is stored in the
// state. For the hard coded area names "Bangka", "Luzon", "Mindanao" and "Palawan"
// a GeoServiceArea mapping is set via editGeoService (marked as test code by the TODO).
// UndefinedTransformerMethodException is rethrown as RuntimeException.
// NOTE(review): conditions guarding these steps are not visible in this view.
1264 protected NamedArea
makeArea(MarkupImportState state
, String areaName
, NamedAreaLevel level
) {
1266 //TODO FM vocabulary
1267 TermVocabulary
<NamedArea
> voc
= null;
1268 NamedAreaType areaType
= null;
1270 NamedArea area
= null;
1272 area
= state
.getTransformer().getNamedAreaByKey(areaName
);
1273 } catch (UndefinedTransformerMethodException e
) {
1274 throw new RuntimeException(e
);
1277 boolean isNewInState
= false;
1278 UUID uuid
= state
.getAreaUuid(areaName
);
1280 isNewInState
= true;
1282 uuid
= state
.getTransformer().getNamedAreaUuid(areaName
);
1284 uuid
= UUID
.randomUUID();
1285 state
.putAreaUuid(areaName
, uuid
);
1287 } catch (UndefinedTransformerMethodException e
) {
1288 throw new RuntimeException(e
);
1292 CdmImportBase
.TermMatchMode matchMode
= CdmImportBase
.TermMatchMode
.UUID_LABEL
;
1293 area
= getNamedArea(state
, uuid
, areaName
, areaName
, areaName
, areaType
, level
, voc
, matchMode
);
1295 state
.putAreaUuid(areaName
, area
.getUuid());
1297 //TODO just for testing -> make generic and move to better place
1298 String geoServiceLayer
="vmap0_as_bnd_political_boundary_a";
1299 String layerFieldName
="nam";
1301 if ("Bangka".equals(areaName
)){
1302 String areaValue
= "PULAU BANGKA#SUMATERA SELATAN";
1303 GeoServiceArea geoServiceArea
= new GeoServiceArea();
1304 geoServiceArea
.add(geoServiceLayer
, layerFieldName
, areaValue
);
1305 this.editGeoService
.setMapping(area
, geoServiceArea
);
1306 // save(area, state);
1308 if ("Luzon".equals(areaName
)){
1309 GeoServiceArea geoServiceArea
= new GeoServiceArea();
1311 List
<String
> list
= Arrays
.asList("HERMANA MAYOR ISLAND#CENTRAL LUZON",
1312 "HERMANA MENOR ISLAND#CENTRAL LUZON",
1314 for (String areaValue
: list
){
1315 geoServiceArea
.add(geoServiceLayer
, layerFieldName
, areaValue
);
1318 this.editGeoService
.setMapping(area
, geoServiceArea
);
1319 // save(area, state);
1321 if ("Mindanao".equals(areaName
)){
1322 GeoServiceArea geoServiceArea
= new GeoServiceArea();
1324 List
<String
> list
= Arrays
.asList("NORTHERN MINDANAO",
1325 "SOUTHERN MINDANAO",
1326 "WESTERN MINDANAO");
1327 //TODO to be continued
1328 for (String areaValue
: list
){
1329 geoServiceArea
.add(geoServiceLayer
, layerFieldName
, areaValue
);
1332 this.editGeoService
.setMapping(area
, geoServiceArea
);
1333 // save(area, state);
1335 if ("Palawan".equals(areaName
)){
1336 GeoServiceArea geoServiceArea
= new GeoServiceArea();
1338 List
<String
> list
= Arrays
.asList("PALAWAN#SOUTHERN TAGALOG");
1339 for (String areaValue
: list
){
1340 geoServiceArea
.add(geoServiceLayer
, layerFieldName
, areaValue
);
1343 this.editGeoService
.setMapping(area
, geoServiceArea
);
1344 // save(area, state);
1355 * Reads character data. Any element other than character data or the ending
1356 * tag will fire an unexpected element event.
1358 * @see #getCData(MarkupImportState, XMLEventReader, XMLEvent, boolean)
1363 * @throws XMLStreamException
1365 protected String
getCData(MarkupImportState state
, XMLEventReader reader
, XMLEvent next
) throws XMLStreamException
{
1366 return getCData(state
, reader
, next
, true);
1370 * Reads character data. Any element other than character data or the ending
1371 * tag will fire an unexpected element event.
1376 * @param inlineMarkup map for inline markup, this is used for e.g. the locality markup within a subheading
1377 * The map will be filled by the markup element name as key. The value may be a String, a CdmBase or any other object.
1378 * If null any markup text will be neglected but a warning will be fired if they exist.
1379 * @param removeInlineMarkupText if true the markedup text will be removed from the returned String
1380 * @param checkAttributes
1382 * @throws XMLStreamException
1384 protected String
getCData(MarkupImportState state
, XMLEventReader reader
, XMLEvent parent
, /*Map<String, Object> inlineMarkup, *boolean removeInlineMarkupText,*/ boolean checkAttributes
) throws XMLStreamException
{
1385 if (checkAttributes
){
1386 checkNoAttributes(parent
);
1390 while (reader
.hasNext()) {
1391 XMLEvent next
= readNoWhitespace(reader
);
1392 if (isMyEndingElement(next
, parent
)) {
1394 } else if (next
.isCharacters()) {
1395 text
+= next
.asCharacters().getData();
1396 } else if (isStartingElement(next
, FOOTNOTE_REF
)){
1397 handleNotYetImplementedElement(next
);
1398 // } else if (isStartingElement(next, LOCALITY)){
1399 // handleCDataLocality(state, reader, parent);
1401 handleUnexpectedElement(next
);
1404 throw new IllegalStateException("Event has no closing tag");
1408 // private void handleCDataLocality(MarkupImportState state, XMLEventReader reader, XMLEvent parent) {
1409 // checkAndRemoveAttributeValue(attributes, attrName, value)
1416 * For it returns a pure CData annotation string. This behaviour may change in future. More complex annotations
1417 * should be handled differently.
1420 * @param parentEvent
1422 * @throws XMLStreamException
1424 protected String
handleSimpleAnnotation(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
1425 String annotation
= getCData(state
, reader
, parentEvent
);
1430 * True if text is single "." oder "," or ";" or ":"
1434 protected boolean isPunctuation(String text
) {
1435 return text
== null ?
false : text
.trim().matches("^[\\.,;:]$");
1440 * Text indicating that type information is following but no information about the type of the type
1444 protected boolean charIsSimpleType(String text
) {
1445 return text
.matches("(?i)Type:");
1448 protected String
getXmlTag(XMLEvent event
) {
1450 if (event
.isStartElement()) {
1451 result
= "<" + event
.asStartElement().getName().getLocalPart()
1453 } else if (event
.isEndElement()) {
1454 result
= "</" + event
.asEndElement().getName().getLocalPart() + ">";
1456 String message
= "Only start or end elements are allowed as Html tags";
1457 throw new IllegalStateException(message
);
// Handles a writer element. Visible behavior: the parent must have no attributes;
// character data is collected and footnote references with isRef() are collected into
// a list (others are only logged). At the end tag brackets are removed from the text
// and, if checkMandatoryText succeeds, the normalized text is stored in a
// WriterDataHolder together with the footnotes, an Extension of the writer extension
// type and an Annotation of the writer annotation type. Any other event is reported
// as unexpected and the state is set to unsuccessful. An IllegalStateException is
// thrown if the reader ends before the end tag.
// NOTE(review): the return statements are not visible in this view.
1462 protected WriterDataHolder
handleWriter(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
1464 checkNoAttributes(parentEvent
);
1465 WriterDataHolder dataHolder
= new WriterDataHolder();
1466 List
<FootnoteDataHolder
> footnotes
= new ArrayList
<>();
1468 // TODO handle attributes
1469 while (reader
.hasNext()) {
1470 XMLEvent next
= readNoWhitespace(reader
);
1471 if (isMyEndingElement(next
, parentEvent
)) {
1472 text
= CdmUtils
.removeBrackets(text
);
1473 if (checkMandatoryText(text
, parentEvent
)) {
1474 text
= normalize(text
);
1475 dataHolder
.writer
= text
;
1476 dataHolder
.footnotes
= footnotes
;
1479 UUID uuidWriterExtension
= MarkupTransformer
.uuidWriterExtension
;
1480 ExtensionType writerExtensionType
=
1481 this.getExtensionType(state
, uuidWriterExtension
,"Writer", "writer", "writer");
1482 Extension extension
= Extension
.NewInstance();
1483 extension
.setType(writerExtensionType
);
1484 extension
.setValue(text
);
1485 dataHolder
.extension
= extension
;
1488 UUID uuidWriterAnnotation
= MarkupTransformer
.uuidWriterAnnotation
;
1489 AnnotationType writerAnnotationType
= this.getAnnotationType(state
, uuidWriterAnnotation
, "Writer", "writer", "writer", null);
1490 Annotation annotation
= Annotation
.NewInstance(text
, writerAnnotationType
, getDefaultLanguage(state
));
1491 dataHolder
.annotation
= annotation
;
1497 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
1498 FootnoteDataHolder footNote
= handleFootnoteRef(state
, reader
, next
);
1499 if (footNote
.isRef()) {
1500 footnotes
.add(footNote
);
1502 logger
.warn("Non ref footnotes not yet impelemnted");
1504 } else if (next
.isCharacters()) {
1505 text
+= next
.asCharacters().getData();
1508 handleUnexpectedElement(next
);
1509 state
.setUnsuccessfull();
1512 throw new IllegalStateException("<writer> has no end tag");
1516 protected void registerFootnotes(MarkupImportState state
, AnnotatableEntity entity
, List
<FootnoteDataHolder
> footnotes
) {
1517 for (FootnoteDataHolder footNote
: footnotes
) {
1518 registerFootnoteDemand(state
, entity
, footNote
);
1523 private void registerFootnoteDemand(MarkupImportState state
, AnnotatableEntity entity
, FootnoteDataHolder footnote
) {
1524 FootnoteDataHolder existingFootnote
= state
.getFootnote(footnote
.ref
);
1525 if (existingFootnote
!= null) {
1526 attachFootnote(state
, entity
, existingFootnote
);
1528 Set
<AnnotatableEntity
> demands
= state
.getFootnoteDemands(footnote
.ref
);
1529 if (demands
== null) {
1530 demands
= new HashSet
<>();
1531 state
.putFootnoteDemands(footnote
.ref
, demands
);
1533 demands
.add(entity
);
1538 protected void attachFootnote(MarkupImportState state
, AnnotatableEntity entity
, FootnoteDataHolder footnote
) {
1539 AnnotationType annotationType
= this.getAnnotationType(state
, MarkupTransformer
.uuidFootnote
, "Footnote", "An e-flora footnote", "fn", null);
1540 Annotation annotation
= Annotation
.NewInstance(footnote
.string
, annotationType
, getDefaultLanguage(state
));
1541 // TODO transient objects
1542 entity
.addAnnotation(annotation
);
1543 save(entity
, state
);
// Attaches the figure (media) to the entity depending on the entity type:
// TextData and IdentifiableMediaEntity get the media added via addMedia, for
// SpecimenOrObservationBase a warning is fired, for all other types an
// "Unsupported entity" message is built. The entity is saved afterwards.
// NOTE(review): some lines of the branches are not visible in this view.
1547 protected void attachFigure(MarkupImportState state
, XMLEvent next
, AnnotatableEntity entity
, Media figure
) {
1548 // IdentifiableEntity<?> toSave;
1549 if (entity
.isInstanceOf(TextData
.class)) {
1550 TextData deb
= CdmBase
.deproxy(entity
, TextData
.class);
1551 deb
.addMedia(figure
);
1552 // toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
1553 } else if (entity
.isInstanceOf(SpecimenOrObservationBase
.class)) {
1554 String message
= "figures for specimen should be handled as Textdata";
1555 fireWarningEvent(message
, next
, 4);
1557 } else if (entity
.isInstanceOf(IdentifiableMediaEntity
.class)) {
1558 IdentifiableMediaEntity
<?
> ime
= CdmBase
.deproxy(entity
, IdentifiableMediaEntity
.class);
1559 ime
.addMedia(figure
);
1562 String message
= "Unsupported entity to attach media: %s";
1563 message
= String
.format(message
, entity
.getClass().getName());
1566 save(entity
, state
);
1570 protected void registerGivenFootnote(MarkupImportState state
, FootnoteDataHolder footnote
) {
1571 state
.registerFootnote(footnote
);
1572 Set
<AnnotatableEntity
> demands
= state
.getFootnoteDemands(footnote
.id
);
1573 if (demands
!= null) {
1574 for (AnnotatableEntity entity
: demands
) {
1575 attachFootnote(state
, entity
, footnote
);
1581 protected FootnoteDataHolder
handleFootnote(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
,
1582 MarkupSpecimenImport specimenImport
, MarkupNomenclatureImport nomenclatureImport
) throws XMLStreamException
{
1583 FootnoteDataHolder result
= new FootnoteDataHolder();
1584 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
1585 result
.id
= getAndRemoveAttributeValue(attributes
, ID
);
1586 // result.ref = getAndRemoveAttributeValue(attributes, REF);
1587 checkNoAttributes(attributes
, parentEvent
);
1589 while (reader
.hasNext()) {
1590 XMLEvent next
= readNoWhitespace(reader
);
1591 if (isStartingElement(next
, FOOTNOTE_STRING
)) {
1592 String string
= handleFootnoteString(state
, reader
, next
, specimenImport
, nomenclatureImport
);
1593 result
.string
= string
;
1594 } else if (isMyEndingElement(next
, parentEvent
)) {
1597 fireUnexpectedEvent(next
, 0);
1604 protected Media
handleFigure(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
,
1605 MarkupSpecimenImport specimenImport
, MarkupNomenclatureImport nomenclatureImport
) throws XMLStreamException
{
1606 // FigureDataHolder result = new FigureDataHolder();
1608 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
1609 String id
= getAndRemoveAttributeValue(attributes
, ID
);
1610 String type
= getAndRemoveAttributeValue(attributes
, TYPE
);
1611 String urlAttr
= getAndRemoveAttributeValue(attributes
, URL
);
1612 checkNoAttributes(attributes
, parentEvent
);
1614 String urlString
= null;
1615 String legendString
= null;
1616 String titleString
= null;
1617 String numString
= null;
1619 if (isNotBlank(urlAttr
)){
1620 urlString
= CdmUtils
.Nz(state
.getBaseMediaUrl()) + urlAttr
;
1622 while (reader
.hasNext()) {
1623 XMLEvent next
= readNoWhitespace(reader
);
1624 if (isMyEndingElement(next
, parentEvent
)) {
1625 if (isNotBlank(text
)){
1626 if (isNeglectableFigureText(text
)){
1627 fireWarningEvent("Text not yet handled for figures: " + text
, next
, 4);
1630 Media media
= makeFigure(state
, id
, type
, urlString
, legendString
, titleString
, numString
, next
);
1632 } else if (isStartingElement(next
, FIGURE_LEGEND
)) {
1633 // TODO same as figure string ?
1634 legendString
= handleFootnoteString(state
, reader
, next
, specimenImport
, nomenclatureImport
);
1635 } else if (isStartingElement(next
, FIGURE_TITLE
)) {
1636 titleString
= getCData(state
, reader
, next
);
1637 } else if (isStartingElement(next
, URL
)) {
1638 String localUrl
= getCData(state
, reader
, next
);
1639 String url
= CdmUtils
.Nz(state
.getBaseMediaUrl()) + localUrl
;
1640 if (isBlank(urlString
)){
1643 if (! url
.equals(urlString
)){
1644 String message
= "URL attribute and URL element differ. Attribute: %s, Element: %s";
1645 fireWarningEvent(String
.format(message
, urlString
, url
), next
, 2);
1647 } else if (isStartingElement(next
, NUM
)) {
1648 numString
= getCData(state
, reader
, next
);
1649 } else if (next
.isCharacters()) {
1650 text
= CdmUtils
.concat("", text
, next
.asCharacters().getData());
1652 fireUnexpectedEvent(next
, 0);
1655 throw new IllegalStateException("<figure> has no end tag");
1663 private boolean isNeglectableFigureText(String text
) {
1664 if (text
.matches("Fig\\.*")){
1677 * @param legendString
1678 * @param titleString
// Creates the media for a figure. Visible behavior: the type must be "lineart",
// null, "photo", "signature" or "others", any other type fires a warning; the media is
// created via docImport.getImageMedia from the url; title and legend are stored for
// the default language if not blank; num and id are added as sources of type Import
// with the source reference of the configuration; a blank id fires a warning; the
// figure is registered via registerGivenFigure. A MalformedURLException results in a
// warning.
// NOTE(review): the try block, the null checks and the return statement are not
// visible in this view.
1682 private Media
makeFigure(MarkupImportState state
, String id
, String type
, String urlString
,
1683 String legendString
, String titleString
, String numString
, XMLEvent next
) {
1685 // boolean isFigure = false; //no difference between figure and media since v3.3
1687 //TODO maybe everything is a figure as it is all taken from a book
1688 if ("lineart".equals(type
)) {
1690 // media = Figure.NewInstance(url.toURI(), null, null, null);
1691 } else if (type
== null || "photo".equals(type
)
1692 || "signature".equals(type
)
1693 || "others".equals(type
)) {
1696 String message
= "Unknown figure type '%s'";
1697 message
= String
.format(message
, type
);
1698 fireWarningEvent(message
, next
, 2);
1700 media
= docImport
.getImageMedia(urlString
, docImport
.getReadMediaData());
1704 if (StringUtils
.isNotBlank(titleString
)) {
1705 media
.putTitle(getDefaultLanguage(state
), titleString
);
1708 if (StringUtils
.isNotBlank(legendString
)) {
1709 media
.putDescription(getDefaultLanguage(state
), legendString
);
1711 if (StringUtils
.isNotBlank(numString
)) {
1712 // TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
1714 Reference citation
= state
.getConfig().getSourceReference();
1715 media
.addSource(OriginalSourceType
.Import
, numString
, "num", citation
, null);
1716 // TODO name used in source if available
1718 // TODO which citation
1719 if (StringUtils
.isNotBlank(id
)) {
1720 media
.addSource(OriginalSourceType
.Import
, id
, null, state
.getConfig().getSourceReference(), null);
1722 String message
= "Figure id should never be empty or null";
1723 fireWarningEvent(message
, next
, 6);
1728 registerGivenFigure(state
, next
, id
, media
);
1731 String message
= "No media found: ";
1732 fireWarningEvent(message
, next
, 4);
1734 } catch (MalformedURLException e
) {
1735 String message
= "Media uri has incorrect syntax: %s";
1736 message
= String
.format(message
, urlString
);
1737 fireWarningEvent(message
, next
, 4);
1738 // } catch (URISyntaxException e) {
1739 // String message = "Media uri has incorrect syntax: %s";
1740 // message = String.format(message, urlString);
1741 // fireWarningEvent(message, next, 4);
1748 private void registerGivenFigure(MarkupImportState state
, XMLEvent next
, String id
, Media figure
) {
1749 state
.registerFigure(id
, figure
);
1750 Set
<AnnotatableEntity
> demands
= state
.getFigureDemands(id
);
1751 if (demands
!= null) {
1752 for (AnnotatableEntity entity
: demands
) {
1753 attachFigure(state
, next
, entity
, figure
);
1756 save(figure
, state
);
1760 private FootnoteDataHolder
handleFootnoteRef(MarkupImportState state
,
1761 XMLEventReader reader
, XMLEvent parentEvent
)
1762 throws XMLStreamException
{
1763 FootnoteDataHolder result
= new FootnoteDataHolder();
1764 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
1765 result
.ref
= getAndRemoveAttributeValue(attributes
, REF
);
1766 checkNoAttributes(attributes
, parentEvent
);
1768 // text is not handled, needed only for debugging purposes
1770 while (reader
.hasNext()) {
1771 XMLEvent next
= readNoWhitespace(reader
);
1772 // if (isStartingElement(next, FOOTNOTE_STRING)){
1773 // String string = handleFootnoteString(state, reader, next);
1774 // result.string = string;
1776 if (isMyEndingElement(next
, parentEvent
)) {
1777 if (StringUtils
.isNotBlank(text
)){
1778 fireWarningEvent("text is not empty but not handled during import", parentEvent
, 4);
1781 } else if (next
.isCharacters() && unhandledElements
.isEmpty()) {
1782 text
+= next
.asCharacters().getData();
1783 } else if (isStartingElement(next
, NUM
)) {
1784 //ignore numbering of footnotes as they are numbered differently in the CDM
1785 handleIgnoreElement(next
);
1787 handleUnexpectedElement(next
);
// Collects the text of a footnote string (also used for figure legends). Visible
// behavior: html tags (see isHtml) are appended as tag strings, gathering and
// references elements are handled inline and their text is appended, fullName and
// nomenclature elements are passed to the not-yet-implemented handling, character
// data is appended trimmed. Other start/end elements are reported as unexpected.
// An IllegalStateException is thrown if the reader ends before the closing tag.
// NOTE(review): the handling of <br> and of the isTextMode flag is not visible in
// this view.
1795 private String
handleFootnoteString(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, MarkupSpecimenImport specimenImport
, MarkupNomenclatureImport nomenclatureImport
) throws XMLStreamException
{
1796 boolean isTextMode
= true;
1798 while (reader
.hasNext()) {
1799 XMLEvent next
= readNoWhitespace(reader
);
1800 if (isMyEndingElement(next
, parentEvent
)) {
1802 } else if (next
.isEndElement()) {
1803 if (isEndingElement(next
, FULL_NAME
)) {
1804 popUnimplemented(next
.asEndElement());
1805 } else if (isEndingElement(next
, BR
)) {
1807 } else if (isHtml(next
)) {
1808 text
+= getXmlTag(next
);
1810 handleUnexpectedEndElement(next
.asEndElement());
1812 } else if (next
.isStartElement()) {
1813 if (isStartingElement(next
, FULL_NAME
)) {
1814 handleNotYetImplementedElement(next
);
1815 } else if (isStartingElement(next
, GATHERING
)) {
1816 text
+= specimenImport
.handleInLineGathering(state
, reader
, next
);
1817 } else if (isStartingElement(next
, REFERENCES
)) {
1818 text
+= " " + handleInLineReferences(state
, reader
, next
, nomenclatureImport
) + " ";
1819 } else if (isStartingElement(next
, BR
)) {
1822 } else if (isStartingElement(next
, NOMENCLATURE
)) {
1823 handleNotYetImplementedElement(next
);
1824 } else if (isHtml(next
)) {
1825 text
+= getXmlTag(next
);
1827 handleUnexpectedStartElement(next
.asStartElement());
1829 } else if (next
.isCharacters()) {
1831 String message
= "footnoteString is not in text mode";
1832 fireWarningEvent(message
, next
, 6);
1834 text
+= next
.asCharacters().getData().trim();
1835 // getCData(state, reader, next); does not work as we have inner tags like <references>
1838 handleUnexpectedEndElement(next
.asEndElement());
1841 throw new IllegalStateException("<footnoteString> has no closing tag");
1845 private static final List
<String
> htmlList
= Arrays
.asList("sub", "sup",
1846 "ol", "ul", "li", "i", "b", "table", "br","tr","td","th");
1848 protected boolean isHtml(XMLEvent event
) {
1849 if (event
.isStartElement()) {
1850 String tag
= event
.asStartElement().getName().getLocalPart();
1851 return htmlList
.contains(tag
);
1852 } else if (event
.isEndElement()) {
1853 String tag
= event
.asEndElement().getName().getLocalPart();
1854 return htmlList
.contains(tag
);
/**
 * Handles an inline references element, consuming events until the end element
 * matching <code>parentEvent</code> is reached. The parent element must not have attributes.
 * For each REFERENCE child the result of handleInLineReference is appended to <code>text</code>;
 * at least one REFERENCE child is mandatory (checked via checkMandatoryElement).
 * NOTE(review): the declaration of <code>text</code> and the return statement are not visible
 * in this excerpt - presumably the concatenated inline strings are returned.
 *
 * @param state the import state
 * @param reader the event reader
 * @param parentEvent the start element of the references element
 * @param nomenclatureImport passed on to handleInLineReference
 * @throws XMLStreamException if reading from the reader fails
 * @throws IllegalStateException if the reader is exhausted before the closing tag
 */
1862 private String
handleInLineReferences(MarkupImportState state
,XMLEventReader reader
, XMLEvent parentEvent
,
1863 MarkupNomenclatureImport nomenclatureImport
) throws XMLStreamException
{
1864 checkNoAttributes(parentEvent
);
1866 boolean hasReference
= false;
1868 while (reader
.hasNext()) {
1869 XMLEvent next
= readNoWhitespace(reader
);
1870 if (isMyEndingElement(next
, parentEvent
)) {
// at the end tag verify that at least one reference was found
1871 checkMandatoryElement(hasReference
, parentEvent
.asStartElement(), REFERENCE
);
1873 } else if (isStartingElement(next
, REFERENCE
)) {
1874 text
+= handleInLineReference(state
, reader
, next
, nomenclatureImport
);
1875 hasReference
= true;
1877 handleUnexpectedElement(next
);
1880 throw new IllegalStateException("<References> has no closing tag");
1883 private String
handleInLineReference(MarkupImportState state
,XMLEventReader reader
, XMLEvent parentEvent
, MarkupNomenclatureImport nomenclatureImport
)throws XMLStreamException
{
1884 Reference reference
= nomenclatureImport
.handleReference(state
, reader
, parentEvent
);
1885 fireWarningEvent("Check correct usage of inline reference", parentEvent
, 3);
1886 IntextReference intext
= IntextReference
.NewInstance(reference
, null, 0, 0);
1887 save(reference
, state
);
1888 return intext
.toInlineString(reference
.getTitleCache());
/**
 * Result container for the content collected below one subheading of a string element.
 * Instances are filled in putCurrentSubheading.
 * NOTE(review): putCurrentSubheading also assigns a <code>text</code> member whose
 * declaration is not visible in this excerpt.
 */
1891 protected class SubheadingResult
{
// the references collected for the subheading
1893 StringReferences references
;
// the intext references created for labeled references having a reference attached
1894 List
<IntextReference
> inlineReferences
;
1898 * Handle < string > .
1901 * @param parentEvent
1902 * @param feature only needed for distributionLocalities
1904 * @throws XMLStreamException
/**
 * Handles a string element. Events are consumed until the end element matching
 * <code>parentEvent</code> is reached; the collected content is stored per subheading
 * (via putCurrentSubheading) in the returned map.
 * <ul>
 * <li>A non-blank class attribute only results in a "not yet implemented" warning.</li>
 * <li>SUB_HEADING: the content collected so far is stored under the previous subheading,
 *     the reference collections are reset and the new subheading is read (trimmed).</li>
 * <li>REFERENCES: read by handleStringReferences; the labels are appended to the text when
 *     the next event arrives that is neither a BR tag nor a SUB_HEADING start.</li>
 * <li>DISTRIBUTION_LOCALITY and HABITAT: handled by handleDistributionLocality and
 *     featureImport.handleHabitat, their results are appended to the text.</li>
 * <li>html tags (see isHtml) are appended via getXmlTag, character data is appended as is.</li>
 * <li>A number of further elements is only reported as not yet implemented.</li>
 * </ul>
 * NOTE(review): the declaration of <code>text</code> and the bodies of the BR branches are
 * not visible in this excerpt.
 *
 * @param state the import state
 * @param reader the event reader
 * @param parentEvent the start element of the string element
 * @param feature only needed for distribution localities; a warning is fired if it is
 *        not null and not the distribution feature
 * @return the map from subheading to collected result; the subheading key is
 *         <code>null</code> for content found before the first subheading
 * @throws XMLStreamException if reading from the reader fails
 * @throws IllegalStateException if the reader is exhausted before the closing tag
 */
1906 protected Map
<String
, SubheadingResult
> handleString(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, Feature feature
)throws XMLStreamException
{
1908 String classValue
= getClassOnlyAttribute(parentEvent
, false);
1909 if (StringUtils
.isNotBlank(classValue
)) {
1910 String message
= "class attribute for <string> not yet implemented";
1911 fireWarningEvent(message
, parentEvent
, 2);
// NOTE(review): isHabitat is not read anywhere in the visible code
1913 boolean isHabitat
= false;
1916 Map
<String
, SubheadingResult
> subHeadingMap
= new HashMap
<>();
1917 String currentSubheading
= null;
1919 boolean isTextMode
= true;
1921 StringReferences currentReferences
= null;
1922 List
<IntextReference
> inlineReferences
= new ArrayList
<>();
1923 boolean lastWasReference
= false;
1924 while (reader
.hasNext()) {
1925 XMLEvent next
= readNoWhitespace(reader
);
1926 if (isMyEndingElement(next
, parentEvent
)) {
// end of the string element: store what was collected for the last subheading
1927 putCurrentSubheading(subHeadingMap
, currentSubheading
, text
, currentReferences
, inlineReferences
);
1928 return subHeadingMap
;
1930 //check if last event was reference
1931 if (lastWasReference
&& !isStartingElement(next
, BR
) && !isEndingElement(next
, BR
)
1932 && !isStartingElement(next
, SUB_HEADING
)){
// the references are followed by further content, so they are handled as inline
// references: labels having a reference attached are added as intext reference
1933 for (LabeledReference labeledRef
: currentReferences
.content
){
1934 if (labeledRef
.ref
!= null){
1935 IntextReference intext
= IntextReference
.NewInstance(labeledRef
.ref
, null, 0, 0);
1936 inlineReferences
.add(intext
);
1937 text
+= intext
.toInlineString(labeledRef
.label
);
1939 text
+= labeledRef
.label
;
1942 lastWasReference
= false;
1944 if (isStartingElement(next
, BR
)) {
1947 } else if (isEndingElement(next
, BR
)) {
1949 } else if (isHtml(next
)) {
1950 text
+= getXmlTag(next
);
1951 } else if (isStartingElement(next
, SUB_HEADING
)) {
// a new subheading starts: store the previous one and reset the collectors
1952 text
= putCurrentSubheading(subHeadingMap
, currentSubheading
, text
, currentReferences
, inlineReferences
);
1953 currentReferences
= null;
1954 inlineReferences
= new ArrayList
<>();
1955 lastWasReference
= false;
1957 currentSubheading
= getCData(state
, reader
, next
).trim();
1958 } else if (isStartingElement(next
, DISTRIBUTION_LOCALITY
)) {
1959 if (feature
!= null && !feature
.equals(Feature
.DISTRIBUTION())) {
1960 String message
= "Distribution locality only allowed for feature of type 'distribution'";
1961 fireWarningEvent(message
, next
, 4);
1963 text
+= handleDistributionLocality(state
, reader
, next
);
1964 } else if (next
.isCharacters()) {
1966 String message
= "String is not in text mode";
1967 fireWarningEvent(message
, next
, 6);
1969 text
+= next
.asCharacters().getData();
1971 } else if (isStartingElement(next
, HEADING
)) {
1973 handleNotYetImplementedElement(next
);
1974 } else if (isStartingElement(next
, VERNACULAR_NAMES
)) {
1976 handleNotYetImplementedElement(next
);
1977 } else if (isStartingElement(next
, QUOTE
)) {
1979 handleNotYetImplementedElement(next
);
1980 } else if (isStartingElement(next
, DEDICATION
)) {
1982 handleNotYetImplementedElement(next
);
1983 } else if (isStartingElement(next
, TAXONTYPE
)) {
1985 handleNotYetImplementedElement(next
);
1986 } else if (isStartingElement(next
, FULL_NAME
)) {
1988 handleNotYetImplementedElement(next
);
1989 }else if (isStartingElement(next
, REFERENCES
)) {
// only a warning is fired if references exist already, they are replaced below
1990 if (currentReferences
!= null){
1991 fireWarningEvent("References do already exist", next
, 2);
1993 currentReferences
= handleStringReferences(state
, reader
, next
);
1994 lastWasReference
= true;
1995 }else if (isStartingElement(next
, REFERENCE
)) {
1997 handleNotYetImplementedElement(next
);
1998 } else if (isStartingElement(next
, GATHERING
)) {
2000 handleNotYetImplementedElement(next
);
2001 } else if (isStartingElement(next
, ANNOTATION
)) {
2002 //TODO //TODO test handleSimpleAnnotation
2003 handleNotYetImplementedElement(next
);
2004 } else if (isStartingElement(next
, HABITAT
)) {
2005 text
+= featureImport
.handleHabitat(state
, reader
, next
);
2007 } else if (isStartingElement(next
, FIGURE_REF
)) {
2009 handleNotYetImplementedElement(next
);
2010 } else if (isStartingElement(next
, FIGURE
)) {
2012 handleNotYetImplementedElement(next
);
2013 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
2015 handleNotYetImplementedElement(next
);
2016 } else if (isStartingElement(next
, FOOTNOTE
)) {
2018 handleNotYetImplementedElement(next
);
2019 } else if (isStartingElement(next
, WRITER
)) {
2021 handleNotYetImplementedElement(next
);
2022 } else if (isStartingElement(next
, DATES
)) {
2024 handleNotYetImplementedElement(next
);
2025 } else if (isStartingElement(next
, TO_KEY
)) {
2026 handleNotYetImplementedElement(next
);
2028 handleUnexpectedElement(next
);
// reader exhausted without finding the matching end element
2031 throw new IllegalStateException("<String> has no closing tag");
2036 * container class more or less representing a list of labeled references
2038 protected class StringReferences
{
2040 List
<LabeledReference
> content
= new ArrayList
<>() ; //either String or LabeledReference
2042 public String
toString(){
2043 String result
= null;
2044 for (LabeledReference labRef
: content
){
2045 result
= CdmUtils
.concat("", labRef
.label
);
2049 public List
<LabeledReference
> getReferences() {
2050 List
<LabeledReference
> result
= new ArrayList
<>();
2051 for (LabeledReference labRef
: content
){
2052 if (labRef
.ref
!= null){
2060 protected class LabeledReference
{
2061 public LabeledReference(Reference ref
, String detail
, String label
) {
2062 this.ref
= ref
; this.detail
= detail
; this.label
= label
;
2064 protected Reference ref
; //if null, this LabeledReference represents only a string in between references
2065 protected String detail
; //micro reference
2066 protected String label
;
2069 private StringReferences
handleStringReferences(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
2070 checkNoAttributes(parentEvent
);
2071 StringReferences result
= new StringReferences();
2072 while (reader
.hasNext()) {
2073 XMLEvent next
= readNoWhitespace(reader
);
2074 if (isMyEndingElement(next
, parentEvent
)) {
2076 } else if (isStartingElement(next
, SUB_HEADING
)) {
2077 String subheading
= getCData(state
, reader
, next
);
2078 if (!subheading
.matches("(References?|Literature):?")){
2079 fireWarningEvent("Subheading for references not recognized: " + subheading
, next
, 4);
2081 result
.subheading
= subheading
;
2082 } else if (isStartingElement(next
, REFERENCE
)) {
2083 handleInlineReference(state
, reader
, next
, result
);
2085 handleUnexpectedElement(next
);
2088 throw new IllegalStateException("<References> has no closing tag");
/**
 * Reads a single reference element located within the references of a string element
 * and adds it as LabeledReference to <code>result</code>. The element must not have
 * attributes and requires at least one REF_PART child.
 * Each ref part is read by handleRefPart into <code>refMap</code>; its text is appended,
 * separated by a blank, to the label (a part of class YEAR is put into brackets).
 * At the end tag the reference is created from the map, replaced by an existing reference
 * returned by the deduplication helper and added to the result together with the value
 * stored under DETAILS and the label.
 * NOTE(review): the declaration of <code>label</code> is not visible in this excerpt.
 *
 * @param state the import state
 * @param reader the event reader
 * @param parentEvent the start element of the reference
 * @param result the container the new labeled reference is added to
 * @throws XMLStreamException if reading from the reader fails
 * @throws IllegalStateException if the reader is exhausted before the closing tag
 */
2091 private void handleInlineReference(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
,
2092 StringReferences result
) throws XMLStreamException
{
2093 checkNoAttributes(parentEvent
);
2094 boolean hasRefPart
= false;
2095 Map
<String
, String
> refMap
= new HashMap
<>();
2097 while (reader
.hasNext()) {
2098 XMLEvent next
= readNoWhitespace(reader
);
2099 if (isMyEndingElement(next
, parentEvent
)) {
2100 checkMandatoryElement(hasRefPart
, parentEvent
.asStartElement(), REF_PART
);
// details are read before createReference is called on the map
2101 String details
= refMap
.get(DETAILS
);
2102 // String label = makeLabel(state, refMap, next);
2103 Reference ref
= createReference(state
, refMap
, next
);
2104 ref
= state
.getDeduplicationHelper(docImport
).getExistingReference(state
, ref
);
// NOTE(review): label2 is not used in the visible code
2106 String label2
= ref
.getTitleCache(); //TODO preliminary for debugging and testing
2107 result
.content
.add(new LabeledReference(ref
, details
, label
));
2109 } else if (isStartingElement(next
, REF_PART
)) {
2110 String classValue
= handleRefPart(state
, reader
, next
, refMap
);
2111 String text
= refMap
.get(classValue
);
2112 if (classValue
.equals(YEAR
)){
2113 text
= "("+text
+")";
2116 label
= CdmUtils
.concat(" ", label
, text
);
2118 handleUnexpectedElement(next
);
// NOTE(review): the message names <References> though a single reference element is handled here
2121 throw new IllegalStateException("<References> has no closing tag");
2126 // this is more or less a duplicate Nomenclature import, maybe merge later
/**
 * Creates a reference from the ref part values collected in <code>refMap</code>.
 * The known keys are removed from the map and passed to handleNonCitationSpecific,
 * afterwards the year is parsed and set as publication date and publisher and
 * publication place are set on the in-reference (or on the reference itself if
 * it has no in-reference).
 * Non-blank values for ALTERNATEPUBTITLE, NOTES and STATUS are reported as not yet
 * implemented, any key remaining in the map except DETAILS is reported as unexpected
 * attribute value.
 *
 * @param state the import state
 * @param refMap the ref part values by class; handled keys are removed from the map
 * @param parentEvent the event used for reporting
 */
2127 private Reference
createReference(MarkupImportState state
,
2128 Map
<String
, String
> refMap
, XMLEvent parentEvent
) {
2130 Reference reference
;
2132 String type
= getAndRemoveMapKey(refMap
, PUBTYPE
);
2133 String authorStr
= getAndRemoveMapKey(refMap
, AUTHOR
);
2134 String titleStr
= getAndRemoveMapKey(refMap
, PUBTITLE
);
2135 String titleCache
= getAndRemoveMapKey(refMap
, PUBFULLNAME
);
2136 String volume
= getAndRemoveMapKey(refMap
, VOLUME
);
2137 String edition
= getAndRemoveMapKey(refMap
, EDITION
);
2138 String editors
= getAndRemoveMapKey(refMap
, EDITORS
);
2139 String year
= getAndRemoveMapKey(refMap
, YEAR
);
2140 String pubName
= getAndRemoveMapKey(refMap
, PUBNAME
);
2141 String pages
= getAndRemoveMapKey(refMap
, PAGES
);
2142 String publication
= getAndRemoveMapKey(refMap
, PUBLOCATION
);
2143 String publisher
= getAndRemoveMapKey(refMap
, PUBLISHER
);
2144 String appendix
= getAndRemoveMapKey(refMap
, APPENDIX
);
2145 String issue
= getAndRemoveMapKey(refMap
, ISSUE
);
2147 reference
= handleNonCitationSpecific(state
, type
, authorStr
, titleStr
,
2148 titleCache
, volume
, issue
, edition
, editors
, pubName
, appendix
, pages
, parentEvent
);
// publication date
2151 TimePeriod timeperiod
= TimePeriodParser
.parseString(year
);
2152 if (reference
.getType().equals(ReferenceType
.BookSection
)){
// NOTE(review): handleNonCitationSpecific sets the book of a book section only if
// pubName is not null - confirm getInBook() can not be null here
2153 reference
.getInBook().setDatePublished(timeperiod
);
2155 reference
.setDatePublished(timeperiod
);
2157 //Quickfix for these 2 attributes (publication, publisher) used in feature.references
2158 Reference inRef
= reference
.getInReference() == null ? reference
: reference
.getInReference();
2160 if (isNotBlank(publisher
)){
2161 inRef
.setPublisher(publisher
);
2165 if (isNotBlank(publication
)){
2166 inRef
.setPlacePublished(publication
);
// keys which are known but not handled yet
2170 String
[] unhandledList
= new String
[] { ALTERNATEPUBTITLE
, NOTES
, STATUS
};
2171 for (String unhandled
: unhandledList
) {
2172 String value
= getAndRemoveMapKey(refMap
, unhandled
);
2173 if (isNotBlank(value
)) {
2174 this.handleNotYetImplementedAttributeValue(parentEvent
, CLASS
, unhandled
);
// everything still in the map is unexpected, except for the details
2178 for (String key
: refMap
.keySet()) {
2179 if (!DETAILS
.equalsIgnoreCase(key
)) {
2180 this.fireUnexpectedAttributeValue(parentEvent
, CLASS
, key
);
2189 * Create reference for non nomenclatural references
/**
 * Creates a reference for non nomenclatural references from the given string values.
 * The kind of reference is decided by isArticleNonCitation and isBookSection:
 * an article (with a journal titled <code>pubName</code>, if given), a book section
 * (with a book titled <code>pubName</code>, if given) or a generic reference (with a
 * generic in-reference titled <code>pubName</code> if both pubName and titleStr are given).
 * Afterwards author, title, title cache (protected, if not blank), edition and editors
 * (on the in-reference if one exists), volume and pages are set.
 * A non-blank issue is appended in brackets to a non-blank volume; a non-blank issue
 * without volume results in a warning. A non-blank appendix is appended to pubName.
 *
 * @param state the import state
 * @param type the publication type
 * @param authorStr the author string
 * @param titleStr the title
 * @param titleCache the full title, set as protected title cache if not blank
 * @param volume the volume
 * @param issue the issue
 * @param edition the edition
 * @param editors the editors
 * @param pubName the name of the publication the reference is part of
 * @param appendix the appendix
 * @param pages the pages
 * @param parentEvent the event used for reporting
 */
2192 protected Reference
handleNonCitationSpecific(MarkupImportState state
, String type
, String authorStr
,
2193 String titleStr
, String titleCache
, String volume
, String issue
, String edition
,
2194 String editors
, String pubName
, String appendix
, String pages
, XMLEvent parentEvent
) {
2196 Reference reference
;
// volume / issue
2199 if (isBlank(volume
) && isNotBlank(issue
)){
2200 String message
= "Issue ('"+issue
+"') exists but no volume";
2201 fireWarningEvent(message
, parentEvent
, 4);
2203 }else if (isNotBlank(issue
)){
2204 volume
= volume
+ "("+ issue
+ ")";
2207 //pubName / appendix
2208 if (isNotBlank(appendix
)){
// NOTE(review): as written replaceAll replaces a single blank by a single blank -
// presumably double blanks should be collapsed, verify against the original file
2209 pubName
= pubName
== null ? appendix
: (pubName
+ " " + appendix
).replaceAll(" ", " ");
// article
2212 if (isArticleNonCitation(type
, pubName
, volume
, editors
)) {
2213 IArticle article
= ReferenceFactory
.newArticle();
2214 if (pubName
!= null) {
2215 IJournal journal
= ReferenceFactory
.newJournal();
2216 journal
.setTitle(pubName
);
2217 article
.setInJournal(journal
);
2219 fireWarningEvent("Article has no journal", parentEvent
, 4);
2221 reference
= (Reference
) article
;
// book section
2223 if (isBookSection(type
, authorStr
, titleStr
, editors
, pubName
, volume
)){
2224 IBookSection bookSection
= ReferenceFactory
.newBookSection();
2225 if (pubName
!= null) {
2226 IBook book
= ReferenceFactory
.newBook();
2227 book
.setTitle(pubName
);
2228 bookSection
.setInBook(book
);
2230 reference
= (Reference
)bookSection
;
// generic reference
2233 Reference bookOrPartOf
= ReferenceFactory
.newGeneric();
2234 if (pubName
!= null && titleStr
!= null) {
2235 Reference inReference
= ReferenceFactory
.newGeneric();
2236 inReference
.setTitle(pubName
);
2237 bookOrPartOf
.setInReference(inReference
);
2239 reference
= bookOrPartOf
;
// author
2244 TeamOrPersonBase
<?
> author
= createAuthor(state
, authorStr
);
2245 reference
.setAuthorship(author
);
// title and title cache
2248 reference
.setTitle(titleStr
);
2249 if (StringUtils
.isNotBlank(titleCache
)) {
2250 reference
.setTitleCache(titleCache
, true);
// edition and editors belong to the in-reference if there is one
2254 if(reference
.getInReference() != null){
2255 reference
.getInReference().setEdition(edition
);
2256 reference
.getInReference().setEditor(editors
);
2259 reference
.setEdition(edition
);
2260 reference
.setEditor(editors
);
2264 reference
.setVolume(volume
);
2267 reference
.setPages(pages
);
/**
 * Decides from the given values whether a reference is to be handled as book section.
 * Two conditions are evaluated: author, editors, publication title and publication name
 * are all given, or publication title and publication name are given while the volume is not.
 * NOTE(review): the return statements are not visible in this excerpt - presumably both
 * conditions lead to <code>true</code>.
 *
 * @param type the publication type, not evaluated in the visible code
 * @param authorStr the author string
 * @param pubTitle the publication title
 * @param editors the editors
 * @param pubName the name of the publication the reference is part of
 * @param volume the volume
 */
2272 private boolean isBookSection(String type
, String authorStr
, String pubTitle
,
2273 String editors
, String pubName
, String volume
) {
2274 //type not yet handled
2275 if (authorStr
!= null && editors
!= null
2276 && pubTitle
!= null && pubName
!= null){
2278 }else if (pubTitle
!= null && pubName
!= null && volume
== null){
/**
 * Decides from the given values whether a non nomenclatural reference is to be handled
 * as article. Evaluated are the type (compared case insensitive to "journal") and, if a
 * volume but no editors are given, whether the publication name is guessed to be a
 * journal name by IJournal.guessIsJournalName.
 * NOTE(review): not all return statements are visible in this excerpt.
 *
 * @param type the publication type
 * @param pubName the name of the publication the reference is part of
 * @param volume the volume
 * @param editors the editors
 */
2286 private boolean isArticleNonCitation(String type
, String pubName
, String volume
, String editors
) {
2287 if ("journal".equalsIgnoreCase(type
)){
2289 }else if (volume
!= null && editors
== null){
2290 if (pubName
!= null && IJournal
.guessIsJournalName(pubName
)){
2293 return false; //unclear
/**
 * Reads a ref part element. The character data of the element is collected and, when
 * the end element matching <code>parentEvent</code> is reached, stored in
 * <code>refMap</code> under the value of the class attribute of the element.
 * ANNOTATION, ITALICS and BOLD children are reported as not yet implemented.
 * NOTE(review): the declaration of <code>text</code> and the return statement are not
 * visible in this excerpt - callers use the result as the class value of the ref part.
 *
 * @param state the import state
 * @param reader the event reader
 * @param parentEvent the start element of the ref part
 * @param refMap the map the text of the ref part is added to
 * @throws XMLStreamException if reading from the reader fails
 * @throws IllegalStateException if the reader is exhausted before the closing tag
 */
2300 protected String
handleRefPart(MarkupImportState state
, XMLEventReader reader
,
2301 XMLEvent parentEvent
, Map
<String
, String
> refMap
)
2302 throws XMLStreamException
{
2303 String classValue
= getClassOnlyAttribute(parentEvent
);
2306 while (reader
.hasNext()) {
2307 XMLEvent next
= readNoWhitespace(reader
);
2308 if (isMyEndingElement(next
, parentEvent
)) {
// the class value is the key for the collected text
2309 refMap
.put(classValue
, text
);
2311 } else if (next
.isStartElement()) {
2312 if (isStartingElement(next
, ANNOTATION
)) {
2313 handleNotYetImplementedElement(next
); // TODO test handleSimpleAnnotation
2314 } else if (isStartingElement(next
, ITALICS
)) {
2315 handleNotYetImplementedElement(next
);
2316 } else if (isStartingElement(next
, BOLD
)) {
2317 handleNotYetImplementedElement(next
);
2319 handleUnexpectedStartElement(next
.asStartElement());
2321 } else if (next
.isCharacters()) {
2322 text
+= next
.asCharacters().getData();
2324 handleUnexpectedEndElement(next
.asEndElement());
2327 throw new IllegalStateException("RefPart has no closing tag");
/**
 * Checks whether the given text consists only of whitespace and the punctuation
 * characters dot, comma, semicolon and colon (the empty string matches, too).
 * NOTE(review): the lines in front of the return statement are not visible in this
 * excerpt - confirm how <code>null</code> is handled.
 *
 * @param text the text to check
 */
2331 private boolean isBlankOrPunctuation(String text
) {
2335 return text
.matches("^[\\s\\.,;:]*$");
2341 *Is heading an "habitat" type heading
2343 * @return true if heading matches something like Eco(logy), Habitat(s) or Habitat & Ecology
2345 private boolean isHabitatHeading(String heading
) {
2346 return heading
.trim().matches("(Ecol(ogy)?|Habitat|Habitat\\s&\\sEcology)\\.?");
/**
 * Stores the content collected for the current subheading in the subheading map.
 * An entry is only created if the text is not blank or if there are full or inline
 * references. The text is stored without a starting minus (see removeStartingMinus)
 * and trimmed; if no full references are given an empty StringReferences is stored.
 * An existing entry with the same subheading key is replaced by Map.put.
 * NOTE(review): the return statement is not visible in this excerpt - handleString
 * assigns the result to its text variable.
 *
 * @param subHeadingMap the map to add the result to
 * @param currentSubheading the key of the new entry, may be <code>null</code>
 * @param text the text collected for the subheading
 * @param fullReferences the references collected for the subheading, may be <code>null</code>
 * @param inlineReferences the inline references collected for the subheading
 */
2350 private String
putCurrentSubheading(Map
<String
, SubheadingResult
> subHeadingMap
, String currentSubheading
,
2351 String text
, StringReferences fullReferences
, List
<IntextReference
> inlineReferences
) {
2352 if (isNotBlank(text
) || (fullReferences
!= null && isNotEmptyCollection(fullReferences
.content
))
2353 ||isNotEmptyCollection(inlineReferences
)) {
2354 SubheadingResult result
= new SubheadingResult();
2355 text
= removeStartingMinus(text
);
2356 result
.text
= text
.trim();
2357 result
.references
= fullReferences
== null ?
new StringReferences() : fullReferences
;
2358 result
.inlineReferences
= inlineReferences
;
2359 subHeadingMap
.put(currentSubheading
, result
);
2365 * @param references2
2368 protected boolean isNotEmptyCollection(Collection
<?
> list
) {
2369 return list
!= null && !list
.isEmpty();
2373 private String
removeStartingMinus(String string
) {
2374 string
= replaceStart(string
, "-");
2375 string
= replaceStart(string
, "\u002d");
2376 string
= replaceStart(string
, "\u2013");
2377 string
= replaceStart(string
, "\u2014");
2378 string
= replaceStart(string
, "--");
2385 * @param replacementString
/**
 * Removes <code>replacementString</code> from the start of <code>value</code> (if value
 * starts with it) and trims the remainder. Additionally, in a loop, a leading "-" or
 * a leading \u2014 character is cut off (one character per iteration, trimming after each cut).
 * NOTE(review): the en dash \u2013, which removeStartingMinus passes as prefix, is not
 * covered by the loop condition - confirm this is intended.
 * NOTE(review): the closing braces and the return statement are not visible in this
 * excerpt - presumably the cleaned value is returned.
 *
 * @param value the string to clean, must not be <code>null</code>
 * @param replacementString the prefix to remove
 */
2387 private String
replaceStart(String value
, String replacementString
) {
2388 if (value
.startsWith(replacementString
) ){
2389 value
= value
.substring(replacementString
.length()).trim();
2391 while (value
.startsWith("-") || value
.startsWith("\u2014") ){
2392 value
= value
.substring("-".length()).trim();
/**
 * Handles a distribution locality element. The character data of the element is
 * collected in <code>text</code>; when the end element matching <code>parentEvent</code>
 * is reached and the text is not blank, distributions are created for the areas named
 * in the text and added to the "extracted markup data" description of the current taxon.
 * <ul>
 * <li>The required class attribute defines the named area level (makeNamedAreaLevel).</li>
 * <li>The status attribute is transformed to a presence/absence term via the transformer;
 *     a warning is fired if this is not possible.</li>
 * <li>A frequency attribute "absent" (case insensitive) changes a status PRESENT to ABSENT,
 *     a warning that frequency is not available in CDM is fired in this block, too.</li>
 * <li>A label of the form "higher area (area1, area2 and area3)" results in one distribution
 *     per inner area, otherwise one distribution for the complete label is created.</li>
 * </ul>
 * COORDINATES children are reported as not yet implemented. A blank text results in an
 * "Empty distribution locality" warning.
 * NOTE(review): the declaration of <code>text</code> and the return statement are not
 * visible in this excerpt.
 *
 * @param state the import state
 * @param reader the event reader
 * @param parentEvent the start element of the distribution locality
 * @throws XMLStreamException if reading from the reader fails
 * @throws IllegalStateException if the reader is exhausted before the closing tag
 */
2398 private String
handleDistributionLocality(MarkupImportState state
,XMLEventReader reader
, XMLEvent parentEvent
)throws XMLStreamException
{
2399 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
2400 String classValue
= getAndRemoveRequiredAttributeValue(parentEvent
, attributes
, CLASS
);
2401 String statusValue
=getAndRemoveAttributeValue(attributes
, STATUS
);
2402 String frequencyValue
=getAndRemoveAttributeValue(attributes
, FREQUENCY
);
2404 Taxon taxon
= state
.getCurrentTaxon();
2405 // TODO which ref to take?
2406 Reference sourceReference
= state
.getConfig().getSourceReference();
2409 while (reader
.hasNext()) {
2410 XMLEvent next
= readNoWhitespace(reader
);
2411 if (isMyEndingElement(next
, parentEvent
)) {
2412 if (StringUtils
.isNotBlank(text
)) {
// the normalized text without trailing dot is the label the areas are taken from
2413 String label
= CdmUtils
.removeTrailingDot(normalize(text
));
2414 TaxonDescription description
= getExtractedMarkupMarkedDescription(state
, taxon
, sourceReference
);
2415 NamedAreaLevel level
= makeNamedAreaLevel(state
,classValue
, next
);
// status
2418 PresenceAbsenceTerm status
= null;
2419 if (isNotBlank(statusValue
)){
2421 status
= state
.getTransformer().getPresenceTermByKey(statusValue
);
2422 if (status
== null){
// no term found by key, try via uuid
2423 UUID uuid
= state
.getTransformer().getPresenceTermUuid(statusValue
);
2425 status
= this.getPresenceAbsenceTerm(state
, uuid
, statusValue
, statusValue
, statusValue
, false, null);
2428 if (status
== null){
2430 String message
= "The presence/absence status '%s' could not be transformed to an CDM status";
2431 fireWarningEvent(String
.format(message
, statusValue
), next
, 4);
2433 } catch (UndefinedTransformerMethodException e
) {
2434 throw new RuntimeException(e
);
2437 status
= PresenceAbsenceTerm
.PRESENT();
// frequency
2440 if (isNotBlank(frequencyValue
)){
2441 if (frequencyValue
.equalsIgnoreCase("absent") && PresenceAbsenceTerm
.PRESENT().equals(status
)){ //to be on the safe side that not real status has been defined yet.
2442 status
= PresenceAbsenceTerm
.ABSENT();
2444 String message
= "The frequency attribute is currently not yet available in CDM";
2445 fireWarningEvent(message
, parentEvent
, 6);
// areas
2449 NamedArea higherArea
= null;
2450 List
<NamedArea
> areas
= new ArrayList
<>();
// a single area has at least 3 characters and contains neither comma nor opening bracket
2452 String patSingleArea
= "([^,\\(]{3,})";
2453 String patSeparator
= "(,|\\sand\\s)";
2454 String hierarchiePattern
= String
.format("%s\\((%s(%s%s)*)\\)", patSingleArea
, patSingleArea
, patSeparator
, patSingleArea
);
2455 Pattern patHierarchie
= Pattern
.compile(hierarchiePattern
, Pattern
.CASE_INSENSITIVE
);
2456 Matcher matcher
= patHierarchie
.matcher(label
);
2457 if (matcher
.matches()){
// group 1 is the higher area, group 2 the list of areas within the brackets
2458 String higherAreaStr
= matcher
.group(1).trim();
2459 higherArea
= makeArea(state
, higherAreaStr
, level
);
2460 String
[] innerAreas
= matcher
.group(2).split(patSeparator
);
2461 for (String innerArea
: innerAreas
){
2462 if (isNotBlank(innerArea
)){
2463 NamedArea singleArea
= makeArea(state
, innerArea
.trim(), level
);
2464 areas
.add(singleArea
);
// NOTE(review): partOf is only used by the code commented out below
2465 NamedArea partOf
= singleArea
.getPartOf();
2466 // if (partOf == null){
2467 // singleArea.setPartOf(higherArea);
2472 NamedArea singleArea
= makeArea(state
, label
, level
);
2473 areas
.add(singleArea
);
2476 for (NamedArea area
: areas
){
2477 //create distribution
2478 Distribution distribution
= Distribution
.NewInstance(area
,status
);
2479 distribution
.addPrimaryTaxonomicSource(sourceReference
);
2480 description
.addElement(distribution
);
2483 String message
= "Empty distribution locality";
2484 fireWarningEvent(message
, next
, 4);
2487 } else if (isStartingElement(next
, COORDINATES
)) {
2489 handleNotYetImplementedElement(next
);
2490 } else if (isEndingElement(next
, COORDINATES
)) {
2492 popUnimplemented(next
.asEndElement());
2493 } else if (next
.isCharacters()) {
2494 text
+= next
.asCharacters().getData();
2496 handleUnexpectedElement(next
);
// reader exhausted without finding the matching end element
2499 throw new IllegalStateException("<DistributionLocality> has no closing tag");
2508 protected TaxonDescription
getExtractedMarkupMarkedDescription(MarkupImportState state
, Taxon taxon
, Reference sourceReference
) {
2509 MarkerType markerType
= getMarkerType(
2511 MarkupTransformer
.uuidMarkerExtractedMarkupData
,
2512 "Extracted factual data", "Marker type for factual data imported from markup where the markup for this data was included in parent markup that was also imported including the text from this markup.",
2515 String title
= "Extracted markup data for " + taxon
.getName().getTitleCache();
2516 TaxonDescription description
= getMarkedTaxonDescription(taxon
, markerType
, false, true, sourceReference
, title
);