/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.markup
;
12 import java
.net
.MalformedURLException
;
13 import java
.util
.ArrayList
;
14 import java
.util
.Arrays
;
15 import java
.util
.Collection
;
16 import java
.util
.HashMap
;
17 import java
.util
.HashSet
;
18 import java
.util
.Iterator
;
19 import java
.util
.List
;
22 import java
.util
.Stack
;
23 import java
.util
.UUID
;
24 import java
.util
.regex
.Matcher
;
25 import java
.util
.regex
.Pattern
;
27 import javax
.xml
.namespace
.QName
;
28 import javax
.xml
.stream
.Location
;
29 import javax
.xml
.stream
.XMLEventReader
;
30 import javax
.xml
.stream
.XMLStreamConstants
;
31 import javax
.xml
.stream
.XMLStreamException
;
32 import javax
.xml
.stream
.events
.Attribute
;
33 import javax
.xml
.stream
.events
.Characters
;
34 import javax
.xml
.stream
.events
.EndElement
;
35 import javax
.xml
.stream
.events
.StartElement
;
36 import javax
.xml
.stream
.events
.XMLEvent
;
38 import org
.apache
.commons
.lang
.StringUtils
;
39 import org
.apache
.commons
.lang
.WordUtils
;
40 import org
.apache
.log4j
.Logger
;
42 import eu
.etaxonomy
.cdm
.api
.service
.IClassificationService
;
43 import eu
.etaxonomy
.cdm
.api
.service
.ITermService
;
44 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
45 import eu
.etaxonomy
.cdm
.ext
.geo
.GeoServiceArea
;
46 import eu
.etaxonomy
.cdm
.ext
.geo
.IEditGeoService
;
47 import eu
.etaxonomy
.cdm
.io
.common
.CdmImportBase
;
48 import eu
.etaxonomy
.cdm
.io
.common
.CdmImportBase
.TermMatchMode
;
49 import eu
.etaxonomy
.cdm
.io
.common
.events
.IIoEvent
;
50 import eu
.etaxonomy
.cdm
.io
.common
.events
.IoProblemEvent
;
51 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
52 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
53 import eu
.etaxonomy
.cdm
.model
.common
.AnnotatableEntity
;
54 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
55 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
56 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
57 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTerm
;
58 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
59 import eu
.etaxonomy
.cdm
.model
.common
.Extension
;
60 import eu
.etaxonomy
.cdm
.model
.common
.ExtensionType
;
61 import eu
.etaxonomy
.cdm
.model
.common
.IntextReference
;
62 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
63 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
64 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
65 import eu
.etaxonomy
.cdm
.model
.common
.TermVocabulary
;
66 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
67 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
68 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
69 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
70 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
71 import eu
.etaxonomy
.cdm
.model
.description
.PresenceAbsenceTerm
;
72 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
73 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
74 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
75 import eu
.etaxonomy
.cdm
.model
.location
.NamedAreaLevel
;
76 import eu
.etaxonomy
.cdm
.model
.location
.NamedAreaType
;
77 import eu
.etaxonomy
.cdm
.model
.media
.IdentifiableMediaEntity
;
78 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
79 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
80 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
81 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
82 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
83 import eu
.etaxonomy
.cdm
.model
.reference
.IArticle
;
84 import eu
.etaxonomy
.cdm
.model
.reference
.IBook
;
85 import eu
.etaxonomy
.cdm
.model
.reference
.IBookSection
;
86 import eu
.etaxonomy
.cdm
.model
.reference
.IJournal
;
87 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
88 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
89 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceType
;
90 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
91 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
92 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
93 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
94 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
95 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
101 public abstract class MarkupImportBase
{
102 private static final Logger logger
= Logger
.getLogger(MarkupImportBase
.class);
105 protected static final String ALTITUDE
= "altitude";
106 protected static final String ANNOTATION
= "annotation";
107 protected static final String BOLD
= "bold";
108 protected static final String BR
= "br";
109 protected static final String DOUBTFUL
= "doubtful";
110 protected static final String CITATION
= "citation";
111 protected static final String CLASS
= "class";
112 protected static final String COORDINATES
= "coordinates";
113 protected static final String DATES
= "dates";
114 protected static final String GATHERING
= "gathering";
115 protected static final String GATHERING_GROUP
= "gatheringGroup";
116 protected static final String GENUS_ABBREVIATION
= "genus abbreviation";
117 protected static final String FOOTNOTE
= "footnote";
118 protected static final String FOOTNOTE_REF
= "footnoteRef";
119 protected static final String FULL_NAME
= "fullName";
120 protected static final String ITALICS
= "italics";
121 protected static final String NUM
= "num";
122 protected static final String NOTES
= "notes";
123 protected static final String PUBLICATION
= "publication";
124 protected static final String SPECIMEN_TYPE
= "specimenType";
125 protected static final String STATUS
= "status";
126 protected static final String SUB_HEADING
= "subHeading";
127 protected static final String TYPE
= "type";
128 protected static final String TYPE_STATUS
= "typeStatus";
129 protected static final String UNKNOWN
= "unknown";
132 protected static final boolean CREATE_NEW
= true;
133 protected static final boolean NO_IMAGE_GALLERY
= false;
134 protected static final boolean IMAGE_GALLERY
= true;
136 protected static final String ADDENDA
= "addenda";
137 protected static final String BIBLIOGRAPHY
= "bibliography";
138 protected static final String BIOGRAPHIES
= "biographies";
139 protected static final String CHAR
= "char";
140 protected static final String DEDICATION
= "dedication";
141 protected static final String DEFAULT_MEDIA_URL
= "defaultMediaUrl";
142 protected static final String DISTRIBUTION_LIST
= "distributionList";
143 protected static final String DISTRIBUTION_LOCALITY
= "distributionLocality";
144 protected static final String FEATURE
= "feature";
145 protected static final String FIGURE
= "figure";
146 protected static final String FIGURE_LEGEND
= "figureLegend";
147 protected static final String FIGURE_PART
= "figurePart";
148 protected static final String FIGURE_REF
= "figureRef";
149 protected static final String FIGURE_TITLE
= "figureTitle";
150 protected static final String FOOTNOTE_STRING
= "footnoteString";
151 protected static final String FREQUENCY
= "frequency";
152 protected static final String HEADING
= "heading";
153 protected static final String HABITAT
= "habitat";
154 protected static final String HABITAT_LIST
= "habitatList";
155 protected static final String IS_FREETEXT
= "isFreetext";
156 protected static final String ID
= "id";
157 protected static final String KEY
= "key";
158 protected static final String LIFE_CYCLE_PERIODS
= "lifeCyclePeriods";
159 protected static final String META_DATA
= "metaData";
160 protected static final String MODS
= "mods";
162 protected static final String NOMENCLATURE
= "nomenclature";
163 protected static final String QUOTE
= "quote";
164 protected static final String RANK
= "rank";
165 protected static final String REF
= "ref";
166 protected static final String REF_NUM
= "refNum";
167 protected static final String REFERENCE
= "reference";
168 protected static final String REFERENCES
= "references";
169 protected static final String SUB_CHAR
= "subChar";
170 protected static final String TAXON
= "taxon";
171 protected static final String TAXONTITLE
= "taxontitle";
172 protected static final String TAXONTYPE
= "taxontype";
173 protected static final String TEXT_SECTION
= "textSection";
174 protected static final String TREATMENT
= "treatment";
175 protected static final String SERIALS_ABBREVIATIONS
= "serialsAbbreviations";
176 protected static final String STRING
= "string";
177 protected static final String URL
= "url";
178 protected static final String WRITER
= "writer";
180 protected static final String LOCALITY
= "locality";
185 protected static final String ACCEPTED
= "accepted";
186 protected static final String ACCEPTED_NAME
= "acceptedName";
187 protected static final String ALTERNATEPUBTITLE
= "alternatepubtitle";
188 protected static final String APPENDIX
= "appendix";
189 protected static final String AUTHOR
= "author";
190 protected static final String DETAILS
= "details";
191 protected static final String EDITION
= "edition";
192 protected static final String EDITORS
= "editors";
193 protected static final String HOMONYM
= "homonym";
194 protected static final String HOMOTYPES
= "homotypes";
195 protected static final String NOMENCLATURAL_NOTES
= "nomenclaturalNotes";
196 protected static final String INFRANK
= "infrank";
197 protected static final String INFRAUT
= "infraut";
198 protected static final String INFRPARAUT
= "infrparaut";
199 protected static final String ISSUE
= "issue";
200 protected static final String NAME_STATUS
= "namestatus";
201 protected static final String NAME
= "name";
202 protected static final String NAME_TYPE
= "nameType";
203 protected static final String NOM
= "nom";
204 protected static final String PAGES
= "pages";
205 protected static final String PARAUT
= "paraut";
206 protected static final String PUBFULLNAME
= "pubfullname";
207 protected static final String PUBLOCATION
= "publocation";
208 protected static final String PUBLISHER
= "publisher";
209 protected static final String PUBNAME
= "pubname";
210 protected static final String PUBTITLE
= "pubtitle";
211 protected static final String PUBTYPE
= "pubtype";
212 protected static final String REF_PART
= "refPart";
213 protected static final String SYNONYM
= "synonym";
214 protected static final String USAGE
= "usage";
215 protected static final String VOLUME
= "volume";
216 protected static final String YEAR
= "year";
220 protected static final String COUPLET
= "couplet";
221 protected static final String IS_SPOTCHARACTERS
= "isSpotcharacters";
222 protected static final String ONLY_NUMBERED_TAXA_EXIST
= "onlyNumberedTaxaExist";
223 protected static final String EXISTS
= "exists";
224 protected static final String KEYNOTES
= "keynotes";
225 protected static final String KEY_TITLE
= "keyTitle";
226 protected static final String QUESTION
= "question";
227 protected static final String TEXT
= "text";
228 protected static final String TO_COUPLET
= "toCouplet";
229 protected static final String TO_KEY
= "toKey";
230 protected static final String TO_TAXON
= "toTaxon";
234 protected static final String VERNACULAR_NAMES
= "vernacularNames";
235 protected static final String VERNACULAR_NAME
= "vernacularName";
236 protected static final String TRANSLATION
= "translation";
237 protected static final String LOCAL_LANGUAGE
= "localLanguage";
/** The document import this helper was created for; services are obtained from it. */
protected MarkupDocumentImport docImport;

/** Geo service, taken from {@link #docImport} at construction time. */
private final IEditGeoService editGeoService;

protected MarkupFeatureImport featureImport;

/** Names of the currently open elements that are not handled by the import. */
private final Stack<QName> unhandledElements = new Stack<>();

private final Stack<QName> handledElements = new Stack<>();

/**
 * Creates a new import helper bound to the given document import.
 *
 * @param docImport the document import, must not be <code>null</code>
 */
public MarkupImportBase(MarkupDocumentImport docImport) {
    this.docImport = docImport;
    this.editGeoService = docImport.getEditGeoService();
}
256 protected <T
extends CdmBase
> void save(Collection
<T
> collection
, MarkupImportState state
) {
257 if (state
.isCheck() || collection
.isEmpty()){
260 T example
= collection
.iterator().next();
261 if (example
.isInstanceOf(TaxonBase
.class)){
262 Collection
<TaxonBase
> typedCollection
= (Collection
<TaxonBase
>)collection
;
263 docImport
.getTaxonService().saveOrUpdate(typedCollection
);
264 }else if (example
.isInstanceOf(Classification
.class)){
265 Collection
<Classification
> typedCollection
= (Collection
<Classification
>)collection
;
266 docImport
.getClassificationService().saveOrUpdate(typedCollection
);
267 }else if (example
.isInstanceOf(PolytomousKey
.class)){
268 Collection
<PolytomousKey
> typedCollection
= (Collection
<PolytomousKey
>)collection
;
269 docImport
.getPolytomousKeyService().saveOrUpdate(typedCollection
);
270 }else if (example
.isInstanceOf(DefinedTermBase
.class)){
271 Collection
<DefinedTermBase
> typedCollection
= (Collection
<DefinedTermBase
>)collection
;
272 getTermService().saveOrUpdate(typedCollection
);
278 //TODO move to service layer for all IdentifiableEntities
279 protected void save(CdmBase cdmBase
, MarkupImportState state
) {
280 if (state
.isCheck()){
283 cdmBase
= CdmBase
.deproxy(cdmBase
, CdmBase
.class);
284 if (cdmBase
== null){
285 String message
= "Tried to save a null object.";
286 fireWarningEvent(message
, "--location ?? --", 6,1);
287 } else if (cdmBase
.isInstanceOf(TaxonBase
.class)){
288 docImport
.getTaxonService().saveOrUpdate((TaxonBase
<?
>)cdmBase
);
289 }else if (cdmBase
.isInstanceOf(Classification
.class)){
290 docImport
.getClassificationService().saveOrUpdate((Classification
)cdmBase
);
291 }else if (cdmBase
.isInstanceOf(PolytomousKey
.class)){
292 docImport
.getPolytomousKeyService().saveOrUpdate((PolytomousKey
)cdmBase
);
293 }else if (cdmBase
.isInstanceOf(DefinedTermBase
.class)){
294 docImport
.getTermService().saveOrUpdate((DefinedTermBase
<?
>)cdmBase
);
295 }else if (cdmBase
.isInstanceOf(Media
.class)){
296 docImport
.getMediaService().saveOrUpdate((Media
)cdmBase
);
297 }else if (cdmBase
.isInstanceOf(SpecimenOrObservationBase
.class)){
298 docImport
.getOccurrenceService().saveOrUpdate((SpecimenOrObservationBase
<?
>)cdmBase
);
299 }else if (cdmBase
.isInstanceOf(DescriptionElementBase
.class)){
300 docImport
.getDescriptionService().saveDescriptionElement((DescriptionElementBase
)cdmBase
);
301 }else if (cdmBase
.isInstanceOf(Reference
.class)){
302 docImport
.getReferenceService().saveOrUpdate((Reference
)cdmBase
);
304 String message
= "Unknown cdmBase type to save: " + cdmBase
.getClass();
305 fireWarningEvent(message
, "Unknown location", 8);
307 //logger.warn("Saved " + cdmBase);
311 protected ITermService
getTermService() {
312 return docImport
.getTermService();
315 protected IClassificationService
getClassificationService() {
316 return docImport
.getClassificationService();
319 //*********************** Attribute methods *************************************/
322 * Returns a map for all attributes of an start element
326 protected Map
<String
, Attribute
> getAttributes(XMLEvent event
) {
327 Map
<String
, Attribute
> result
= new HashMap
<>();
328 if (!event
.isStartElement()){
329 fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event
.getLocation()), 1, 1);
332 StartElement element
= event
.asStartElement();
333 @SuppressWarnings("unchecked")
334 Iterator
<Attribute
> attributes
= element
.getAttributes();
335 while (attributes
.hasNext()){
336 Attribute attribute
= attributes
.next();
338 result
.put(attribute
.getName().getLocalPart(), attribute
);
344 * Throws an unexpected attributes event if the event has any attributes.
347 protected void checkNoAttributes(Map
<String
, Attribute
> attributes
, XMLEvent event
) {
348 String
[] exceptions
= new String
[]{};
349 handleUnexpectedAttributes(event
.getLocation(), attributes
, 1, exceptions
);
355 * Throws an unexpected attributes event if the event has any attributes.
358 protected void checkNoAttributes(XMLEvent event
) {
359 String
[] exceptions
= new String
[]{};
360 checkNoAttributes(event
, 1, exceptions
);
364 * Throws an unexpected attributes event if the event has any attributes except those mentioned in "exceptions".
368 protected void checkNoAttributes(XMLEvent event
, int stackDepth
, String
... exceptions
) {
369 if (! event
.isStartElement()){
370 fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event
.getLocation()), 1, 1);
373 StartElement startElement
= event
.asStartElement();
374 Map
<String
, Attribute
> attributes
= getAttributes(startElement
);
375 handleUnexpectedAttributes(startElement
.getLocation(), attributes
, stackDepth
+1, exceptions
);
380 * Checks if the given attribute exists and has the given value.
381 * If yes, true is returned and the attribute is removed from the attributes map.
382 * Otherwise false is returned.
386 * @return <code>true</code> if attribute has given value, <code>false</code> otherwise
388 protected boolean checkAndRemoveAttributeValue( Map
<String
, Attribute
> attributes
, String attrName
, String value
) {
389 Attribute attr
= attributes
.get(attrName
);
390 if (attr
== null ||value
== null ){
393 if (value
.equals(attr
.getValue())){
394 attributes
.remove(attrName
);
404 * Returns the value of a given attribute name and removes the attribute from the attributes map.
405 * Returns <code>null</code> if attribute does not exist.
406 * @param attributes the list of all attributes
407 * @param attrName the requested attribute name
408 * @return the value for the attribute
410 protected String
getAndRemoveAttributeValue(Map
<String
, Attribute
> attributes
, String attrName
) {
411 return getAndRemoveAttributeValue(null, attributes
, attrName
, false, 1);
415 * Returns the value of a boolean attribute with the given name and removes the attribute from the attributes map.
416 * Returns <code>defaultValue</code> if the attribute does not exist. ALso returns <code>defaultValue</code> and throws a warning if the
417 * attribute has no boolean value (true, false).
419 * @param attributes the
420 * @param attrName the name of the attribute
421 * @param defaultValue the default value to return if attribute does not exist or can not be defined
424 protected Boolean
getAndRemoveBooleanAttributeValue(XMLEvent event
, Map
<String
, Attribute
> attributes
, String attrName
, Boolean defaultValue
) {
425 String value
= getAndRemoveAttributeValue(null, attributes
, attrName
, false, 1);
426 Boolean result
= defaultValue
;
428 if (value
.equalsIgnoreCase("true")){
430 }else if (value
.equalsIgnoreCase("false")){
433 String message
= "Boolean attribute has no boolean value ('true', 'false') but '%s'";
434 fireWarningEvent(String
.format(message
, value
), makeLocationStr(event
.getLocation()), 6, 1);
442 * Returns the value of a given attribute name and returns the attribute from the attributes map.
443 * Fires a mandatory field is missing event if the attribute does not exist.
449 protected String
getAndRemoveRequiredAttributeValue(XMLEvent xmlEvent
, Map
<String
, Attribute
> attributes
, String attrName
) {
450 return getAndRemoveAttributeValue(xmlEvent
, attributes
, attrName
, true, 1);
454 * Returns the value of a given attribute name and returns the attribute from the attributes map.
455 * If required is <code>true</code> and the attribute does not exist a mandatory field is missing event is fired.
462 private String
getAndRemoveAttributeValue(XMLEvent xmlEvent
, Map
<String
, Attribute
> attributes
, String attrName
, boolean isRequired
, int stackDepth
) {
463 Attribute attr
= attributes
.get(attrName
);
466 fireMandatoryElementIsMissing(xmlEvent
, attrName
, 8, stackDepth
+1);
470 attributes
.remove(attrName
);
471 return attr
.getValue();
476 * Fires an not yet implemented event if the given attribute exists in attributes.
481 protected void handleNotYetImplementedAttribute(Map
<String
, Attribute
> attributes
,
482 String attrName
, XMLEvent event
) {
483 Attribute attr
= attributes
.get(attrName
);
485 attributes
.remove(attrName
);
486 QName qName
= attr
.getName();
487 fireNotYetImplementedAttribute(event
.getLocation(), qName
, attr
.getValue(), 1);
492 * Fires an unhandled attributes event, if attributes exist in attributes map not covered by the exceptions.
493 * No event is fired if the unhandled elements stack is not empty.
498 protected void handleUnexpectedAttributes(Location location
,Map
<String
, Attribute
> attributes
, String
... exceptions
) {
499 handleUnexpectedAttributes(location
, attributes
, 1, exceptions
);
503 * see {@link #handleUnexpectedAttributes(Location, Map, String...)}
507 * @param stackDepth the stack trace depth
510 private void handleUnexpectedAttributes(Location location
,Map
<String
, Attribute
> attributes
, int stackDepth
, String
... exceptions
) {
511 if (attributes
.size() > 0){
512 if (this.unhandledElements
.size() == 0 ){
513 boolean hasUnhandledAttributes
= false;
514 for (String key
: attributes
.keySet()){
515 boolean isException
= false;
516 for (String exception
: exceptions
){
517 if(key
.equals(exception
)){
522 hasUnhandledAttributes
= true;
525 if (hasUnhandledAttributes
){
526 fireUnexpectedAttributes(location
, attributes
, stackDepth
+1);
533 private void fireUnexpectedAttributes(Location location
, Map
<String
, Attribute
> attributes
, int stackDepth
) {
534 String attributesString
= "";
535 for (String key
: attributes
.keySet()){
536 Attribute attribute
= attributes
.get(key
);
537 attributesString
= CdmUtils
.concat(",", attributesString
, attribute
.getName().getLocalPart() + ":" + attribute
.getValue());
539 String message
= "Unexpected attributes: %s";
540 IoProblemEvent event
= makeProblemEvent(location
, String
.format(message
, attributesString
), 1 , stackDepth
+1 );
545 protected void fireUnexpectedAttributeValue(XMLEvent parentEvent
, String attrName
, String attrValue
) {
546 String message
= "Unexpected attribute value %s='%s'";
547 message
= String
.format(message
, attrName
, attrValue
);
548 IoProblemEvent event
= makeProblemEvent(parentEvent
.getLocation(), message
, 1 , 1 );
552 protected void handleNotYetImplementedAttributeValue(XMLEvent xmlEvent
, String attrName
, String attrValue
) {
553 String message
= "Attribute %s not yet implemented for value '%s'";
554 message
= String
.format(message
, attrName
, attrValue
);
555 IIoEvent event
= makeProblemEvent(xmlEvent
.getLocation(), message
, 1, 1 );
559 protected void fireNotYetImplementedAttribute(Location location
, QName qName
,
560 String value
, int stackDepth
) {
561 String message
= "Attribute not yet implemented: %s (%s)";
562 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, qName
.getLocalPart(), value
), 1, stackDepth
+1 );
567 protected void fireUnexpectedEvent(XMLEvent xmlEvent
, int stackDepth
) {
568 Location location
= xmlEvent
.getLocation();
569 String message
= "Unexpected event: %s";
570 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, xmlEvent
.toString()), 2, stackDepth
+1);
574 protected void fireUnexpectedStartElement(Location location
, StartElement startElement
, int stackDepth
) {
575 QName qName
= startElement
.getName();
576 String message
= "Unexpected start element: %s";
577 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, qName
.getLocalPart()), 2, stackDepth
+1);
582 protected void fireUnexpectedEndElement(Location location
, EndElement endElement
, int stackDepth
) {
583 QName qName
= endElement
.getName();
584 String message
= "Unexpected end element: %s";
585 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, qName
.getLocalPart()), 16, stackDepth
+1);
589 protected void fireNotYetImplementedElement(Location location
, QName qName
, int stackDepth
) {
590 String message
= "Element not yet implemented: %s";
591 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, qName
.getLocalPart()), 1, stackDepth
+1 );
595 protected void fireNotYetImplementedCharacters(Location location
, Characters chars
, int stackDepth
) {
596 String message
= "Characters not yet handled: %s";
597 IIoEvent event
= makeProblemEvent(location
, String
.format(message
, chars
.getData()), 1, stackDepth
+1 );
602 * Creates a problem event.
603 * Be aware of the right depths of the stack trace !
609 private IoProblemEvent
makeProblemEvent(Location location
, String message
, int severity
, int stackDepth
) {
611 StackTraceElement
[] stackTrace
= new Exception().getStackTrace();
612 int lineNumber
= stackTrace
[stackDepth
].getLineNumber();
613 String methodName
= stackTrace
[stackDepth
].getMethodName();
614 String locationStr
= makeLocationStr(location
);
615 String className
= stackTrace
[stackDepth
].getClassName();
616 Class
<?
> declaringClass
;
618 declaringClass
= Class
.forName(className
);
619 } catch (ClassNotFoundException e
) {
620 declaringClass
= this.getClass();
622 IoProblemEvent event
= IoProblemEvent
.NewInstance(declaringClass
, message
,
623 locationStr
, lineNumber
, severity
, methodName
);
628 * Creates a string from a location
632 protected String
makeLocationStr(Location location
) {
633 String locationStr
= location
== null ?
" - no location - " : "l." + location
.getLineNumber() + "/c."+ location
.getColumnNumber();
639 * Fires an unexpected element event if the unhandled elements stack is empty.
640 * Otherwise adds the element to the stack.
643 protected void handleUnexpectedStartElement(XMLEvent event
) {
644 handleUnexpectedStartElement(event
, 1);
648 * Fires an unexpected element event if the unhandled elements stack is empty.
649 * Otherwise adds the element to the stack.
652 protected void handleUnexpectedStartElement(XMLEvent event
, int stackDepth
) {
653 QName qName
= event
.asStartElement().getName();
654 if (! unhandledElements
.empty()){
655 unhandledElements
.push(qName
);
657 fireUnexpectedStartElement(event
.getLocation(), event
.asStartElement(), stackDepth
+ 1);
662 protected void handleUnexpectedEndElement(EndElement event
) {
663 handleUnexpectedEndElement(event
, 1);
667 * Fires an unexpected element event if the event is not the last on the stack.
668 * Otherwise removes last stack element.
671 protected void handleUnexpectedEndElement(EndElement event
, int stackDepth
) {
672 QName qName
= event
.asEndElement().getName();
673 if (!unhandledElements
.isEmpty() && unhandledElements
.peek().equals(qName
)){
674 unhandledElements
.pop();
676 fireUnexpectedEndElement(event
.getLocation(), event
.asEndElement(), stackDepth
+ 1);
684 protected void popUnimplemented(EndElement endElement
) {
685 QName qName
= endElement
.asEndElement().getName();
686 if (unhandledElements
.peek().equals(qName
)){
687 unhandledElements
.pop();
689 String message
= "End element is not last on stack: %s";
690 message
= String
.format(message
, qName
.getLocalPart());
691 IIoEvent event
= makeProblemEvent(endElement
.getLocation(), message
, 16, 1);
699 * Fires an unexpected element event if the unhandled element stack is empty.
702 protected void handleUnexpectedElement(XMLEvent event
) {
703 if (event
.isStartElement()){
704 handleUnexpectedStartElement(event
, 2);
705 }else if (event
.isEndElement()){
706 handleUnexpectedEndElement(event
.asEndElement(), 2);
707 }else if (event
.getEventType() == XMLStreamConstants
.COMMENT
){
709 }else if (! unhandledElements
.empty()){
712 fireUnexpectedEvent(event
, 1);
717 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
720 protected void handleNotYetImplementedCharacters(XMLEvent event
) {
721 Characters chars
= event
.asCharacters();
722 fireNotYetImplementedCharacters(event
.getLocation(), chars
, 1);
726 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
729 protected void handleNotYetImplementedElement(XMLEvent event
) {
730 QName qName
= event
.asStartElement().getName();
731 boolean isTopLevel
= unhandledElements
.isEmpty();
732 unhandledElements
.push(qName
);
734 fireNotYetImplementedElement(event
.getLocation(), qName
, 1);
739 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
742 protected void handleIgnoreElement(XMLEvent event
) {
743 QName qName
= event
.asStartElement().getName();
744 unhandledElements
.push(qName
);
747 protected void handleAmbigousManually(MarkupImportState state
,
748 XMLEventReader reader
, StartElement startElement
) {
749 QName qName
= startElement
.getName();
750 unhandledElements
.push(qName
);
752 "Handle manually: " + qName
.getLocalPart() + " is ambigous and should therefore be handled manually",
753 makeLocationStr(startElement
.getLocation()), 2, 2);
757 * Checks if a mandatory text is not empty or null.
758 * Returns true if text is given.
759 * Fires an mandatory element is missing event otherwise and returns <code>null</code>.
764 protected boolean checkMandatoryText(String text
, XMLEvent parentEvent
) {
765 if (! StringUtils
.isNotBlank(text
)){
766 fireMandatoryElementIsMissing(parentEvent
, "CData", 4, 1);
773 * Fires an mandatory element is missing event if exists is <code>false</code>.
774 * @param hasMandatory
778 protected void checkMandatoryElement(boolean exists
, StartElement parentEvent
, String attrName
) {
780 fireMandatoryElementIsMissing(parentEvent
, attrName
, 5, 1);
786 * Fires an element is missing event.
791 * @throws IllegalStateException if xmlEvent is not a StartElement and not an Attribute
793 private void fireMandatoryElementIsMissing(XMLEvent xmlEvent
, String missingEventName
, int severity
, int stackDepth
) throws IllegalStateException
{
794 Location location
= xmlEvent
.getLocation();
797 if (xmlEvent
.isAttribute()){
798 Attribute attribute
= ((Attribute
)xmlEvent
);
799 typeName
= "attribute";
800 qName
= attribute
.getName();
801 }else if (xmlEvent
.isStartElement()){
802 typeName
= "element";
803 qName
= xmlEvent
.asStartElement().getName();
805 throw new IllegalStateException("mandatory element only allowed for attributes and start tags in " + makeLocationStr(location
));
807 String message
= "Mandatory %s '%s' is missing in %s";
808 message
= String
.format(message
, typeName
, missingEventName
, qName
.getLocalPart());
809 IIoEvent event
= makeProblemEvent(location
, message
, severity
, stackDepth
+1);
817 * Returns <code>true</code> if the "next" event is the ending tag for the "parent" event.
818 * @param next end element to test, must not be null
819 * @param parentEvent start element to test
820 * @return true if the "next" event is the ending tag for the "parent" event.
821 * @throws XMLStreamException
823 protected boolean isMyEndingElement(XMLEvent next
, XMLEvent parentEvent
) throws XMLStreamException
{
824 if (! parentEvent
.isStartElement()){
825 String message
= "Parent event should be start tag";
826 fireWarningEvent(message
, makeLocationStr(next
.getLocation()), 6);
829 return isEndingElement(next
, parentEvent
.asStartElement().getName().getLocalPart());
833 * Trims the text and removes turns all whitespaces into single empty space.
837 protected String
normalize(String text
) {
838 text
= StringUtils
.trimToEmpty(text
);
839 text
= text
.replaceAll("\\s+", " ");
846 * Removes whitespaces at beginning and end and makes the first letter
847 * a capital letter and all other letters small letters.
851 protected String
toFirstCapital(String value
) {
852 if (StringUtils
.isBlank(value
)){
856 value
= value
.trim();
857 result
+= value
.trim().substring(0,1).toUpperCase();
858 if (value
.length()>1){
859 result
+= value
.substring(1).toLowerCase();
866 * Currently not used.
868 * @param allowedNumberOfCharacters
869 * @param onlyFirstCapital
// Tests whether str has the form of an abbreviation.
// Checks visible in this block, in order:
//  - str must end with "."
//  - the part before the trailing dot is compared against allowedNumberOfCharacters
//  - that part is matched against an "upper-case letter followed only by
//    lower-case letters" pattern
// NOTE(review): the statements that open the method and the result of each
// check are not visible here; onlyFirstCapital is not read by any visible
// statement - confirm whether it is used at all.
872 protected boolean isAbbreviation(String str
, int allowedNumberOfCharacters
, boolean onlyFirstCapital
){
877 if (! str
.endsWith(".")){
// strip the trailing dot before measuring and matching
880 str
= str
.substring(0, str
.length() -1);
881 if (str
.length() > allowedNumberOfCharacters
){
// one upper-case character followed by any number of lower-case characters
884 final String re
= "^\\p{javaUpperCase}\\p{javaLowerCase}*$";
885 if (str
.matches(re
)){
893 * Checks if <code>abbrev</code> is the short form for the genus name (strGenusName).
894 * Usually this is the case if <code>abbrev</code> is the first letter (optional with ".")
895 * of strGenusName. But in older floras it may also be the first 2 or 3 letters (optional with dot).
896 * However, we allow only a maximum of 2 letters to be anambigous. In cases with 3 letters better
897 * change the original markup data.
899 * @param strGenusName
902 protected boolean isGenusAbbrev(String abbrev
, String strGenusName
) {
903 if (! abbrev
.matches("[A-Z][a-z]?\\.?")) {
905 }else if (abbrev
.length() == 0 || strGenusName
== null || strGenusName
.length() == 0){
908 abbrev
= abbrev
.replace(".", "");
909 return strGenusName
.startsWith(abbrev
);
910 // boolean result = true;
911 // for (int i = 0 ; i < abbrev.length(); i++){
912 // result &= ( abbrev.charAt(i) == strGenusName.charAt(i));
920 * Checks if all words in the given string start with a capital letter but do not have any further capital letter.
921 * @param word the string to be checekd. Usually should be a single word.
922 * @return true if the above is the case, false otherwise
924 protected boolean isFirstCapitalWord(String word
) {
925 if (WordUtils
.capitalizeFully(word
).equals(word
)){
927 }else if (WordUtils
.capitalizeFully(word
,new char[]{'-'}).equals(word
)){
928 //for words like Le-Testui (which is a species epithet)
937 * Read next event. Ignore whitespace events.
940 * @throws XMLStreamException
942 protected XMLEvent
readNoWhitespace(XMLEventReader reader
) throws XMLStreamException
{
943 XMLEvent event
= reader
.nextEvent();
944 while (!unhandledElements
.isEmpty()){
945 if (event
.isStartElement()){
946 handleNotYetImplementedElement(event
);
947 }else if (event
.isEndElement()){
948 popUnimplemented(event
.asEndElement());
950 event
= reader
.nextEvent();
952 while (event
.isCharacters() && event
.asCharacters().isWhiteSpace()){
953 event
= reader
.nextEvent();
959 * Returns the REQUIRED "class" attribute for a given event and checks that it is the only attribute.
963 protected String
getClassOnlyAttribute(XMLEvent parentEvent
) {
964 return getClassOnlyAttribute(parentEvent
, true);
969 * Returns the "class" attribute for a given event and checks that it is the only attribute.
973 protected String
getClassOnlyAttribute(XMLEvent parentEvent
, boolean required
) {
974 return getOnlyAttribute(parentEvent
, CLASS
, required
);
978 * Returns the value for the only attribute for a given event and checks that it is the only attribute.
982 protected String
getOnlyAttribute(XMLEvent parentEvent
, String attrName
, boolean required
) {
983 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
984 String classValue
=getAndRemoveAttributeValue(parentEvent
, attributes
, attrName
, required
, 1);
985 checkNoAttributes(attributes
, parentEvent
);
990 protected void fireWarningEvent(String message
, String locationStr
, Integer severity
, Integer depth
) {
991 docImport
.fireWarningEvent(message
, locationStr
, severity
, depth
);
994 protected void fireWarningEvent(String message
, XMLEvent event
, Integer severity
) {
995 docImport
.fireWarningEvent(message
, makeLocationStr(event
.getLocation()), severity
, 1);
998 protected void fireSchemaConflictEventExpectedStartTag(String elName
, XMLEventReader reader
) throws XMLStreamException
{
999 docImport
.fireSchemaConflictEventExpectedStartTag(elName
, reader
);
1003 protected void fireWarningEvent(String message
, String locationStr
, int severity
) {
1004 docImport
.fireWarningEvent(message
, locationStr
, severity
, 1);
1007 protected void fire(IIoEvent event
) {
1008 docImport
.fire(event
);
1011 protected boolean isNotBlank(String str
){
1012 return StringUtils
.isNotBlank(str
);
1015 protected boolean isBlank(String str
){
1016 return StringUtils
.isBlank(str
);
1019 protected TaxonDescription
getTaxonDescription(Taxon taxon
, Reference ref
, boolean isImageGallery
, boolean createNewIfNotExists
) {
1020 return docImport
.getTaxonDescription(taxon
, ref
, isImageGallery
, createNewIfNotExists
);
1023 protected TaxonDescription
getDefaultTaxonDescription(Taxon taxon
, boolean isImageGallery
, boolean createNewIfNotExists
, Reference source
) {
1024 return docImport
.getDefaultTaxonDescription(taxon
, isImageGallery
, createNewIfNotExists
, source
);
1028 * Returns the taxon description with marked as <code>true</code> with the given marker type.
1029 * If createNewIfNotExists a new description is created if it does not yet exist.
1030 * For the new description the source and the title are set if not <code>null</code>.
1033 * @param isImageGallery
1034 * @param createNewIfNotExists
1037 * @return the existing or new taxon description
1039 protected TaxonDescription
getMarkedTaxonDescription(Taxon taxon
, MarkerType markerType
, boolean isImageGallery
, boolean createNewIfNotExists
, Reference source
, String title
) {
1040 return docImport
.getMarkedTaxonDescription(taxon
, markerType
, isImageGallery
, createNewIfNotExists
, source
, title
);
1045 * Returns the default language defined in the state. If no default language is defined in the state,
1046 * the CDM default language is returned.
1050 protected Language
getDefaultLanguage(MarkupImportState state
) {
1051 Language result
= state
.getDefaultLanguage();
1052 if (result
== null){
1053 result
= Language
.DEFAULT();
1059 //*********************** FROM XML IMPORT BASE ****************************************
1060 protected boolean isEndingElement(XMLEvent event
, String elName
) throws XMLStreamException
{
1061 return docImport
.isEndingElement(event
, elName
);
1064 protected boolean isStartingElement(XMLEvent event
, String elName
) throws XMLStreamException
{
1065 return docImport
.isStartingElement(event
, elName
);
1069 protected void fillMissingEpithetsForTaxa(Taxon parentTaxon
, Taxon childTaxon
) {
1070 docImport
.fillMissingEpithetsForTaxa(parentTaxon
, childTaxon
);
1073 protected Feature
getFeature(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<Feature
> voc
){
1074 return docImport
.getFeature(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1077 protected PresenceAbsenceTerm
getPresenceAbsenceTerm(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, boolean isAbsenceTerm
, TermVocabulary
<PresenceAbsenceTerm
> voc
){
1078 return docImport
.getPresenceTerm(state
, uuid
, label
, text
, labelAbbrev
, isAbsenceTerm
, voc
);
1081 protected ExtensionType
getExtensionType(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
){
1082 return docImport
.getExtensionType(state
, uuid
, label
, text
, labelAbbrev
);
1085 protected DefinedTerm
getIdentifierType(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<DefinedTerm
> voc
){
1086 return docImport
.getIdentifierType(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1089 protected AnnotationType
getAnnotationType(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<AnnotationType
> voc
){
1090 return docImport
.getAnnotationType(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1093 protected MarkerType
getMarkerType(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<MarkerType
> voc
){
1094 return docImport
.getMarkerType(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1097 protected NamedAreaLevel
getNamedAreaLevel(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<NamedAreaLevel
> voc
){
1098 return docImport
.getNamedAreaLevel(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1101 protected NamedArea
getNamedArea(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, NamedAreaType areaType
, NamedAreaLevel level
, TermVocabulary voc
, TermMatchMode matchMode
){
1102 return docImport
.getNamedArea(state
, uuid
, label
, text
, labelAbbrev
, areaType
, level
, voc
, matchMode
);
1105 protected Language
getLanguage(MarkupImportState state
, UUID uuid
, String label
, String text
, String labelAbbrev
, TermVocabulary
<?
> voc
){
1106 return docImport
.getLanguage(state
, uuid
, label
, text
, labelAbbrev
, voc
);
1109 // *************************************** Concrete methods **********************************************/
// Maps a rank string taken from the markup to a Rank.
// Visible lookups: GENUS_ABBREVIATION maps to Rank.GENUS(); Rank.getRankByIdInVoc
// is called with the lower-cased value inside the byAbbrev branch;
// Rank.getRankByEnglishName is called with the unchanged value; "§" maps to
// Rank.SECTION_BOTANY(); "sous-genre" maps to Rank.SUBGENUS() when no rank was found.
// All lookups use the nomenclatural code from makeNomenclaturalCode(state) and
// useUnknown = true.
// NOTE(review): several statements (results of the blank and "forma" tests, the
// catch body, the final return) are not visible here - confirm before relying
// on this summary.
1118 protected Rank
makeRank(MarkupImportState state
, String value
, boolean byAbbrev
) {
1120 if (StringUtils
.isBlank(value
)) {
1124 boolean useUnknown
= true;
1125 NomenclaturalCode nc
= makeNomenclaturalCode(state
);
1126 if (value
.equals(GENUS_ABBREVIATION
)){
1127 rank
= Rank
.GENUS();
1128 }else if (byAbbrev
) {
1129 rank
= Rank
.getRankByIdInVoc(value
.toLowerCase(), nc
, useUnknown
);
1130 if (value
.equalsIgnoreCase("forma")){
1132 }else if (value
.toLowerCase().matches("(sub)?(section|genus|series|tribe)")){
1133 return Rank
.getRankByEnglishName(value
, nc
, useUnknown
);
1134 }else if (value
.equals("§")){
1135 return Rank
.SECTION_BOTANY(); //Special case in Flora Malesiana
1138 rank
= Rank
.getRankByEnglishName(value
, nc
, useUnknown
);
// NOTE(review): rank is dereferenced here, but the next visible statement
// tests "rank == null" - confirm rank cannot be null at this point.
1140 if (rank
.equals(Rank
.UNKNOWN_RANK())) {
// French "sous-genre" is handled as subgenus
1143 if (rank
== null && "sous-genre".equalsIgnoreCase(value
)){
1144 rank
= Rank
.SUBGENUS();
1146 } catch (UnknownCdmTypeException e
) {
1152 NonViralNameParserImpl parser
= NonViralNameParserImpl
.NewInstance();
1153 protected TeamOrPersonBase
<?
> createAuthor(MarkupImportState state
, String authorTitle
) {
1154 TeamOrPersonBase
<?
> result
= parser
.author(authorTitle
);
1155 return state
.getDeduplicationHelper(docImport
).getExistingAuthor(state
, result
);
1158 protected String
getAndRemoveMapKey(Map
<String
, String
> map
, String key
) {
1159 String result
= map
.get(key
);
1161 if (result
!= null) {
1162 result
= normalize(result
);
1164 return StringUtils
.stripToNull(result
);
1169 * Creates a {@link INonViralName} object depending on the defined {@link NomenclaturalCode}
1170 * and the given parameters.
1175 protected INonViralName
createNameByCode(MarkupImportState state
, Rank rank
) {
1176 NomenclaturalCode nc
= makeNomenclaturalCode(state
);
1177 INonViralName name
= nc
.getNewTaxonNameInstance(rank
);
// Reads a full name element into the given name object.
// Visible steps: read the required "rank" attribute and the optional
// "hybridClass" attribute, compute the rank with makeRank(state, rankStr, false),
// report any remaining attributes as unexpected, read the element text with
// getCData (attribute check switched off) and parse it into name with
// NonViralNameParserImpl.parseFullName. Finally the hybridClass value is
// compared with what the parser recognized.
// NOTE(review): the statements between the rank computation and the
// "Rank was computed as null" warning are not visible here.
1181 protected void handleFullName(MarkupImportState state
, XMLEventReader reader
,
1182 INonViralName name
, XMLEvent event
) throws XMLStreamException
{
1184 Map
<String
, Attribute
> attrs
= getAttributes(event
);
1185 String rankStr
= getAndRemoveRequiredAttributeValue(event
, attrs
, "rank");
1186 String hybridClass
= getAndRemoveAttributeValue(attrs
, "hybridClass");
1188 Rank rank
= makeRank(state
, rankStr
, false);
1191 String message
= "Rank was computed as null. This must not be.";
1192 fireWarningEvent(message
, event
, 6);
1193 name
.setRank(Rank
.UNKNOWN_RANK());
// attributes other than rank and hybridClass are not expected
1195 if (!attrs
.isEmpty()) {
1196 handleUnexpectedAttributes(event
.getLocation(), attrs
);
1198 fullNameStr
= getCData(state
, reader
, event
, false);
1199 NonViralNameParserImpl
.NewInstance().parseFullName(name
, fullNameStr
, rank
, false);
// cross-check the markup's hybridClass against the parser result
1200 if (hybridClass
!= null ){
1201 if ("hybrid formula".equals(hybridClass
)){
1202 if (!name
.isHybridFormula()){
1203 fireWarningEvent("Hybrid formula is not set though requested: " + fullNameStr
, event
, 4);
1205 }else if ("hybrid".equals(hybridClass
)){
// NOTE(review): this warning fires when the name is NOT a hybrid name, yet the
// text says "is recognized" - presumably "is not recognized" was meant; confirm.
1206 if (!name
.isHybridName()){
1207 fireWarningEvent("Hybrid name is recognized: " + fullNameStr
, event
, 4);
1210 handleNotYetImplementedAttributeValue(event
, "hybridClass", hybridClass
);
1217 * Returns the {@link NomenclaturalCode} for this import. Default is {@link NomenclaturalCode#ICBN} if
1218 * no code is defined.
1222 protected NomenclaturalCode
makeNomenclaturalCode(MarkupImportState state
) {
1223 NomenclaturalCode nc
= state
.getConfig().getNomenclaturalCode();
1225 nc
= NomenclaturalCode
.ICNAFP
; // default;
1233 * @param levelString
1237 protected NamedAreaLevel
makeNamedAreaLevel(MarkupImportState state
, String levelString
, XMLEvent next
) {
1238 NamedAreaLevel level
;
1240 level
= state
.getTransformer().getNamedAreaLevelByKey(levelString
);
1241 if (level
== null) {
1242 UUID levelUuid
= state
.getTransformer().getNamedAreaLevelUuid(levelString
);
1243 if (levelUuid
== null) {
1244 String message
= "Unknown distribution locality class (named area level): %s. Create new level instead.";
1245 message
= String
.format(message
, levelString
);
1246 fireWarningEvent(message
, next
, 6);
1248 level
= getNamedAreaLevel(state
, levelUuid
, levelString
, levelString
, levelString
, null);
1250 } catch (UndefinedTransformerMethodException e
) {
1251 throw new RuntimeException(e
);
// Resolves the NamedArea for areaName.
// Visible steps:
//  (1) ask the state's transformer for an area by key;
//  (2) look up a UUID for the name in the state, then in the transformer, and
//      fall back to a random UUID that is stored in the state;
//  (3) call getNamedArea(...) with TermMatchMode.UUID_LABEL and store the UUID
//      of the resulting area in the state;
//  (4) for the hard-coded area names "Bangka", "Luzon", "Mindanao" and "Palawan"
//      register a GeoServiceArea mapping through editGeoService.
// An UndefinedTransformerMethodException is rethrown as RuntimeException.
// NOTE(review): the conditions guarding steps (2)-(4) and the final return are
// not visible here.
1263 protected NamedArea
makeArea(MarkupImportState state
, String areaName
, NamedAreaLevel level
) {
1265 //TODO FM vocabulary
1266 TermVocabulary
<NamedArea
> voc
= null;
1267 NamedAreaType areaType
= null;
1269 NamedArea area
= null;
1271 area
= state
.getTransformer().getNamedAreaByKey(areaName
);
1272 } catch (UndefinedTransformerMethodException e
) {
1273 throw new RuntimeException(e
);
1276 boolean isNewInState
= false;
1277 UUID uuid
= state
.getAreaUuid(areaName
);
1279 isNewInState
= true;
1281 uuid
= state
.getTransformer().getNamedAreaUuid(areaName
);
1283 uuid
= UUID
.randomUUID();
1284 state
.putAreaUuid(areaName
, uuid
);
1286 } catch (UndefinedTransformerMethodException e
) {
1287 throw new RuntimeException(e
);
1291 CdmImportBase
.TermMatchMode matchMode
= CdmImportBase
.TermMatchMode
.UUID_LABEL
;
1292 area
= getNamedArea(state
, uuid
, areaName
, areaName
, areaName
, areaType
, level
, voc
, matchMode
);
1294 state
.putAreaUuid(areaName
, area
.getUuid());
1296 //TODO just for testing -> make generic and move to better place
1297 String geoServiceLayer
="vmap0_as_bnd_political_boundary_a";
1298 String layerFieldName
="nam";
// hard-coded mapping of single area names to geo service values
1300 if ("Bangka".equals(areaName
)){
1301 String areaValue
= "PULAU BANGKA#SUMATERA SELATAN";
1302 GeoServiceArea geoServiceArea
= new GeoServiceArea();
1303 geoServiceArea
.add(geoServiceLayer
, layerFieldName
, areaValue
);
1304 this.editGeoService
.setMapping(area
, geoServiceArea
);
1305 // save(area, state);
1307 if ("Luzon".equals(areaName
)){
1308 GeoServiceArea geoServiceArea
= new GeoServiceArea();
1310 List
<String
> list
= Arrays
.asList("HERMANA MAYOR ISLAND#CENTRAL LUZON",
1311 "HERMANA MENOR ISLAND#CENTRAL LUZON",
1313 for (String areaValue
: list
){
1314 geoServiceArea
.add(geoServiceLayer
, layerFieldName
, areaValue
);
1317 this.editGeoService
.setMapping(area
, geoServiceArea
);
1318 // save(area, state);
1320 if ("Mindanao".equals(areaName
)){
1321 GeoServiceArea geoServiceArea
= new GeoServiceArea();
1323 List
<String
> list
= Arrays
.asList("NORTHERN MINDANAO",
1324 "SOUTHERN MINDANAO",
1325 "WESTERN MINDANAO");
1326 //TODO to be continued
1327 for (String areaValue
: list
){
1328 geoServiceArea
.add(geoServiceLayer
, layerFieldName
, areaValue
);
1331 this.editGeoService
.setMapping(area
, geoServiceArea
);
1332 // save(area, state);
1334 if ("Palawan".equals(areaName
)){
1335 GeoServiceArea geoServiceArea
= new GeoServiceArea();
1337 List
<String
> list
= Arrays
.asList("PALAWAN#SOUTHERN TAGALOG");
1338 for (String areaValue
: list
){
1339 geoServiceArea
.add(geoServiceLayer
, layerFieldName
, areaValue
);
1342 this.editGeoService
.setMapping(area
, geoServiceArea
);
1343 // save(area, state);
1354 * Reads character data. Any element other than character data or the ending
1355 * tag will fire an unexpected element event.
1357 * @see #getCData(MarkupImportState, XMLEventReader, XMLEvent, boolean)
1362 * @throws XMLStreamException
1364 protected String
getCData(MarkupImportState state
, XMLEventReader reader
, XMLEvent next
) throws XMLStreamException
{
1365 return getCData(state
, reader
, next
, true);
1369 * Reads character data. Any element other than character data or the ending
1370 * tag will fire an unexpected element event.
1375 * @param inlineMarkup map for inline markup, this is used for e.g. the locality markup within a subheading
1376 * The map will be filled by the markup element name as key. The value may be a String, a CdmBase or any other object.
1377 * If null any markup text will be neglected but a warning will be fired if they exist.
1378 * @param removeInlineMarkupText if true the markedup text will be removed from the returned String
1379 * @param checkAttributes
1381 * @throws XMLStreamException
1383 protected String
getCData(MarkupImportState state
, XMLEventReader reader
, XMLEvent parent
, /*Map<String, Object> inlineMarkup, *boolean removeInlineMarkupText,*/ boolean checkAttributes
) throws XMLStreamException
{
1384 if (checkAttributes
){
1385 checkNoAttributes(parent
);
1389 while (reader
.hasNext()) {
1390 XMLEvent next
= readNoWhitespace(reader
);
1391 if (isMyEndingElement(next
, parent
)) {
1393 } else if (next
.isCharacters()) {
1394 text
+= next
.asCharacters().getData();
1395 } else if (isStartingElement(next
, FOOTNOTE_REF
)){
1396 handleNotYetImplementedElement(next
);
1397 // } else if (isStartingElement(next, LOCALITY)){
1398 // handleCDataLocality(state, reader, parent);
1400 handleUnexpectedElement(next
);
1403 throw new IllegalStateException("Event has no closing tag");
1407 // private void handleCDataLocality(MarkupImportState state, XMLEventReader reader, XMLEvent parent) {
1408 // checkAndRemoveAttributeValue(attributes, attrName, value)
1415 * For it returns a pure CData annotation string. This behaviour may change in future. More complex annotations
1416 * should be handled differently.
1419 * @param parentEvent
1421 * @throws XMLStreamException
1423 protected String
handleSimpleAnnotation(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
1424 String annotation
= getCData(state
, reader
, parentEvent
);
1429 * True if text is single "." oder "," or ";" or ":"
1433 protected boolean isPunctuation(String text
) {
1434 return text
== null ?
false : text
.trim().matches("^[\\.,;:]$");
1439 * Text indicating that type information is following but no information about the type of the type
1443 protected boolean charIsSimpleType(String text
) {
1444 return text
.matches("(?i)Type:");
1447 protected String
getXmlTag(XMLEvent event
) {
1449 if (event
.isStartElement()) {
1450 result
= "<" + event
.asStartElement().getName().getLocalPart()
1452 } else if (event
.isEndElement()) {
1453 result
= "</" + event
.asEndElement().getName().getLocalPart() + ">";
1455 String message
= "Only start or end elements are allowed as Html tags";
1456 throw new IllegalStateException(message
);
// Reads a writer element into a WriterDataHolder.
// Visible steps at the closing tag: brackets are removed from the collected
// text (CdmUtils.removeBrackets); if checkMandatoryText accepts it, the text is
// normalized and stored in dataHolder.writer together with the collected
// footnotes, and both an Extension (type "Writer", MarkupTransformer.uuidWriterExtension)
// and an Annotation (type "Writer", MarkupTransformer.uuidWriterAnnotation,
// default language of the state) are created from the same text.
// Inside the element: footnote references are collected, character data is
// appended to the text, anything else is reported as unexpected and the state
// is marked unsuccessful.
// NOTE(review): the return statements and the declaration of "text" are not
// visible here.
1461 protected WriterDataHolder
handleWriter(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
1463 checkNoAttributes(parentEvent
);
1464 WriterDataHolder dataHolder
= new WriterDataHolder();
1465 List
<FootnoteDataHolder
> footnotes
= new ArrayList
<>();
1467 // TODO handle attributes
1468 while (reader
.hasNext()) {
1469 XMLEvent next
= readNoWhitespace(reader
);
1470 if (isMyEndingElement(next
, parentEvent
)) {
1471 text
= CdmUtils
.removeBrackets(text
);
1472 if (checkMandatoryText(text
, parentEvent
)) {
1473 text
= normalize(text
);
1474 dataHolder
.writer
= text
;
1475 dataHolder
.footnotes
= footnotes
;
// the writer text is stored as an extension ...
1478 UUID uuidWriterExtension
= MarkupTransformer
.uuidWriterExtension
;
1479 ExtensionType writerExtensionType
=
1480 this.getExtensionType(state
, uuidWriterExtension
,"Writer", "writer", "writer");
1481 Extension extension
= Extension
.NewInstance();
1482 extension
.setType(writerExtensionType
);
1483 extension
.setValue(text
);
1484 dataHolder
.extension
= extension
;
// ... and additionally as an annotation
1487 UUID uuidWriterAnnotation
= MarkupTransformer
.uuidWriterAnnotation
;
1488 AnnotationType writerAnnotationType
= this.getAnnotationType(state
, uuidWriterAnnotation
, "Writer", "writer", "writer", null);
1489 Annotation annotation
= Annotation
.NewInstance(text
, writerAnnotationType
, getDefaultLanguage(state
));
1490 dataHolder
.annotation
= annotation
;
1496 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
1497 FootnoteDataHolder footNote
= handleFootnoteRef(state
, reader
, next
);
1498 if (footNote
.isRef()) {
1499 footnotes
.add(footNote
);
1501 logger
.warn("Non ref footnotes not yet impelemnted");
1503 } else if (next
.isCharacters()) {
1504 text
+= next
.asCharacters().getData();
1507 handleUnexpectedElement(next
);
1508 state
.setUnsuccessfull();
1511 throw new IllegalStateException("<writer> has no end tag");
1515 protected void registerFootnotes(MarkupImportState state
, AnnotatableEntity entity
, List
<FootnoteDataHolder
> footnotes
) {
1516 for (FootnoteDataHolder footNote
: footnotes
) {
1517 registerFootnoteDemand(state
, entity
, footNote
);
1522 private void registerFootnoteDemand(MarkupImportState state
, AnnotatableEntity entity
, FootnoteDataHolder footnote
) {
1523 FootnoteDataHolder existingFootnote
= state
.getFootnote(footnote
.ref
);
1524 if (existingFootnote
!= null) {
1525 attachFootnote(state
, entity
, existingFootnote
);
1527 Set
<AnnotatableEntity
> demands
= state
.getFootnoteDemands(footnote
.ref
);
1528 if (demands
== null) {
1529 demands
= new HashSet
<>();
1530 state
.putFootnoteDemands(footnote
.ref
, demands
);
1532 demands
.add(entity
);
1537 protected void attachFootnote(MarkupImportState state
, AnnotatableEntity entity
, FootnoteDataHolder footnote
) {
1538 AnnotationType annotationType
= this.getAnnotationType(state
, MarkupTransformer
.uuidFootnote
, "Footnote", "An e-flora footnote", "fn", null);
1539 Annotation annotation
= Annotation
.NewInstance(footnote
.string
, annotationType
, getDefaultLanguage(state
));
1540 // TODO transient objects
1541 entity
.addAnnotation(annotation
);
1542 save(entity
, state
);
1546 protected void attachFigure(MarkupImportState state
, XMLEvent next
, AnnotatableEntity entity
, Media figure
) {
1547 // IdentifiableEntity<?> toSave;
1548 if (entity
.isInstanceOf(TextData
.class)) {
1549 TextData deb
= CdmBase
.deproxy(entity
, TextData
.class);
1550 deb
.addMedia(figure
);
1551 // toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
1552 } else if (entity
.isInstanceOf(SpecimenOrObservationBase
.class)) {
1553 String message
= "figures for specimen should be handled as Textdata";
1554 fireWarningEvent(message
, next
, 4);
1556 } else if (entity
.isInstanceOf(IdentifiableMediaEntity
.class)) {
1557 IdentifiableMediaEntity
<?
> ime
= CdmBase
.deproxy(entity
, IdentifiableMediaEntity
.class);
1558 ime
.addMedia(figure
);
1561 String message
= "Unsupported entity to attach media: %s";
1562 message
= String
.format(message
, entity
.getClass().getName());
1565 save(entity
, state
);
1569 protected void registerGivenFootnote(MarkupImportState state
, FootnoteDataHolder footnote
) {
1570 state
.registerFootnote(footnote
);
1571 Set
<AnnotatableEntity
> demands
= state
.getFootnoteDemands(footnote
.id
);
1572 if (demands
!= null) {
1573 for (AnnotatableEntity entity
: demands
) {
1574 attachFootnote(state
, entity
, footnote
);
1580 protected FootnoteDataHolder
handleFootnote(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
,
1581 MarkupSpecimenImport specimenImport
, MarkupNomenclatureImport nomenclatureImport
) throws XMLStreamException
{
1582 FootnoteDataHolder result
= new FootnoteDataHolder();
1583 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
1584 result
.id
= getAndRemoveAttributeValue(attributes
, ID
);
1585 // result.ref = getAndRemoveAttributeValue(attributes, REF);
1586 checkNoAttributes(attributes
, parentEvent
);
1588 while (reader
.hasNext()) {
1589 XMLEvent next
= readNoWhitespace(reader
);
1590 if (isStartingElement(next
, FOOTNOTE_STRING
)) {
1591 String string
= handleFootnoteString(state
, reader
, next
, specimenImport
, nomenclatureImport
);
1592 result
.string
= string
;
1593 } else if (isMyEndingElement(next
, parentEvent
)) {
1596 fireUnexpectedEvent(next
, 0);
1603 protected Media
handleFigure(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
,
1604 MarkupSpecimenImport specimenImport
, MarkupNomenclatureImport nomenclatureImport
) throws XMLStreamException
{
1605 // FigureDataHolder result = new FigureDataHolder();
1607 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
1608 String id
= getAndRemoveAttributeValue(attributes
, ID
);
1609 String type
= getAndRemoveAttributeValue(attributes
, TYPE
);
1610 String urlAttr
= getAndRemoveAttributeValue(attributes
, URL
);
1611 checkNoAttributes(attributes
, parentEvent
);
1613 String urlString
= null;
1614 String legendString
= null;
1615 String titleString
= null;
1616 String numString
= null;
1618 if (isNotBlank(urlAttr
)){
1619 urlString
= CdmUtils
.Nz(state
.getBaseMediaUrl()) + urlAttr
;
1621 while (reader
.hasNext()) {
1622 XMLEvent next
= readNoWhitespace(reader
);
1623 if (isMyEndingElement(next
, parentEvent
)) {
1624 if (isNotBlank(text
)){
1625 if (isNeglectableFigureText(text
)){
1626 fireWarningEvent("Text not yet handled for figures: " + text
, next
, 4);
1629 Media media
= makeFigure(state
, id
, type
, urlString
, legendString
, titleString
, numString
, next
);
1631 } else if (isStartingElement(next
, FIGURE_LEGEND
)) {
1632 // TODO same as figure string ?
1633 legendString
= handleFootnoteString(state
, reader
, next
, specimenImport
, nomenclatureImport
);
1634 } else if (isStartingElement(next
, FIGURE_TITLE
)) {
1635 titleString
= getCData(state
, reader
, next
);
1636 } else if (isStartingElement(next
, URL
)) {
1637 String localUrl
= getCData(state
, reader
, next
);
1638 String url
= CdmUtils
.Nz(state
.getBaseMediaUrl()) + localUrl
;
1639 if (isBlank(urlString
)){
1642 if (! url
.equals(urlString
)){
1643 String message
= "URL attribute and URL element differ. Attribute: %s, Element: %s";
1644 fireWarningEvent(String
.format(message
, urlString
, url
), next
, 2);
1646 } else if (isStartingElement(next
, NUM
)) {
1647 numString
= getCData(state
, reader
, next
);
1648 } else if (next
.isCharacters()) {
1649 text
= CdmUtils
.concat("", text
, next
.asCharacters().getData());
1651 fireUnexpectedEvent(next
, 0);
1654 throw new IllegalStateException("<figure> has no end tag");
1662 private boolean isNeglectableFigureText(String text
) {
1663 if (text
.matches("Fig\\.*")){
1676 * @param legendString
1677 * @param titleString
// Creates the Media for a figure and registers it.
// Visible steps: the figure type is checked against the known values
// ("lineart", "photo", "signature", "others" or null), any other type fires a
// warning; the media is obtained from docImport.getImageMedia(urlString, ...);
// title and legend are stored for the state's default language; numString
// and id are added as import sources using the config's source reference; the
// figure is registered via registerGivenFigure. A MalformedURLException is
// reported as a warning.
// NOTE(review): several statements (declaration of "media", null checks, the
// final return) are not visible here.
1681 private Media
makeFigure(MarkupImportState state
, String id
, String type
, String urlString
,
1682 String legendString
, String titleString
, String numString
, XMLEvent next
) {
1684 // boolean isFigure = false; //no difference between figure and media since v3.3
1686 //TODO maybe everything is a figure as it is all taken from a book
1687 if ("lineart".equals(type
)) {
1689 // media = Figure.NewInstance(url.toURI(), null, null, null);
1690 } else if (type
== null || "photo".equals(type
)
1691 || "signature".equals(type
)
1692 || "others".equals(type
)) {
1695 String message
= "Unknown figure type '%s'";
1696 message
= String
.format(message
, type
);
1697 fireWarningEvent(message
, next
, 2);
1699 media
= docImport
.getImageMedia(urlString
, docImport
.getReadMediaData());
1703 if (StringUtils
.isNotBlank(titleString
)) {
1704 media
.putTitle(getDefaultLanguage(state
), titleString
);
1707 if (StringUtils
.isNotBlank(legendString
)) {
1708 media
.putDescription(getDefaultLanguage(state
), legendString
);
// the figure number is kept as an import source with the name space "num"
1710 if (StringUtils
.isNotBlank(numString
)) {
1711 // TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
1713 Reference citation
= state
.getConfig().getSourceReference();
1714 media
.addSource(OriginalSourceType
.Import
, numString
, "num", citation
, null);
1715 // TODO name used in source if available
1717 // TODO which citation
1718 if (StringUtils
.isNotBlank(id
)) {
1719 media
.addSource(OriginalSourceType
.Import
, id
, null, state
.getConfig().getSourceReference(), null);
1721 String message
= "Figure id should never be empty or null";
1722 fireWarningEvent(message
, next
, 6);
1727 registerGivenFigure(state
, next
, id
, media
);
// NOTE(review): the message ends with ": " but nothing is appended in the
// visible code - presumably the url was meant to follow; confirm.
1730 String message
= "No media found: ";
1731 fireWarningEvent(message
, next
, 4);
1733 } catch (MalformedURLException e
) {
1734 String message
= "Media uri has incorrect syntax: %s";
1735 message
= String
.format(message
, urlString
);
1736 fireWarningEvent(message
, next
, 4);
1737 // } catch (URISyntaxException e) {
1738 // String message = "Media uri has incorrect syntax: %s";
1739 // message = String.format(message, urlString);
1740 // fireWarningEvent(message, next, 4);
1747 private void registerGivenFigure(MarkupImportState state
, XMLEvent next
, String id
, Media figure
) {
1748 state
.registerFigure(id
, figure
);
1749 Set
<AnnotatableEntity
> demands
= state
.getFigureDemands(id
);
1750 if (demands
!= null) {
1751 for (AnnotatableEntity entity
: demands
) {
1752 attachFigure(state
, next
, entity
, figure
);
1755 save(figure
, state
);
1759 private FootnoteDataHolder
handleFootnoteRef(MarkupImportState state
,
1760 XMLEventReader reader
, XMLEvent parentEvent
)
1761 throws XMLStreamException
{
1762 FootnoteDataHolder result
= new FootnoteDataHolder();
1763 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
1764 result
.ref
= getAndRemoveAttributeValue(attributes
, REF
);
1765 checkNoAttributes(attributes
, parentEvent
);
1767 // text is not handled, needed only for debugging purposes
1769 while (reader
.hasNext()) {
1770 XMLEvent next
= readNoWhitespace(reader
);
1771 // if (isStartingElement(next, FOOTNOTE_STRING)){
1772 // String string = handleFootnoteString(state, reader, next);
1773 // result.string = string;
1775 if (isMyEndingElement(next
, parentEvent
)) {
1776 if (StringUtils
.isNotBlank(text
)){
1777 fireWarningEvent("text is not empty but not handled during import", parentEvent
, 4);
1780 } else if (next
.isCharacters() && unhandledElements
.isEmpty()) {
1781 text
+= next
.asCharacters().getData();
1782 } else if (isStartingElement(next
, NUM
)) {
1783 //ignore numbering of footnotes as they are numbered differently in the CDM
1784 handleIgnoreElement(next
);
1786 handleUnexpectedElement(next
);
// Reads the content of a footnote string (also used for figure legends) and
// returns it as text.
// Visible handling: html tags (see isHtml) are copied into the text via
// getXmlTag; inline gatherings and references are rendered by
// specimenImport.handleInLineGathering and handleInLineReferences; fullName
// and nomenclature elements are reported as not yet implemented; character
// data is appended trimmed; a warning is fired for character data outside
// text mode.
// NOTE(review): the handling of the BR start and end tags and the places
// where isTextMode changes are not visible here.
1794 private String
handleFootnoteString(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, MarkupSpecimenImport specimenImport
, MarkupNomenclatureImport nomenclatureImport
) throws XMLStreamException
{
1795 boolean isTextMode
= true;
1797 while (reader
.hasNext()) {
1798 XMLEvent next
= readNoWhitespace(reader
);
1799 if (isMyEndingElement(next
, parentEvent
)) {
1801 } else if (next
.isEndElement()) {
1802 if (isEndingElement(next
, FULL_NAME
)) {
1803 popUnimplemented(next
.asEndElement());
1804 } else if (isEndingElement(next
, BR
)) {
1806 } else if (isHtml(next
)) {
1807 text
+= getXmlTag(next
);
1809 handleUnexpectedEndElement(next
.asEndElement());
1811 } else if (next
.isStartElement()) {
1812 if (isStartingElement(next
, FULL_NAME
)) {
1813 handleNotYetImplementedElement(next
);
1814 } else if (isStartingElement(next
, GATHERING
)) {
1815 text
+= specimenImport
.handleInLineGathering(state
, reader
, next
);
1816 } else if (isStartingElement(next
, REFERENCES
)) {
1817 text
+= " " + handleInLineReferences(state
, reader
, next
, nomenclatureImport
) + " ";
1818 } else if (isStartingElement(next
, BR
)) {
1821 } else if (isStartingElement(next
, NOMENCLATURE
)) {
1822 handleNotYetImplementedElement(next
);
1823 } else if (isHtml(next
)) {
1824 text
+= getXmlTag(next
);
1826 handleUnexpectedStartElement(next
.asStartElement());
1828 } else if (next
.isCharacters()) {
1830 String message
= "footnoteString is not in text mode";
1831 fireWarningEvent(message
, next
, 6);
1833 text
+= next
.asCharacters().getData().trim();
1834 // getCData(state, reader, next); does not work as we have inner tags like <references>
// NOTE(review): this looks like the catch-all branch for events that are
// neither start, end nor character events; asEndElement() would then fail
// with a ClassCastException - presumably handleUnexpectedElement(next) was
// meant; confirm.
1837 handleUnexpectedEndElement(next
.asEndElement());
1840 throw new IllegalStateException("<footnoteString> has no closing tag");
// Local names of the elements that isHtml(XMLEvent) accepts as html-like tags.
1844 private static final List
<String
> htmlList
= Arrays
.asList("sub", "sup",
1845 "ol", "ul", "li", "i", "b", "table", "br","tr","td","th");
1847 protected boolean isHtml(XMLEvent event
) {
1848 if (event
.isStartElement()) {
1849 String tag
= event
.asStartElement().getName().getLocalPart();
1850 return htmlList
.contains(tag
);
1851 } else if (event
.isEndElement()) {
1852 String tag
= event
.asEndElement().getName().getLocalPart();
1853 return htmlList
.contains(tag
);
1861 private String
handleInLineReferences(MarkupImportState state
,XMLEventReader reader
, XMLEvent parentEvent
,
1862 MarkupNomenclatureImport nomenclatureImport
) throws XMLStreamException
{
1863 checkNoAttributes(parentEvent
);
1865 boolean hasReference
= false;
1867 while (reader
.hasNext()) {
1868 XMLEvent next
= readNoWhitespace(reader
);
1869 if (isMyEndingElement(next
, parentEvent
)) {
1870 checkMandatoryElement(hasReference
, parentEvent
.asStartElement(), REFERENCE
);
1872 } else if (isStartingElement(next
, REFERENCE
)) {
1873 text
+= handleInLineReference(state
, reader
, next
, nomenclatureImport
);
1874 hasReference
= true;
1876 handleUnexpectedElement(next
);
1879 throw new IllegalStateException("<References> has no closing tag");
1882 private String
handleInLineReference(MarkupImportState state
,XMLEventReader reader
, XMLEvent parentEvent
, MarkupNomenclatureImport nomenclatureImport
)throws XMLStreamException
{
1883 Reference reference
= nomenclatureImport
.handleReference(state
, reader
, parentEvent
);
1884 fireWarningEvent("Check correct usage of inline reference", parentEvent
, 3);
1885 IntextReference intext
= IntextReference
.NewInstance(reference
, null, 0, 0);
1886 save(reference
, state
);
1887 return intext
.toInlineString(reference
.getTitleCache());
// Result holder for one subheading of a string element; instances are created and
// filled in putCurrentSubheading(...) and stored in the subheading map.
1890 protected class SubheadingResult
{
// the references handled for the subheading (an empty StringReferences if there were none)
1892 StringReferences references
;
// the intext references created while handling the subheading
1893 List
<IntextReference
> inlineReferences
;
1897 * Handles the content of a &lt;string&gt; element.
1900 * @param parentEvent
1901 * @param feature only needed for distributionLocalities
1903 * @throws XMLStreamException
// Reads the events nested in the given element, splits the content by subheading and
// returns a map from subheading to the SubheadingResult collected for it. Content found
// before the first subheading is stored under the key null, as currentSubheading starts
// as null. A class attribute on the element is not yet supported and only fires a warning.
// Throws IllegalStateException if the reader runs out of events before the closing tag.
1905 protected Map
<String
, SubheadingResult
> handleString(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, Feature feature
)throws XMLStreamException
{
1907 String classValue
= getClassOnlyAttribute(parentEvent
, false);
1908 if (StringUtils
.isNotBlank(classValue
)) {
1909 String message
= "class attribute for <string> not yet implemented";
1910 fireWarningEvent(message
, parentEvent
, 2);
1912 boolean isHabitat
= false;
1915 Map
<String
, SubheadingResult
> subHeadingMap
= new HashMap
<>();
1916 String currentSubheading
= null;
1918 boolean isTextMode
= true;
1920 StringReferences currentReferences
= null;
1921 List
<IntextReference
> inlineReferences
= new ArrayList
<>();
1922 boolean lastWasReference
= false;
1923 while (reader
.hasNext()) {
1924 XMLEvent next
= readNoWhitespace(reader
);
// end of the element: store what was collected for the last subheading and return
1925 if (isMyEndingElement(next
, parentEvent
)) {
1926 putCurrentSubheading(subHeadingMap
, currentSubheading
, text
, currentReferences
, inlineReferences
);
1927 return subHeadingMap
;
1929 //check if last event was reference
// if the references are followed by anything but a BR tag or a new subheading,
// they are moved into the text: labeled references with a ref become intext references
1930 if (lastWasReference
&& !isStartingElement(next
, BR
) && !isEndingElement(next
, BR
)
1931 && !isStartingElement(next
, SUB_HEADING
)){
1932 for (LabeledReference labeledRef
: currentReferences
.content
){
1933 if (labeledRef
.ref
!= null){
1934 IntextReference intext
= IntextReference
.NewInstance(labeledRef
.ref
, null, 0, 0);
1935 inlineReferences
.add(intext
);
1936 text
+= intext
.toInlineString(labeledRef
.label
);
1938 text
+= labeledRef
.label
;
1941 lastWasReference
= false;
1943 if (isStartingElement(next
, BR
)) {
1946 } else if (isEndingElement(next
, BR
)) {
1948 } else if (isHtml(next
)) {
1949 text
+= getXmlTag(next
);
// a new subheading starts: store the data of the previous one and reset the collectors
1950 } else if (isStartingElement(next
, SUB_HEADING
)) {
1951 text
= putCurrentSubheading(subHeadingMap
, currentSubheading
, text
, currentReferences
, inlineReferences
);
1952 currentReferences
= null;
1953 inlineReferences
= new ArrayList
<>();
1954 lastWasReference
= false;
1956 currentSubheading
= getCData(state
, reader
, next
).trim();
1957 } else if (isStartingElement(next
, DISTRIBUTION_LOCALITY
)) {
// a warning is fired if a feature is given that is not the distribution feature;
// the locality is handled and appended to the text in any case
1958 if (feature
!= null && !feature
.equals(Feature
.DISTRIBUTION())) {
1959 String message
= "Distribution locality only allowed for feature of type 'distribution'";
1960 fireWarningEvent(message
, next
, 4);
1962 text
+= handleDistributionLocality(state
, reader
, next
);
1963 } else if (next
.isCharacters()) {
1965 String message
= "String is not in text mode";
1966 fireWarningEvent(message
, next
, 6);
1968 text
+= next
.asCharacters().getData();
1970 } else if (isStartingElement(next
, HEADING
)) {
1972 handleNotYetImplementedElement(next
);
1973 } else if (isStartingElement(next
, VERNACULAR_NAMES
)) {
1975 handleNotYetImplementedElement(next
);
1976 } else if (isStartingElement(next
, QUOTE
)) {
1978 handleNotYetImplementedElement(next
);
1979 } else if (isStartingElement(next
, DEDICATION
)) {
1981 handleNotYetImplementedElement(next
);
1982 } else if (isStartingElement(next
, TAXONTYPE
)) {
1984 handleNotYetImplementedElement(next
);
1985 } else if (isStartingElement(next
, FULL_NAME
)) {
1987 handleNotYetImplementedElement(next
);
1988 }else if (isStartingElement(next
, REFERENCES
)) {
// a second references element for the same subheading fires a warning;
// the new references replace the former ones
1989 if (currentReferences
!= null){
1990 fireWarningEvent("References do already exist", next
, 2);
1992 currentReferences
= handleStringReferences(state
, reader
, next
);
1993 lastWasReference
= true;
1994 }else if (isStartingElement(next
, REFERENCE
)) {
1996 handleNotYetImplementedElement(next
);
1997 } else if (isStartingElement(next
, GATHERING
)) {
1999 handleNotYetImplementedElement(next
);
2000 } else if (isStartingElement(next
, ANNOTATION
)) {
2001 //TODO //TODO test handleSimpleAnnotation
2002 handleNotYetImplementedElement(next
);
2003 } else if (isStartingElement(next
, HABITAT
)) {
2004 text
+= featureImport
.handleHabitat(state
, reader
, next
);
2006 } else if (isStartingElement(next
, FIGURE_REF
)) {
2008 handleNotYetImplementedElement(next
);
2009 } else if (isStartingElement(next
, FIGURE
)) {
2011 handleNotYetImplementedElement(next
);
2012 } else if (isStartingElement(next
, FOOTNOTE_REF
)) {
2014 handleNotYetImplementedElement(next
);
2015 } else if (isStartingElement(next
, FOOTNOTE
)) {
2017 handleNotYetImplementedElement(next
);
2018 } else if (isStartingElement(next
, WRITER
)) {
2020 handleNotYetImplementedElement(next
);
2021 } else if (isStartingElement(next
, DATES
)) {
2023 handleNotYetImplementedElement(next
);
2024 } else if (isStartingElement(next
, TO_KEY
)) {
2025 handleNotYetImplementedElement(next
);
2027 handleUnexpectedElement(next
);
// reached only if the reader has no more events while the element is still open
2030 throw new IllegalStateException("<String> has no closing tag");
2035 * container class more or less representing a list of labeled references
2037 protected class StringReferences
{
2039 List
<LabeledReference
> content
= new ArrayList
<>() ; //either String or LabeledReference
2041 public String
toString(){
2042 String result
= null;
2043 for (LabeledReference labRef
: content
){
2044 result
= CdmUtils
.concat("", labRef
.label
);
2048 public List
<LabeledReference
> getReferences() {
2049 List
<LabeledReference
> result
= new ArrayList
<>();
2050 for (LabeledReference labRef
: content
){
2051 if (labRef
.ref
!= null){
2059 protected class LabeledReference
{
2060 public LabeledReference(Reference ref
, String detail
, String label
) {
2061 this.ref
= ref
; this.detail
= detail
; this.label
= label
;
2063 protected Reference ref
; //if null, this LabeledReference represents only a string in between references
2064 protected String detail
; //micro reference
2065 protected String label
;
2068 private StringReferences
handleStringReferences(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
2069 checkNoAttributes(parentEvent
);
2070 StringReferences result
= new StringReferences();
2071 while (reader
.hasNext()) {
2072 XMLEvent next
= readNoWhitespace(reader
);
2073 if (isMyEndingElement(next
, parentEvent
)) {
2075 } else if (isStartingElement(next
, SUB_HEADING
)) {
2076 String subheading
= getCData(state
, reader
, next
);
2077 if (!subheading
.matches("(References?|Literature):?")){
2078 fireWarningEvent("Subheading for references not recognized: " + subheading
, next
, 4);
2080 result
.subheading
= subheading
;
2081 } else if (isStartingElement(next
, REFERENCE
)) {
2082 handleInlineReference(state
, reader
, next
, result
);
2084 handleUnexpectedElement(next
);
2087 throw new IllegalStateException("<References> has no closing tag");
/**
 * Handles a single reference element within the references of a string element.
 * The values of the contained <code>REF_PART</code> elements are collected in a map
 * keyed by their class value and are concatenated to a label. At the end of the
 * element a reference is created from the map, replaced by the reference returned
 * by the deduplication helper and added to <code>result.content</code> together with
 * its details and the label.
 *
 * @param state the import state
 * @param reader the event reader the events are read from
 * @param parentEvent the start event of the reference element, must not have attributes
 * @param result the container the new labeled reference is added to
 * @throws XMLStreamException declared for reading from the event reader
 * @throws IllegalStateException if the reader runs out of events before the closing tag
 */
2090 private void handleInlineReference(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
,
2091 StringReferences result
) throws XMLStreamException
{
2092 checkNoAttributes(parentEvent
);
2093 boolean hasRefPart
= false;
2094 Map
<String
, String
> refMap
= new HashMap
<>();
2096 while (reader
.hasNext()) {
2097 XMLEvent next
= readNoWhitespace(reader
);
2098 if (isMyEndingElement(next
, parentEvent
)) {
2099 checkMandatoryElement(hasRefPart
, parentEvent
.asStartElement(), REF_PART
);
// the details are read with get(..) and therefore stay in the map
2100 String details
= refMap
.get(DETAILS
);
2101 // String label = makeLabel(state, refMap, next);
2102 Reference ref
= createReference(state
, refMap
, next
);
2103 ref
= state
.getDeduplicationHelper(docImport
).getExistingReference(state
, ref
);
// label2 is not used in the visible code; the label built from the ref parts is stored instead
2105 String label2
= ref
.getTitleCache(); //TODO preliminary for debugging and testing
2106 result
.content
.add(new LabeledReference(ref
, details
, label
));
2108 } else if (isStartingElement(next
, REF_PART
)) {
2109 String classValue
= handleRefPart(state
, reader
, next
, refMap
);
2110 String text
= refMap
.get(classValue
);
// the year is put into brackets within the label
2111 if (classValue
.equals(YEAR
)){
2112 text
= "("+text
+")";
2115 label
= CdmUtils
.concat(" ", label
, text
);
2117 handleUnexpectedElement(next
);
// reached only if the reader has no more events while the element is still open
2120 throw new IllegalStateException("<References> has no closing tag");
2125 // this is more or less a duplicate Nomenclature import, maybe merge later
// Creates a reference from the ref part values collected in refMap. All handled keys
// are removed from the map while reading. The reference itself is created by
// handleNonCitationSpecific(..), afterwards the date published, the publisher and the
// place published are set. Keys which are not yet implemented and keys remaining in
// the map (except for DETAILS) are reported via events on parentEvent.
2126 private Reference
createReference(MarkupImportState state
,
2127 Map
<String
, String
> refMap
, XMLEvent parentEvent
) {
2129 Reference reference
;
2131 String type
= getAndRemoveMapKey(refMap
, PUBTYPE
);
2132 String authorStr
= getAndRemoveMapKey(refMap
, AUTHOR
);
2133 String titleStr
= getAndRemoveMapKey(refMap
, PUBTITLE
);
2134 String titleCache
= getAndRemoveMapKey(refMap
, PUBFULLNAME
);
2135 String volume
= getAndRemoveMapKey(refMap
, VOLUME
);
2136 String edition
= getAndRemoveMapKey(refMap
, EDITION
);
2137 String editors
= getAndRemoveMapKey(refMap
, EDITORS
);
2138 String year
= getAndRemoveMapKey(refMap
, YEAR
);
2139 String pubName
= getAndRemoveMapKey(refMap
, PUBNAME
);
2140 String pages
= getAndRemoveMapKey(refMap
, PAGES
);
2141 String publication
= getAndRemoveMapKey(refMap
, PUBLOCATION
);
2142 String publisher
= getAndRemoveMapKey(refMap
, PUBLISHER
);
2143 String appendix
= getAndRemoveMapKey(refMap
, APPENDIX
);
2144 String issue
= getAndRemoveMapKey(refMap
, ISSUE
);
2146 reference
= handleNonCitationSpecific(state
, type
, authorStr
, titleStr
,
2147 titleCache
, volume
, issue
, edition
, editors
, pubName
, appendix
, pages
, parentEvent
);
// the year string is parsed into a time period; for book sections it is set on the book
2150 TimePeriod timeperiod
= TimePeriodParser
.parseString(year
);
2151 if (reference
.getType().equals(ReferenceType
.BookSection
)){
2152 reference
.getInBook().setDatePublished(timeperiod
);
2154 reference
.setDatePublished(timeperiod
);
2156 //Quickfix for these 2 attributes (publication, publisher) used in feature.references
// publisher and place published go to the in-reference if there is one, else to the reference itself
2157 Reference inRef
= reference
.getInReference() == null ? reference
: reference
.getInReference();
2159 if (isNotBlank(publisher
)){
2160 inRef
.setPublisher(publisher
);
2164 if (isNotBlank(publication
)){
2165 inRef
.setPlacePublished(publication
);
// class values which are known but not yet handled
2169 String
[] unhandledList
= new String
[] { ALTERNATEPUBTITLE
, NOTES
, STATUS
};
2170 for (String unhandled
: unhandledList
) {
2171 String value
= getAndRemoveMapKey(refMap
, unhandled
);
2172 if (isNotBlank(value
)) {
2173 this.handleNotYetImplementedAttributeValue(parentEvent
, CLASS
, unhandled
);
// everything still left in the map, except for the details, is unexpected
2177 for (String key
: refMap
.keySet()) {
2178 if (!DETAILS
.equalsIgnoreCase(key
)) {
2179 this.fireUnexpectedAttributeValue(parentEvent
, CLASS
, key
);
2188 * Create reference for non nomenclatural references
// Creates an article, a book section or a generic reference from the given string
// values, depending on the results of isArticleNonCitation(..) and isBookSection(..),
// and sets authorship, title, title cache, edition, editors, volume and pages on it.
// parentEvent is only used as the location for the warning events fired here.
2191 protected Reference
handleNonCitationSpecific(MarkupImportState state
, String type
, String authorStr
,
2192 String titleStr
, String titleCache
, String volume
, String issue
, String edition
,
2193 String editors
, String pubName
, String appendix
, String pages
, XMLEvent parentEvent
) {
2195 Reference reference
;
// an issue without volume only fires a warning, otherwise the issue is appended
// to the volume in brackets
2198 if (isBlank(volume
) && isNotBlank(issue
)){
2199 String message
= "Issue ('"+issue
+"') exists but no volume";
2200 fireWarningEvent(message
, parentEvent
, 4);
2202 }else if (isNotBlank(issue
)){
2203 volume
= volume
+ "("+ issue
+ ")";
2206 //pubName / appendix
// the appendix is used as pubName if there is no pubName, else it is appended to it
2207 if (isNotBlank(appendix
)){
2208 pubName
= pubName
== null ? appendix
: (pubName
+ " " + appendix
).replaceAll(" ", " ");
2211 if (isArticleNonCitation(type
, pubName
, volume
, editors
)) {
2212 IArticle article
= ReferenceFactory
.newArticle();
// the journal is created from pubName
2213 if (pubName
!= null) {
2214 IJournal journal
= ReferenceFactory
.newJournal();
2215 journal
.setTitle(pubName
);
2216 article
.setInJournal(journal
);
2218 fireWarningEvent("Article has no journal", parentEvent
, 4);
2220 reference
= (Reference
) article
;
2222 if (isBookSection(type
, authorStr
, titleStr
, editors
, pubName
, volume
)){
2223 IBookSection bookSection
= ReferenceFactory
.newBookSection();
// the book is created from pubName
2224 if (pubName
!= null) {
2225 IBook book
= ReferenceFactory
.newBook();
2226 book
.setTitle(pubName
);
2227 bookSection
.setInBook(book
);
2229 reference
= (Reference
)bookSection
;
2232 Reference bookOrPartOf
= ReferenceFactory
.newGeneric();
// an in-reference is only created if both, pubName and title, are given
2233 if (pubName
!= null && titleStr
!= null) {
2234 Reference inReference
= ReferenceFactory
.newGeneric();
2235 inReference
.setTitle(pubName
);
2236 bookOrPartOf
.setInReference(inReference
);
2238 reference
= bookOrPartOf
;
2243 TeamOrPersonBase
<?
> author
= createAuthor(state
, authorStr
);
2244 reference
.setAuthorship(author
);
2247 reference
.setTitle(titleStr
);
2248 if (StringUtils
.isNotBlank(titleCache
)) {
2249 reference
.setTitleCache(titleCache
, true);
// edition and editors are set on the in-reference if one exists
2253 if(reference
.getInReference() != null){
2254 reference
.getInReference().setEdition(edition
);
2255 reference
.getInReference().setEditor(editors
);
2258 reference
.setEdition(edition
);
2259 reference
.setEditor(editors
);
2263 reference
.setVolume(volume
);
2266 reference
.setPages(pages
);
// Heuristic used by handleNonCitationSpecific(..) to decide if a reference is handled
// as book section. Only the presence of the given values is evaluated (null checks),
// the type parameter is not evaluated.
2271 private boolean isBookSection(String type
, String authorStr
, String pubTitle
,
2272 String editors
, String pubName
, String volume
) {
2273 //type not yet handled
// case 1: author, editors, title and publication name are all given
2274 if (authorStr
!= null && editors
!= null
2275 && pubTitle
!= null && pubName
!= null){
// case 2: title and publication name are given but no volume
2277 }else if (pubTitle
!= null && pubName
!= null && volume
== null){
// Heuristic used by handleNonCitationSpecific(..) to decide if a reference is handled
// as article.
2285 private boolean isArticleNonCitation(String type
, String pubName
, String volume
, String editors
) {
// the type "journal" is compared case insensitive
2286 if ("journal".equalsIgnoreCase(type
)){
// without such a type a volume but no editors must be given ...
2288 }else if (volume
!= null && editors
== null){
// ... and the publication name is checked by IJournal.guessIsJournalName(..)
2289 if (pubName
!= null && IJournal
.guessIsJournalName(pubName
)){
2292 return false; //unclear
2299 protected String
handleRefPart(MarkupImportState state
, XMLEventReader reader
,
2300 XMLEvent parentEvent
, Map
<String
, String
> refMap
)
2301 throws XMLStreamException
{
2302 String classValue
= getClassOnlyAttribute(parentEvent
);
2305 while (reader
.hasNext()) {
2306 XMLEvent next
= readNoWhitespace(reader
);
2307 if (isMyEndingElement(next
, parentEvent
)) {
2308 refMap
.put(classValue
, text
);
2310 } else if (next
.isStartElement()) {
2311 if (isStartingElement(next
, ANNOTATION
)) {
2312 handleNotYetImplementedElement(next
); // TODO test handleSimpleAnnotation
2313 } else if (isStartingElement(next
, ITALICS
)) {
2314 handleNotYetImplementedElement(next
);
2315 } else if (isStartingElement(next
, BOLD
)) {
2316 handleNotYetImplementedElement(next
);
2318 handleUnexpectedStartElement(next
.asStartElement());
2320 } else if (next
.isCharacters()) {
2321 text
+= next
.asCharacters().getData();
2323 handleUnexpectedEndElement(next
.asEndElement());
2326 throw new IllegalStateException("RefPart has no closing tag");
// Tests if the text consists only of whitespace, dots, commas, semicolons and colons.
2330 private boolean isBlankOrPunctuation(String text
) {
// the pattern also matches the empty string
2334 return text
.matches("^[\\s\\.,;:]*$");
2340 * Checks whether the given heading is a "habitat" type heading.
2342 * @return true if the trimmed heading is Ecol, Ecology, Habitat or Habitat & Ecology, optionally followed by a dot
2344 private boolean isHabitatHeading(String heading
) {
2345 return heading
.trim().matches("(Ecol(ogy)?|Habitat|Habitat\\s&\\sEcology)\\.?");
/**
 * Stores the data collected for the current subheading in the subheading map.
 * A {@link SubheadingResult} is only created if the text is not blank or if there
 * are references or inline references. A leading minus is removed from the text
 * and the text is trimmed before it is stored.
 *
 * @param subHeadingMap the map the result is put into
 * @param currentSubheading the subheading used as key, may be <code>null</code>
 * @param text the text collected for the subheading
 * @param fullReferences the references collected for the subheading, may be <code>null</code>
 * @param inlineReferences the intext references collected for the subheading
 */
2349 private String
putCurrentSubheading(Map
<String
, SubheadingResult
> subHeadingMap
, String currentSubheading
,
2350 String text
, StringReferences fullReferences
, List
<IntextReference
> inlineReferences
) {
2351 if (isNotBlank(text
) || (fullReferences
!= null && isNotEmptyCollection(fullReferences
.content
))
2352 ||isNotEmptyCollection(inlineReferences
)) {
2353 SubheadingResult result
= new SubheadingResult();
2354 text
= removeStartingMinus(text
);
2355 result
.text
= text
.trim();
// missing references are represented by an empty StringReferences instance, never by null
2356 result
.references
= fullReferences
== null ?
new StringReferences() : fullReferences
;
2357 result
.inlineReferences
= inlineReferences
;
// an existing entry for the same subheading is replaced
2358 subHeadingMap
.put(currentSubheading
, result
);
2364 * @param references2
2367 protected boolean isNotEmptyCollection(Collection
<?
> list
) {
2368 return list
!= null && !list
.isEmpty();
2372 private String
removeStartingMinus(String string
) {
2373 string
= replaceStart(string
, "-");
2374 string
= replaceStart(string
, "\u002d");
2375 string
= replaceStart(string
, "\u2013");
2376 string
= replaceStart(string
, "\u2014");
2377 string
= replaceStart(string
, "--");
2384 * @param replacementString
// Removes replacementString from the start of value, if value starts with it, and
// strips leading hyphens and em dashes from value.
2386 private String
replaceStart(String value
, String replacementString
) {
// the remaining value is trimmed after the prefix was cut off
2387 if (value
.startsWith(replacementString
) ){
2388 value
= value
.substring(replacementString
.length()).trim();
// removes further leading "-" or "\u2014" (em dash) characters one by one, trimming
// after each step; both are single characters, so "-".length() fits for both
2390 while (value
.startsWith("-") || value
.startsWith("\u2014") ){
2391 value
= value
.substring("-".length()).trim();
/**
 * Handles a distribution locality element. The character data of the element is
 * collected as text. At the end of the element, if the text is not blank, the areas
 * are parsed from the text and for each area a {@link Distribution} with the status
 * computed from the status and frequency attributes is added to the extracted markup
 * description of the current taxon. A blank text fires a warning.
 *
 * @param state the import state, giving access to the current taxon, the config and the transformer
 * @param reader the event reader the events are read from
 * @param parentEvent the start event of the element; the class attribute is required,
 *        the status and frequency attributes are read if present
 * @throws XMLStreamException declared for reading from the event reader
 * @throws IllegalStateException if the reader runs out of events before the closing tag
 */
2397 private String
handleDistributionLocality(MarkupImportState state
,XMLEventReader reader
, XMLEvent parentEvent
)throws XMLStreamException
{
2398 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
2399 String classValue
= getAndRemoveRequiredAttributeValue(parentEvent
, attributes
, CLASS
);
2400 String statusValue
=getAndRemoveAttributeValue(attributes
, STATUS
);
2401 String frequencyValue
=getAndRemoveAttributeValue(attributes
, FREQUENCY
);
2403 Taxon taxon
= state
.getCurrentTaxon();
2404 // TODO which ref to take?
2405 Reference sourceReference
= state
.getConfig().getSourceReference();
2408 while (reader
.hasNext()) {
2409 XMLEvent next
= readNoWhitespace(reader
);
2410 if (isMyEndingElement(next
, parentEvent
)) {
2411 if (StringUtils
.isNotBlank(text
)) {
// the area label is the normalized text without trailing dot
2412 String label
= CdmUtils
.removeTrailingDot(normalize(text
));
2413 TaxonDescription description
= getExtractedMarkupMarkedDescription(state
, taxon
, sourceReference
);
// the class attribute value is used to create the area level
2414 NamedAreaLevel level
= makeNamedAreaLevel(state
,classValue
, next
);
2417 PresenceAbsenceTerm status
= null;
2418 if (isNotBlank(statusValue
)){
// first the transformer is asked for the term by key, then for a uuid to get the term by
2420 status
= state
.getTransformer().getPresenceTermByKey(statusValue
);
2421 if (status
== null){
2422 UUID uuid
= state
.getTransformer().getPresenceTermUuid(statusValue
);
2424 status
= this.getPresenceAbsenceTerm(state
, uuid
, statusValue
, statusValue
, statusValue
, false, null);
2427 if (status
== null){
2429 String message
= "The presence/absence status '%s' could not be transformed to an CDM status";
2430 fireWarningEvent(String
.format(message
, statusValue
), next
, 4);
2432 } catch (UndefinedTransformerMethodException e
) {
2433 throw new RuntimeException(e
);
// presumably the default if no status attribute is given - TODO confirm
2436 status
= PresenceAbsenceTerm
.PRESENT();
2439 if (isNotBlank(frequencyValue
)){
2440 if (frequencyValue
.equalsIgnoreCase("absent") && PresenceAbsenceTerm
.PRESENT().equals(status
)){ //to be on the safe side that not real status has been defined yet.
2441 status
= PresenceAbsenceTerm
.ABSENT();
2443 String message
= "The frequency attribute is currently not yet available in CDM";
2444 fireWarningEvent(message
, parentEvent
, 6);
2448 NamedArea higherArea
= null;
2449 List
<NamedArea
> areas
= new ArrayList
<>();
// a single area has at least 3 characters and contains neither a comma nor an opening bracket
2451 String patSingleArea
= "([^,\\(]{3,})";
// areas are separated by a comma or by " and "
2452 String patSeparator
= "(,|\\sand\\s)";
// pattern for a higher area followed by a list of inner areas in brackets;
// group 1 is the higher area, group 2 the list of inner areas
2453 String hierarchiePattern
= String
.format("%s\\((%s(%s%s)*)\\)", patSingleArea
, patSingleArea
, patSeparator
, patSingleArea
);
2454 Pattern patHierarchie
= Pattern
.compile(hierarchiePattern
, Pattern
.CASE_INSENSITIVE
);
2455 Matcher matcher
= patHierarchie
.matcher(label
);
2456 if (matcher
.matches()){
2457 String higherAreaStr
= matcher
.group(1).trim();
2458 higherArea
= makeArea(state
, higherAreaStr
, level
);
2459 String
[] innerAreas
= matcher
.group(2).split(patSeparator
);
// only the inner areas are added to the list of areas, not the higher area
2460 for (String innerArea
: innerAreas
){
2461 if (isNotBlank(innerArea
)){
2462 NamedArea singleArea
= makeArea(state
, innerArea
.trim(), level
);
2463 areas
.add(singleArea
);
// partOf is only used by the commented code below
2464 NamedArea partOf
= singleArea
.getPartOf();
2465 // if (partOf == null){
2466 // singleArea.setPartOf(higherArea);
// the complete label is handled as one single area
2471 NamedArea singleArea
= makeArea(state
, label
, level
);
2472 areas
.add(singleArea
);
2475 for (NamedArea area
: areas
){
2476 //create distribution
2477 Distribution distribution
= Distribution
.NewInstance(area
,status
);
2478 distribution
.addPrimaryTaxonomicSource(sourceReference
);
2479 description
.addElement(distribution
);
2482 String message
= "Empty distribution locality";
2483 fireWarningEvent(message
, next
, 4);
2486 } else if (isStartingElement(next
, COORDINATES
)) {
2488 handleNotYetImplementedElement(next
);
2489 } else if (isEndingElement(next
, COORDINATES
)) {
2491 popUnimplemented(next
.asEndElement());
2492 } else if (next
.isCharacters()) {
2493 text
+= next
.asCharacters().getData();
2495 handleUnexpectedElement(next
);
// reached only if the reader has no more events while the element is still open
2498 throw new IllegalStateException("<DistributionLocality> has no closing tag");
2507 protected TaxonDescription
getExtractedMarkupMarkedDescription(MarkupImportState state
, Taxon taxon
, Reference sourceReference
) {
2508 MarkerType markerType
= getMarkerType(
2510 MarkupTransformer
.uuidMarkerExtractedMarkupData
,
2511 "Extracted factual data", "Marker type for factual data imported from markup where the markup for this data was included in parent markup that was also imported including the text from this markup.",
2514 String title
= "Extracted markup data for " + taxon
.getName().getTitleCache();
2515 TaxonDescription description
= getMarkedTaxonDescription(taxon
, markerType
, false, true, sourceReference
, title
);