fix #6459 Implement deduplication for IntextReferences
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupImportBase.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.net.MalformedURLException;
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.HashMap;
17 import java.util.HashSet;
18 import java.util.Iterator;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Set;
22 import java.util.Stack;
23 import java.util.UUID;
24 import java.util.regex.Matcher;
25 import java.util.regex.Pattern;
26
27 import javax.xml.namespace.QName;
28 import javax.xml.stream.Location;
29 import javax.xml.stream.XMLEventReader;
30 import javax.xml.stream.XMLStreamConstants;
31 import javax.xml.stream.XMLStreamException;
32 import javax.xml.stream.events.Attribute;
33 import javax.xml.stream.events.Characters;
34 import javax.xml.stream.events.EndElement;
35 import javax.xml.stream.events.StartElement;
36 import javax.xml.stream.events.XMLEvent;
37
38 import org.apache.commons.lang.StringUtils;
39 import org.apache.commons.lang.WordUtils;
40 import org.apache.log4j.Logger;
41
42 import eu.etaxonomy.cdm.api.service.IClassificationService;
43 import eu.etaxonomy.cdm.api.service.ITermService;
44 import eu.etaxonomy.cdm.common.CdmUtils;
45 import eu.etaxonomy.cdm.ext.geo.GeoServiceArea;
46 import eu.etaxonomy.cdm.ext.geo.IEditGeoService;
47 import eu.etaxonomy.cdm.io.common.CdmImportBase;
48 import eu.etaxonomy.cdm.io.common.CdmImportBase.TermMatchMode;
49 import eu.etaxonomy.cdm.io.common.events.IIoEvent;
50 import eu.etaxonomy.cdm.io.common.events.IoProblemEvent;
51 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
52 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
53 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
54 import eu.etaxonomy.cdm.model.common.Annotation;
55 import eu.etaxonomy.cdm.model.common.AnnotationType;
56 import eu.etaxonomy.cdm.model.common.CdmBase;
57 import eu.etaxonomy.cdm.model.common.DefinedTerm;
58 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
59 import eu.etaxonomy.cdm.model.common.Extension;
60 import eu.etaxonomy.cdm.model.common.ExtensionType;
61 import eu.etaxonomy.cdm.model.common.IntextReference;
62 import eu.etaxonomy.cdm.model.common.Language;
63 import eu.etaxonomy.cdm.model.common.MarkerType;
64 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
65 import eu.etaxonomy.cdm.model.common.TermVocabulary;
66 import eu.etaxonomy.cdm.model.common.TimePeriod;
67 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
68 import eu.etaxonomy.cdm.model.description.Distribution;
69 import eu.etaxonomy.cdm.model.description.Feature;
70 import eu.etaxonomy.cdm.model.description.PolytomousKey;
71 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
72 import eu.etaxonomy.cdm.model.description.TaxonDescription;
73 import eu.etaxonomy.cdm.model.description.TextData;
74 import eu.etaxonomy.cdm.model.location.NamedArea;
75 import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
76 import eu.etaxonomy.cdm.model.location.NamedAreaType;
77 import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;
78 import eu.etaxonomy.cdm.model.media.Media;
79 import eu.etaxonomy.cdm.model.name.INonViralName;
80 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
81 import eu.etaxonomy.cdm.model.name.NonViralName;
82 import eu.etaxonomy.cdm.model.name.Rank;
83 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
84 import eu.etaxonomy.cdm.model.reference.IArticle;
85 import eu.etaxonomy.cdm.model.reference.IBook;
86 import eu.etaxonomy.cdm.model.reference.IBookSection;
87 import eu.etaxonomy.cdm.model.reference.IJournal;
88 import eu.etaxonomy.cdm.model.reference.Reference;
89 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
90 import eu.etaxonomy.cdm.model.reference.ReferenceType;
91 import eu.etaxonomy.cdm.model.taxon.Classification;
92 import eu.etaxonomy.cdm.model.taxon.Taxon;
93 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
94 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
95 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
96 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
97
98 /**
99 * @author a.mueller
100 * @created 04.08.2008
101 */
102 public abstract class MarkupImportBase {
// Logger of this class.
103 private static final Logger logger = Logger.getLogger(MarkupImportBase.class);
104
105 //Base string constants.
// NOTE(review): presumably names of elements/attributes of the markup XML format;
// they are not used in this section of the file - confirm at the call sites.
106 protected static final String ALTITUDE = "altitude";
107 protected static final String ANNOTATION = "annotation";
108 protected static final String BOLD = "bold";
109 protected static final String BR = "br";
110 protected static final String DOUBTFUL = "doubtful";
111 protected static final String CITATION = "citation";
112 protected static final String CLASS = "class";
113 protected static final String COORDINATES = "coordinates";
114 protected static final String DATES = "dates";
115 protected static final String GATHERING = "gathering";
116 protected static final String GATHERING_GROUP = "gatheringGroup";
// NOTE(review): the only value in this group containing a blank - verify it is intended.
117 protected static final String GENUS_ABBREVIATION = "genus abbreviation";
118 protected static final String FOOTNOTE = "footnote";
119 protected static final String FOOTNOTE_REF = "footnoteRef";
120 protected static final String FULL_NAME = "fullName";
121 protected static final String ITALICS = "italics";
122 protected static final String NUM = "num";
123 protected static final String NOTES = "notes";
124 protected static final String PUBLICATION = "publication";
125 protected static final String SPECIMEN_TYPE = "specimenType";
126 protected static final String STATUS = "status";
127 protected static final String SUB_HEADING = "subHeading";
128 protected static final String TYPE = "type";
129 protected static final String TYPE_STATUS = "typeStatus";
130 protected static final String UNKNOWN = "unknown";
131
132
// Named boolean flags.
// NOTE(review): presumably used as self-explaining arguments instead of bare true/false - confirm at the call sites.
133 protected static final boolean CREATE_NEW = true;
134 protected static final boolean NO_IMAGE_GALLERY = false;
135 protected static final boolean IMAGE_GALLERY = true;
136
// Further string constants.
// NOTE(review): presumably names of document level elements/attributes of the markup XML format;
// they are not used in this section of the file - confirm at the call sites.
137 protected static final String ADDENDA = "addenda";
138 protected static final String BIBLIOGRAPHY = "bibliography";
139 protected static final String BIOGRAPHIES = "biographies";
140 protected static final String CHAR = "char";
141 protected static final String DEDICATION = "dedication";
142 protected static final String DEFAULT_MEDIA_URL = "defaultMediaUrl";
143 protected static final String DISTRIBUTION_LIST = "distributionList";
144 protected static final String DISTRIBUTION_LOCALITY = "distributionLocality";
145 protected static final String FEATURE = "feature";
146 protected static final String FIGURE = "figure";
147 protected static final String FIGURE_LEGEND = "figureLegend";
148 protected static final String FIGURE_PART = "figurePart";
149 protected static final String FIGURE_REF = "figureRef";
150 protected static final String FIGURE_TITLE = "figureTitle";
151 protected static final String FOOTNOTE_STRING = "footnoteString";
152 protected static final String FREQUENCY = "frequency";
153 protected static final String HEADING = "heading";
154 protected static final String HABITAT = "habitat";
155 protected static final String HABITAT_LIST = "habitatList";
156 protected static final String IS_FREETEXT = "isFreetext";
157 protected static final String ID = "id";
158 protected static final String KEY = "key";
159 protected static final String LIFE_CYCLE_PERIODS = "lifeCyclePeriods";
160 protected static final String META_DATA = "metaData";
161 protected static final String MODS = "mods";
162
163 protected static final String NOMENCLATURE = "nomenclature";
164 protected static final String QUOTE = "quote";
165 protected static final String RANK = "rank";
166 protected static final String REF = "ref";
167 protected static final String REF_NUM = "refNum";
168 protected static final String REFERENCE = "reference";
169 protected static final String REFERENCES = "references";
170 protected static final String SUB_CHAR = "subChar";
171 protected static final String TAXON = "taxon";
172 protected static final String TAXONTITLE = "taxontitle";
173 protected static final String TAXONTYPE = "taxontype";
174 protected static final String TEXT_SECTION = "textSection";
175 protected static final String TREATMENT = "treatment";
176 protected static final String SERIALS_ABBREVIATIONS = "serialsAbbreviations";
177 protected static final String STRING = "string";
178 protected static final String URL = "url";
179 protected static final String WRITER = "writer";
180
181 protected static final String LOCALITY = "locality";
182
183
184
185 //Nomenclature string constants.
// NOTE(review): presumably names of elements/attributes/classes used within nomenclature parts of the
// markup XML; they are not used in this section of the file - confirm at the call sites.
186 protected static final String ACCEPTED = "accepted";
187 protected static final String ACCEPTED_NAME = "acceptedName";
188 protected static final String ALTERNATEPUBTITLE = "alternatepubtitle";
189 protected static final String APPENDIX = "appendix";
190 protected static final String AUTHOR = "author";
191 protected static final String DETAILS = "details";
192 protected static final String EDITION = "edition";
193 protected static final String EDITORS = "editors";
194 protected static final String HOMONYM = "homonym";
195 protected static final String HOMOTYPES = "homotypes";
196 protected static final String NOMENCLATURAL_NOTES = "nomenclaturalNotes";
197 protected static final String INFRANK = "infrank";
198 protected static final String INFRAUT = "infraut";
199 protected static final String INFRPARAUT = "infrparaut";
200 protected static final String ISSUE = "issue";
201 protected static final String NAME_STATUS = "namestatus";
202 protected static final String NAME = "name";
203 protected static final String NAME_TYPE = "nameType";
204 protected static final String NOM = "nom";
205 protected static final String PAGES = "pages";
206 protected static final String PARAUT = "paraut";
207 protected static final String PUBFULLNAME = "pubfullname";
208 protected static final String PUBLOCATION = "publocation";
209 protected static final String PUBLISHER = "publisher";
210 protected static final String PUBNAME = "pubname";
211 protected static final String PUBTITLE = "pubtitle";
212 protected static final String PUBTYPE = "pubtype";
213 protected static final String REF_PART = "refPart";
214 protected static final String SYNONYM = "synonym";
215 protected static final String USAGE = "usage";
216 protected static final String VOLUME = "volume";
217 protected static final String YEAR = "year";
218
219
220 //Key string constants.
// NOTE(review): presumably names of elements/attributes used within identification keys of the
// markup XML; they are not used in this section of the file - confirm at the call sites.
221 protected static final String COUPLET = "couplet";
222 protected static final String IS_SPOTCHARACTERS = "isSpotcharacters";
223 protected static final String ONLY_NUMBERED_TAXA_EXIST = "onlyNumberedTaxaExist";
224 protected static final String EXISTS = "exists";
225 protected static final String KEYNOTES = "keynotes";
226 protected static final String KEY_TITLE = "keyTitle";
227 protected static final String QUESTION = "question";
228 protected static final String TEXT = "text";
229 protected static final String TO_COUPLET = "toCouplet";
230 protected static final String TO_KEY = "toKey";
231 protected static final String TO_TAXON = "toTaxon";
232
233
234 //Feature string constants.
// NOTE(review): presumably names of elements used for vernacular name features - confirm at the call sites.
235 protected static final String VERNACULAR_NAMES = "vernacularNames";
236 protected static final String VERNACULAR_NAME = "vernacularName";
237 protected static final String TRANSLATION = "translation";
238 protected static final String LOCAL_LANGUAGE = "localLanguage";
239
240
241
// The document import passed to the constructor; the services used in this class are requested from it.
242 protected MarkupDocumentImport docImport;
243
// Geo service, requested once from docImport in the constructor.
244 private final IEditGeoService editGeoService;
// NOTE(review): not assigned in the visible part of this class - presumably set by subclasses; confirm.
245 protected MarkupFeatureImport featureImport;
246
/**
 * Creates a new import handler for the given document import.
 * The geo service is requested from the document import once and kept for later use.
 *
 * @param docImport the document import to work for, must not be <code>null</code>
 */
public MarkupImportBase(MarkupDocumentImport docImport) {
    //the implicit super constructor call is sufficient here
    this.docImport = docImport;
    this.editGeoService = docImport.getEditGeoService();
}
252
// Names of the currently open start elements which are not handled by the import
// (pushed by the handleXXX methods of this class, popped again with the matching end element).
// While this stack is not empty, no unexpected attribute events are fired.
253 private final Stack<QName> unhandledElements = new Stack<QName>();
// NOTE(review): not used in the visible part of this class - confirm if still needed.
254 private final Stack<QName> handledElements = new Stack<QName>();
255
256
257 protected <T extends CdmBase> void save(Collection<T> collection, MarkupImportState state) {
258 if (state.isCheck() || collection.isEmpty()){
259 return;
260 }
261 T example = collection.iterator().next();
262 if (example.isInstanceOf(TaxonBase.class)){
263 Collection<TaxonBase> typedCollection = (Collection<TaxonBase>)collection;
264 docImport.getTaxonService().saveOrUpdate(typedCollection);
265 }else if (example.isInstanceOf(Classification.class)){
266 Collection<Classification> typedCollection = (Collection<Classification>)collection;
267 docImport.getClassificationService().saveOrUpdate(typedCollection);
268 }else if (example.isInstanceOf(PolytomousKey.class)){
269 Collection<PolytomousKey> typedCollection = (Collection<PolytomousKey>)collection;
270 docImport.getPolytomousKeyService().saveOrUpdate(typedCollection);
271 }else if (example.isInstanceOf(DefinedTermBase.class)){
272 Collection<DefinedTermBase> typedCollection = (Collection<DefinedTermBase>)collection;
273 getTermService().saveOrUpdate(typedCollection);
274 }
275
276 }
277
278
279 //TODO move to service layer for all IdentifiableEntities
280 protected void save(CdmBase cdmBase, MarkupImportState state) {
281 if (state.isCheck()){
282 return;
283 }
284 cdmBase = CdmBase.deproxy(cdmBase, CdmBase.class);
285 if (cdmBase == null){
286 String message = "Tried to save a null object.";
287 fireWarningEvent(message, "--location ?? --", 6,1);
288 } else if (cdmBase.isInstanceOf(TaxonBase.class)){
289 docImport.getTaxonService().saveOrUpdate((TaxonBase<?>)cdmBase);
290 }else if (cdmBase.isInstanceOf(Classification.class)){
291 docImport.getClassificationService().saveOrUpdate((Classification)cdmBase);
292 }else if (cdmBase.isInstanceOf(PolytomousKey.class)){
293 docImport.getPolytomousKeyService().saveOrUpdate((PolytomousKey)cdmBase);
294 }else if (cdmBase.isInstanceOf(DefinedTermBase.class)){
295 docImport.getTermService().saveOrUpdate((DefinedTermBase<?>)cdmBase);
296 }else if (cdmBase.isInstanceOf(Media.class)){
297 docImport.getMediaService().saveOrUpdate((Media)cdmBase);
298 }else if (cdmBase.isInstanceOf(SpecimenOrObservationBase.class)){
299 docImport.getOccurrenceService().saveOrUpdate((SpecimenOrObservationBase<?>)cdmBase);
300 }else if (cdmBase.isInstanceOf(DescriptionElementBase.class)){
301 docImport.getDescriptionService().saveDescriptionElement((DescriptionElementBase)cdmBase);
302 }else if (cdmBase.isInstanceOf(Reference.class)){
303 docImport.getReferenceService().saveOrUpdate((Reference)cdmBase);
304 }else{
305 String message = "Unknown cdmBase type to save: " + cdmBase.getClass();
306 fireWarningEvent(message, "Unknown location", 8);
307 }
308 //logger.warn("Saved " + cdmBase);
309 }
310
311
312 protected ITermService getTermService() {
313 return docImport.getTermService();
314 }
315
316 protected IClassificationService getClassificationService() {
317 return docImport.getClassificationService();
318 }
319
320 //*********************** Attribute methods *************************************/
321
322 /**
323 * Returns a map for all attributes of an start element
324 * @param event
325 * @return
326 */
327 protected Map<String, Attribute> getAttributes(XMLEvent event) {
328 Map<String, Attribute> result = new HashMap<>();
329 if (!event.isStartElement()){
330 fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event.getLocation()), 1, 1);
331 return result;
332 }
333 StartElement element = event.asStartElement();
334 @SuppressWarnings("unchecked")
335 Iterator<Attribute> attributes = element.getAttributes();
336 while (attributes.hasNext()){
337 Attribute attribute = attributes.next();
338 //TODO namespaces
339 result.put(attribute.getName().getLocalPart(), attribute);
340 }
341 return result;
342 }
343
344 /**
345 * Throws an unexpected attributes event if the event has any attributes.
346 * @param event
347 */
348 protected void checkNoAttributes(Map<String, Attribute> attributes, XMLEvent event) {
349 String[] exceptions = new String[]{};
350 handleUnexpectedAttributes(event.getLocation(), attributes, 1, exceptions);
351 }
352
353
354
355 /**
356 * Throws an unexpected attributes event if the event has any attributes.
357 * @param event
358 */
359 protected void checkNoAttributes(XMLEvent event) {
360 String[] exceptions = new String[]{};
361 checkNoAttributes(event, 1, exceptions);
362 }
363
364 /**
365 * Throws an unexpected attributes event if the event has any attributes except those mentioned in "exceptions".
366 * @param event
367 * @param exceptions
368 */
369 protected void checkNoAttributes(XMLEvent event, int stackDepth, String... exceptions) {
370 if (! event.isStartElement()){
371 fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event.getLocation()), 1, 1);
372 return;
373 }
374 StartElement startElement = event.asStartElement();
375 Map<String, Attribute> attributes = getAttributes(startElement);
376 handleUnexpectedAttributes(startElement.getLocation(), attributes, stackDepth+1, exceptions);
377 }
378
379
380 /**
381 * Checks if the given attribute exists and has the given value.
382 * If yes, true is returned and the attribute is removed from the attributes map.
383 * Otherwise false is returned.
384 * @param attributes
385 * @param attrName
386 * @param value
387 * @return <code>true</code> if attribute has given value, <code>false</code> otherwise
388 */
389 protected boolean checkAndRemoveAttributeValue( Map<String, Attribute> attributes, String attrName, String value) {
390 Attribute attr = attributes.get(attrName);
391 if (attr == null ||value == null ){
392 return false;
393 }else{
394 if (value.equals(attr.getValue())){
395 attributes.remove(attrName);
396 return true;
397 }else{
398 return false;
399 }
400 }
401 }
402
403
404 /**
405 * Returns the value of a given attribute name and removes the attribute from the attributes map.
406 * Returns <code>null</code> if attribute does not exist.
407 * @param attributes the list of all attributes
408 * @param attrName the requested attribute name
409 * @return the value for the attribute
410 */
411 protected String getAndRemoveAttributeValue(Map<String, Attribute> attributes, String attrName) {
412 return getAndRemoveAttributeValue(null, attributes, attrName, false, 1);
413 }
414
415 /**
416 * Returns the value of a boolean attribute with the given name and removes the attribute from the attributes map.
417 * Returns <code>defaultValue</code> if the attribute does not exist. ALso returns <code>defaultValue</code> and throws a warning if the
418 * attribute has no boolean value (true, false).
419 * @param
420 * @param attributes the
421 * @param attrName the name of the attribute
422 * @param defaultValue the default value to return if attribute does not exist or can not be defined
423 * @return
424 */
425 protected Boolean getAndRemoveBooleanAttributeValue(XMLEvent event, Map<String, Attribute> attributes, String attrName, Boolean defaultValue) {
426 String value = getAndRemoveAttributeValue(null, attributes, attrName, false, 1);
427 Boolean result = defaultValue;
428 if (value != null){
429 if (value.equalsIgnoreCase("true")){
430 result = true;
431 }else if (value.equalsIgnoreCase("false")){
432 result = false;
433 }else{
434 String message = "Boolean attribute has no boolean value ('true', 'false') but '%s'";
435 fireWarningEvent(String.format(message, value), makeLocationStr(event.getLocation()), 6, 1);
436 }
437 }
438 return result;
439 }
440
441
442 /**
443 * Returns the value of a given attribute name and returns the attribute from the attributes map.
444 * Fires a mandatory field is missing event if the attribute does not exist.
445 * @param xmlEvent
446 * @param attributes
447 * @param attrName
448 * @return
449 */
450 protected String getAndRemoveRequiredAttributeValue(XMLEvent xmlEvent, Map<String, Attribute> attributes, String attrName) {
451 return getAndRemoveAttributeValue(xmlEvent, attributes, attrName, true, 1);
452 }
453
454 /**
455 * Returns the value of a given attribute name and returns the attribute from the attributes map.
456 * If required is <code>true</code> and the attribute does not exist a mandatory field is missing event is fired.
457 * @param xmlEvent
458 * @param attributes
459 * @param attrName
460 * @param isRequired
461 * @return
462 */
463 private String getAndRemoveAttributeValue(XMLEvent xmlEvent, Map<String, Attribute> attributes, String attrName, boolean isRequired, int stackDepth) {
464 Attribute attr = attributes.get(attrName);
465 if (attr == null ){
466 if (isRequired){
467 fireMandatoryElementIsMissing(xmlEvent, attrName, 8, stackDepth+1);
468 }
469 return null;
470 }else{
471 attributes.remove(attrName);
472 return attr.getValue();
473 }
474 }
475
476 /**
477 * Fires an not yet implemented event if the given attribute exists in attributes.
478 * @param attributes
479 * @param attrName
480 * @param event
481 */
482 protected void handleNotYetImplementedAttribute(Map<String, Attribute> attributes,
483 String attrName, XMLEvent event) {
484 Attribute attr = attributes.get(attrName);
485 if (attr != null){
486 attributes.remove(attrName);
487 QName qName = attr.getName();
488 fireNotYetImplementedAttribute(event.getLocation(), qName, attr.getValue(), 1);
489 }
490 }
491
492 /**
493 * Fires an unhandled attributes event, if attributes exist in attributes map not covered by the exceptions.
494 * No event is fired if the unhandled elements stack is not empty.
495 * @param location
496 * @param attributes
497 * @param exceptions
498 */
499 protected void handleUnexpectedAttributes(Location location,Map<String, Attribute> attributes, String... exceptions) {
500 handleUnexpectedAttributes(location, attributes, 1, exceptions);
501 }
502
503 /**
504 * see {@link #handleUnexpectedAttributes(Location, Map, String...)}
505 *
506 * @param location
507 * @param attributes
508 * @param stackDepth the stack trace depth
509 * @param exceptions
510 */
511 private void handleUnexpectedAttributes(Location location,Map<String, Attribute> attributes, int stackDepth, String... exceptions) {
512 if (attributes.size() > 0){
513 if (this.unhandledElements.size() == 0 ){
514 boolean hasUnhandledAttributes = false;
515 for (String key : attributes.keySet()){
516 boolean isException = false;
517 for (String exception : exceptions){
518 if(key.equals(exception)){
519 isException = true;
520 }
521 }
522 if (!isException){
523 hasUnhandledAttributes = true;
524 }
525 }
526 if (hasUnhandledAttributes){
527 fireUnexpectedAttributes(location, attributes, stackDepth+1);
528 }
529 }
530 }
531 }
532
533
534 private void fireUnexpectedAttributes(Location location, Map<String, Attribute> attributes, int stackDepth) {
535 String attributesString = "";
536 for (String key : attributes.keySet()){
537 Attribute attribute = attributes.get(key);
538 attributesString = CdmUtils.concat(",", attributesString, attribute.getName().getLocalPart() + ":" + attribute.getValue());
539 }
540 String message = "Unexpected attributes: %s";
541 IoProblemEvent event = makeProblemEvent(location, String.format(message, attributesString), 1 , stackDepth +1 );
542 fire(event);
543 }
544
545
546 protected void fireUnexpectedAttributeValue(XMLEvent parentEvent, String attrName, String attrValue) {
547 String message = "Unexpected attribute value %s='%s'";
548 message = String.format(message, attrName, attrValue);
549 IoProblemEvent event = makeProblemEvent(parentEvent.getLocation(), message, 1 , 1 );
550 fire(event);
551 }
552
553 protected void handleNotYetImplementedAttributeValue(XMLEvent xmlEvent, String attrName, String attrValue) {
554 String message = "Attribute %s not yet implemented for value '%s'";
555 message = String.format(message, attrName, attrValue);
556 IIoEvent event = makeProblemEvent(xmlEvent.getLocation(), message, 1, 1 );
557 fire(event);
558 }
559
560 protected void fireNotYetImplementedAttribute(Location location, QName qName,
561 String value, int stackDepth) {
562 String message = "Attribute not yet implemented: %s (%s)";
563 IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart(), value), 1, stackDepth+1 );
564 fire(event);
565 }
566
567
568 protected void fireUnexpectedEvent(XMLEvent xmlEvent, int stackDepth) {
569 Location location = xmlEvent.getLocation();
570 String message = "Unexpected event: %s";
571 IIoEvent event = makeProblemEvent(location, String.format(message, xmlEvent.toString()), 2, stackDepth +1);
572 fire(event);
573 }
574
575 protected void fireUnexpectedStartElement(Location location, StartElement startElement, int stackDepth) {
576 QName qName = startElement.getName();
577 String message = "Unexpected start element: %s";
578 IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 2, stackDepth +1);
579 fire(event);
580 }
581
582
583 protected void fireUnexpectedEndElement(Location location, EndElement endElement, int stackDepth) {
584 QName qName = endElement.getName();
585 String message = "Unexpected end element: %s";
586 IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 16, stackDepth+1);
587 fire(event);
588 }
589
590 protected void fireNotYetImplementedElement(Location location, QName qName, int stackDepth) {
591 String message = "Element not yet implemented: %s";
592 IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 1, stackDepth+1 );
593 fire(event);
594 }
595
596 protected void fireNotYetImplementedCharacters(Location location, Characters chars, int stackDepth) {
597 String message = "Characters not yet handled: %s";
598 IIoEvent event = makeProblemEvent(location, String.format(message, chars.getData()), 1, stackDepth+1 );
599 fire(event);
600 }
601
602 /**
603 * Creates a problem event.
604 * Be aware of the right depths of the stack trace !
605 * @param location
606 * @param message
607 * @param severity
608 * @return
609 */
610 private IoProblemEvent makeProblemEvent(Location location, String message, int severity, int stackDepth) {
611 stackDepth++;
612 StackTraceElement[] stackTrace = new Exception().getStackTrace();
613 int lineNumber = stackTrace[stackDepth].getLineNumber();
614 String methodName = stackTrace[stackDepth].getMethodName();
615 String locationStr = makeLocationStr(location);
616 String className = stackTrace[stackDepth].getClassName();
617 Class<?> declaringClass;
618 try {
619 declaringClass = Class.forName(className);
620 } catch (ClassNotFoundException e) {
621 declaringClass = this.getClass();
622 }
623 IoProblemEvent event = IoProblemEvent.NewInstance(declaringClass, message,
624 locationStr, lineNumber, severity, methodName);
625 return event;
626 }
627
628 /**
629 * Creates a string from a location
630 * @param location
631 * @return
632 */
633 protected String makeLocationStr(Location location) {
634 String locationStr = location == null ? " - no location - " : "l." + location.getLineNumber() + "/c."+ location.getColumnNumber();
635 return locationStr;
636 }
637
638
639 /**
640 * Fires an unexpected element event if the unhandled elements stack is empty.
641 * Otherwise adds the element to the stack.
642 * @param event
643 */
644 protected void handleUnexpectedStartElement(XMLEvent event) {
645 handleUnexpectedStartElement(event, 1);
646 }
647
648 /**
649 * Fires an unexpected element event if the unhandled elements stack is empty.
650 * Otherwise adds the element to the stack.
651 * @param event
652 */
653 protected void handleUnexpectedStartElement(XMLEvent event, int stackDepth) {
654 QName qName = event.asStartElement().getName();
655 if (! unhandledElements.empty()){
656 unhandledElements.push(qName);
657 }else{
658 fireUnexpectedStartElement(event.getLocation(), event.asStartElement(), stackDepth + 1);
659 }
660 }
661
662
663 protected void handleUnexpectedEndElement(EndElement event) {
664 handleUnexpectedEndElement(event, 1);
665 }
666
667 /**
668 * Fires an unexpected element event if the event is not the last on the stack.
669 * Otherwise removes last stack element.
670 * @param event
671 */
672 protected void handleUnexpectedEndElement(EndElement event, int stackDepth) {
673 QName qName = event.asEndElement().getName();
674 if (!unhandledElements.isEmpty() && unhandledElements.peek().equals(qName)){
675 unhandledElements.pop();
676 }else{
677 fireUnexpectedEndElement(event.getLocation(), event.asEndElement(), stackDepth + 1);
678 }
679 }
680
681 /**
682 *
683 * @param endElement
684 */
685 protected void popUnimplemented(EndElement endElement) {
686 QName qName = endElement.asEndElement().getName();
687 if (unhandledElements.peek().equals(qName)){
688 unhandledElements.pop();
689 }else{
690 String message = "End element is not last on stack: %s";
691 message = String.format(message, qName.getLocalPart());
692 IIoEvent event = makeProblemEvent(endElement.getLocation(), message, 16, 1);
693 fire(event);
694 }
695
696 }
697
698
699 /**
700 * Fires an unexpected element event if the unhandled element stack is empty.
701 * @param event
702 */
703 protected void handleUnexpectedElement(XMLEvent event) {
704 if (event.isStartElement()){
705 handleUnexpectedStartElement(event, 2);
706 }else if (event.isEndElement()){
707 handleUnexpectedEndElement(event.asEndElement(), 2);
708 }else if (event.getEventType() == XMLStreamConstants.COMMENT){
709 //do nothing
710 }else if (! unhandledElements.empty()){
711 //do nothing
712 }else{
713 fireUnexpectedEvent(event, 1);
714 }
715 }
716
717 /**
718 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
719 * @param event
720 */
721 protected void handleNotYetImplementedCharacters(XMLEvent event) {
722 Characters chars = event.asCharacters();
723 fireNotYetImplementedCharacters(event.getLocation(), chars, 1);
724 }
725
726 /**
727 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
728 * @param event
729 */
730 protected void handleNotYetImplementedElement(XMLEvent event) {
731 QName qName = event.asStartElement().getName();
732 boolean isTopLevel = unhandledElements.isEmpty();
733 unhandledElements.push(qName);
734 if (isTopLevel){
735 fireNotYetImplementedElement(event.getLocation(), qName, 1);
736 }
737 }
738
739 /**
740 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
741 * @param event
742 */
743 protected void handleIgnoreElement(XMLEvent event) {
744 QName qName = event.asStartElement().getName();
745 unhandledElements.push(qName);
746 }
747
748 protected void handleAmbigousManually(MarkupImportState state,
749 XMLEventReader reader, StartElement startElement) {
750 QName qName = startElement.getName();
751 unhandledElements.push(qName);
752 fireWarningEvent(
753 "Handle manually: " + qName.getLocalPart() + " is ambigous and should therefore be handled manually",
754 makeLocationStr(startElement.getLocation()), 2, 2);
755 }
756
757 /**
758 * Checks if a mandatory text is not empty or null.
759 * Returns true if text is given.
760 * Fires an mandatory element is missing event otherwise and returns <code>null</code>.
761 * @param text
762 * @param parentEvent
763 * @return
764 */
765 protected boolean checkMandatoryText(String text, XMLEvent parentEvent) {
766 if (! StringUtils.isNotBlank(text)){
767 fireMandatoryElementIsMissing(parentEvent, "CData", 4, 1);
768 return false;
769 }
770 return true;
771 }
772
773 /**
774 * Fires an mandatory element is missing event if exists is <code>false</code>.
775 * @param hasMandatory
776 * @param parentEvent
777 * @param string
778 */
779 protected void checkMandatoryElement(boolean exists, StartElement parentEvent, String attrName) {
780 if (! exists){
781 fireMandatoryElementIsMissing(parentEvent, attrName, 5, 1);
782 }
783 }
784
785
786 /**
787 * Fires an element is missing event.
788 * @param xmlEvent
789 * @param string
790 * @param severity
791 * @param stackDepth
792 * @throws IllegalStateException if xmlEvent is not a StartElement and not an Attribute
793 */
794 private void fireMandatoryElementIsMissing(XMLEvent xmlEvent, String missingEventName, int severity, int stackDepth) throws IllegalStateException{
795 Location location = xmlEvent.getLocation();
796 String typeName;
797 QName qName;
798 if (xmlEvent.isAttribute()){
799 Attribute attribute = ((Attribute)xmlEvent);
800 typeName = "attribute";
801 qName = attribute.getName();
802 }else if (xmlEvent.isStartElement()){
803 typeName = "element";
804 qName = xmlEvent.asStartElement().getName();
805 }else{
806 throw new IllegalStateException("mandatory element only allowed for attributes and start tags in " + makeLocationStr(location));
807 }
808 String message = "Mandatory %s '%s' is missing in %s";
809 message = String.format(message, typeName , missingEventName, qName.getLocalPart());
810 IIoEvent event = makeProblemEvent(location, message, severity, stackDepth +1);
811 fire(event);
812 }
813
814
815
816
817 /**
818 * Returns <code>true</code> if the "next" event is the ending tag for the "parent" event.
819 * @param next end element to test, must not be null
820 * @param parentEvent start element to test
821 * @return true if the "next" event is the ending tag for the "parent" event.
822 * @throws XMLStreamException
823 */
824 protected boolean isMyEndingElement(XMLEvent next, XMLEvent parentEvent) throws XMLStreamException {
825 if (! parentEvent.isStartElement()){
826 String message = "Parent event should be start tag";
827 fireWarningEvent(message, makeLocationStr(next.getLocation()), 6);
828 return false;
829 }
830 return isEndingElement(next, parentEvent.asStartElement().getName().getLocalPart());
831 }
832
833 /**
834 * Trims the text and removes turns all whitespaces into single empty space.
835 * @param text
836 * @return
837 */
838 protected String normalize(String text) {
839 text = StringUtils.trimToEmpty(text);
840 text = text.replaceAll("\\s+", " ");
841 return text;
842 }
843
844
845
846 /**
847 * Removes whitespaces at beginning and end and makes the first letter
848 * a capital letter and all other letters small letters.
849 * @param value
850 * @return
851 */
852 protected String toFirstCapital(String value) {
853 if (StringUtils.isBlank(value)){
854 return value;
855 }else{
856 String result = "";
857 value = value.trim();
858 result += value.trim().substring(0,1).toUpperCase();
859 if (value.length()>1){
860 result += value.substring(1).toLowerCase();
861 }
862 return result;
863 }
864 }
865
866 /**
867 * Currently not used.
868 * @param str
869 * @param allowedNumberOfCharacters
870 * @param onlyFirstCapital
871 * @return
872 */
873 protected boolean isAbbreviation(String str, int allowedNumberOfCharacters, boolean onlyFirstCapital){
874 if (isBlank(str)){
875 return false;
876 }
877 str = str.trim();
878 if (! str.endsWith(".")){
879 return false;
880 }
881 str = str.substring(0, str.length() -1);
882 if (str.length() > allowedNumberOfCharacters){
883 return false;
884 }
885 final String re = "^\\p{javaUpperCase}\\p{javaLowerCase}*$";
886 if (str.matches(re)){
887 return true;
888 }else{
889 return false;
890 }
891 }
892
893 /**
894 * Checks if <code>abbrev</code> is the short form for the genus name (strGenusName).
895 * Usually this is the case if <code>abbrev</code> is the first letter (optional with ".")
896 * of strGenusName. But in older floras it may also be the first 2 or 3 letters (optional with dot).
897 * However, we allow only a maximum of 2 letters to be anambigous. In cases with 3 letters better
898 * change the original markup data.
899 * @param single
900 * @param strGenusName
901 * @return
902 */
903 protected boolean isGenusAbbrev(String abbrev, String strGenusName) {
904 if (! abbrev.matches("[A-Z][a-z]?\\.?")) {
905 return false;
906 }else if (abbrev.length() == 0 || strGenusName == null || strGenusName.length() == 0){
907 return false;
908 }else{
909 abbrev = abbrev.replace(".", "");
910 return strGenusName.startsWith(abbrev);
911 // boolean result = true;
912 // for (int i = 0 ; i < abbrev.length(); i++){
913 // result &= ( abbrev.charAt(i) == strGenusName.charAt(i));
914 // }
915 // return result;
916 }
917 }
918
919
920 /**
921 * Checks if all words in the given string start with a capital letter but do not have any further capital letter.
922 * @param word the string to be checekd. Usually should be a single word.
923 * @return true if the above is the case, false otherwise
924 */
925 protected boolean isFirstCapitalWord(String word) {
926 if (WordUtils.capitalizeFully(word).equals(word)){
927 return true;
928 }else if (WordUtils.capitalizeFully(word,new char[]{'-'}).equals(word)){
929 //for words like Le-Testui (which is a species epithet)
930 return true;
931 }else{
932 return false;
933 }
934 }
935
936
937 /**
938 * Read next event. Ignore whitespace events.
939 * @param reader
940 * @return
941 * @throws XMLStreamException
942 */
943 protected XMLEvent readNoWhitespace(XMLEventReader reader) throws XMLStreamException {
944 XMLEvent event = reader.nextEvent();
945 while (!unhandledElements.isEmpty()){
946 if (event.isStartElement()){
947 handleNotYetImplementedElement(event);
948 }else if (event.isEndElement()){
949 popUnimplemented(event.asEndElement());
950 }
951 event = reader.nextEvent();
952 }
953 while (event.isCharacters() && event.asCharacters().isWhiteSpace()){
954 event = reader.nextEvent();
955 }
956 return event;
957 }
958
959 /**
960 * Returns the REQUIRED "class" attribute for a given event and checks that it is the only attribute.
961 * @param parentEvent
962 * @return
963 */
964 protected String getClassOnlyAttribute(XMLEvent parentEvent) {
965 return getClassOnlyAttribute(parentEvent, true);
966 }
967
968
969 /**
970 * Returns the "class" attribute for a given event and checks that it is the only attribute.
971 * @param parentEvent
972 * @return
973 */
974 protected String getClassOnlyAttribute(XMLEvent parentEvent, boolean required) {
975 return getOnlyAttribute(parentEvent, CLASS, required);
976 }
977
978 /**
979 * Returns the value for the only attribute for a given event and checks that it is the only attribute.
980 * @param parentEvent
981 * @return
982 */
983 protected String getOnlyAttribute(XMLEvent parentEvent, String attrName, boolean required) {
984 Map<String, Attribute> attributes = getAttributes(parentEvent);
985 String classValue =getAndRemoveAttributeValue(parentEvent, attributes, attrName, required, 1);
986 checkNoAttributes(attributes, parentEvent);
987 return classValue;
988 }
989
990
991 protected void fireWarningEvent(String message, String locationStr, Integer severity, Integer depth) {
992 docImport.fireWarningEvent(message, locationStr, severity, depth);
993 }
994
995 protected void fireWarningEvent(String message, XMLEvent event, Integer severity) {
996 docImport.fireWarningEvent(message, makeLocationStr(event.getLocation()), severity, 1);
997 }
998
999 protected void fireSchemaConflictEventExpectedStartTag(String elName, XMLEventReader reader) throws XMLStreamException {
1000 docImport.fireSchemaConflictEventExpectedStartTag(elName, reader);
1001 }
1002
1003
1004 protected void fireWarningEvent(String message, String locationStr, int severity) {
1005 docImport.fireWarningEvent(message, locationStr, severity, 1);
1006 }
1007
1008 protected void fire(IIoEvent event) {
1009 docImport.fire(event);
1010 }
1011
1012 protected boolean isNotBlank(String str){
1013 return StringUtils.isNotBlank(str);
1014 }
1015
1016 protected boolean isBlank(String str){
1017 return StringUtils.isBlank(str);
1018 }
1019
1020 protected TaxonDescription getTaxonDescription(Taxon taxon, Reference ref, boolean isImageGallery, boolean createNewIfNotExists) {
1021 return docImport.getTaxonDescription(taxon, ref, isImageGallery, createNewIfNotExists);
1022 }
1023
1024 protected TaxonDescription getDefaultTaxonDescription(Taxon taxon, boolean isImageGallery, boolean createNewIfNotExists, Reference source) {
1025 return docImport.getDefaultTaxonDescription(taxon, isImageGallery, createNewIfNotExists, source);
1026 }
1027
1028 /**
1029 * Returns the taxon description with marked as <code>true</code> with the given marker type.
1030 * If createNewIfNotExists a new description is created if it does not yet exist.
1031 * For the new description the source and the title are set if not <code>null</code>.
1032 * @param taxon
1033 * @param markerType
1034 * @param isImageGallery
1035 * @param createNewIfNotExists
1036 * @param source
1037 * @param title
1038 * @return the existing or new taxon description
1039 */
1040 protected TaxonDescription getMarkedTaxonDescription(Taxon taxon, MarkerType markerType, boolean isImageGallery, boolean createNewIfNotExists, Reference source, String title) {
1041 return docImport.getMarkedTaxonDescription(taxon, markerType, isImageGallery, createNewIfNotExists, source, title);
1042 }
1043
1044
1045 /**
1046 * Returns the default language defined in the state. If no default language is defined in the state,
1047 * the CDM default language is returned.
1048 * @param state
1049 * @return
1050 */
1051 protected Language getDefaultLanguage(MarkupImportState state) {
1052 Language result = state.getDefaultLanguage();
1053 if (result == null){
1054 result = Language.DEFAULT();
1055 }
1056 return result;
1057 }
1058
1059
1060 //*********************** FROM XML IMPORT BASE ****************************************
1061 protected boolean isEndingElement(XMLEvent event, String elName) throws XMLStreamException {
1062 return docImport.isEndingElement(event, elName);
1063 }
1064
1065 protected boolean isStartingElement(XMLEvent event, String elName) throws XMLStreamException {
1066 return docImport.isStartingElement(event, elName);
1067 }
1068
1069
1070 protected void fillMissingEpithetsForTaxa(Taxon parentTaxon, Taxon childTaxon) {
1071 docImport.fillMissingEpithetsForTaxa(parentTaxon, childTaxon);
1072 }
1073
1074 protected Feature getFeature(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<Feature> voc){
1075 return docImport.getFeature(state, uuid, label, text, labelAbbrev, voc);
1076 }
1077
1078 protected PresenceAbsenceTerm getPresenceAbsenceTerm(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, boolean isAbsenceTerm, TermVocabulary<PresenceAbsenceTerm> voc){
1079 return docImport.getPresenceTerm(state, uuid, label, text, labelAbbrev, isAbsenceTerm, voc);
1080 }
1081
1082 protected ExtensionType getExtensionType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev){
1083 return docImport.getExtensionType(state, uuid, label, text, labelAbbrev);
1084 }
1085
1086 protected DefinedTerm getIdentifierType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<DefinedTerm> voc){
1087 return docImport.getIdentifierType(state, uuid, label, text, labelAbbrev, voc);
1088 }
1089
1090 protected AnnotationType getAnnotationType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<AnnotationType> voc){
1091 return docImport.getAnnotationType(state, uuid, label, text, labelAbbrev, voc);
1092 }
1093
1094 protected MarkerType getMarkerType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<MarkerType> voc){
1095 return docImport.getMarkerType(state, uuid, label, text, labelAbbrev, voc);
1096 }
1097
1098 protected NamedAreaLevel getNamedAreaLevel(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<NamedAreaLevel> voc){
1099 return docImport.getNamedAreaLevel(state, uuid, label, text, labelAbbrev, voc);
1100 }
1101
1102 protected NamedArea getNamedArea(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, NamedAreaType areaType, NamedAreaLevel level, TermVocabulary voc, TermMatchMode matchMode){
1103 return docImport.getNamedArea(state, uuid, label, text, labelAbbrev, areaType, level, voc, matchMode);
1104 }
1105
1106 protected Language getLanguage(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<?> voc){
1107 return docImport.getLanguage(state, uuid, label, text, labelAbbrev, voc);
1108 }
1109
1110 // *************************************** Concrete methods **********************************************/
1111
1112
1113 /**
1114 * @param state
1115 * @param classValue
1116 * @param byAbbrev
1117 * @return
1118 */
1119 protected Rank makeRank(MarkupImportState state, String value, boolean byAbbrev) {
1120 Rank rank = null;
1121 if (StringUtils.isBlank(value)) {
1122 return null;
1123 }
1124 try {
1125 boolean useUnknown = true;
1126 NomenclaturalCode nc = makeNomenclaturalCode(state);
1127 if (value.equals(GENUS_ABBREVIATION)){
1128 rank = Rank.GENUS();
1129 }else if (byAbbrev) {
1130 rank = Rank.getRankByIdInVoc(value.toLowerCase(), nc, useUnknown);
1131 if (value.equalsIgnoreCase("forma")){
1132 return Rank.FORM();
1133 }else if (value.toLowerCase().matches("(sub)?(section|genus|series|tribe)")){
1134 return Rank.getRankByEnglishName(value, nc, useUnknown);
1135 }else if (value.equals("§")){
1136 return Rank.SECTION_BOTANY(); //Special case in Flora Malesiana
1137 }
1138 } else {
1139 rank = Rank.getRankByEnglishName(value, nc, useUnknown);
1140 }
1141 if (rank.equals(Rank.UNKNOWN_RANK())) {
1142 rank = null;
1143 }
1144 if (rank == null && "sous-genre".equalsIgnoreCase(value)){
1145 rank = Rank.SUBGENUS();
1146 }
1147 } catch (UnknownCdmTypeException e) {
1148 // doNothing
1149 }
1150 return rank;
1151 }
1152
1153 NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
1154 protected TeamOrPersonBase<?> createAuthor(MarkupImportState state, String authorTitle) {
1155 TeamOrPersonBase<?> result = parser.author(authorTitle);
1156 return state.getDeduplicationHelper(docImport).getExistingAuthor(state, result);
1157 }
1158
1159 protected String getAndRemoveMapKey(Map<String, String> map, String key) {
1160 String result = map.get(key);
1161 map.remove(key);
1162 if (result != null) {
1163 result = normalize(result);
1164 }
1165 return StringUtils.stripToNull(result);
1166 }
1167
1168
1169 /**
1170 * Creates a {@link NonViralName} object depending on the defined {@link NomenclaturalCode}
1171 * and the given parameters.
1172 * @param state
1173 * @param rank
1174 * @return
1175 */
1176 protected INonViralName createNameByCode(MarkupImportState state, Rank rank) {
1177 NomenclaturalCode nc = makeNomenclaturalCode(state);
1178 INonViralName name = nc.getNewTaxonNameInstance(rank);
1179 return name;
1180 }
1181
1182 protected void handleFullName(MarkupImportState state, XMLEventReader reader,
1183 INonViralName name, XMLEvent event) throws XMLStreamException {
1184 String fullNameStr;
1185 Map<String, Attribute> attrs = getAttributes(event);
1186 String rankStr = getAndRemoveRequiredAttributeValue(event, attrs, "rank");
1187 String hybridClass = getAndRemoveAttributeValue(attrs, "hybridClass");
1188
1189 Rank rank = makeRank(state, rankStr, false);
1190 name.setRank(rank);
1191 if (rank == null) {
1192 String message = "Rank was computed as null. This must not be.";
1193 fireWarningEvent(message, event, 6);
1194 name.setRank(Rank.UNKNOWN_RANK());
1195 }
1196 if (!attrs.isEmpty()) {
1197 handleUnexpectedAttributes(event.getLocation(), attrs);
1198 }
1199 fullNameStr = getCData(state, reader, event, false);
1200 NonViralNameParserImpl.NewInstance().parseFullName(name, fullNameStr, rank, false);
1201 if (hybridClass != null ){
1202 if ("hybrid formula".equals(hybridClass)){
1203 if (!name.isHybridFormula()){
1204 fireWarningEvent("Hybrid formula is not set though requested: " + fullNameStr, event, 4);
1205 }
1206 }else if ("hybrid".equals(hybridClass)){
1207 if (!name.isHybridName()){
1208 fireWarningEvent("Hybrid name is recognized: " + fullNameStr, event, 4);
1209 }
1210 }else{
1211 handleNotYetImplementedAttributeValue(event, "hybridClass", hybridClass);
1212 }
1213 }
1214 }
1215
1216
1217 /**
1218 * Returns the {@link NomenclaturalCode} for this import. Default is {@link NomenclaturalCode#ICBN} if
1219 * no code is defined.
1220 * @param state
1221 * @return
1222 */
1223 protected NomenclaturalCode makeNomenclaturalCode(MarkupImportState state) {
1224 NomenclaturalCode nc = state.getConfig().getNomenclaturalCode();
1225 if (nc == null) {
1226 nc = NomenclaturalCode.ICNAFP; // default;
1227 }
1228 return nc;
1229 }
1230
1231
1232 /**
1233 * @param state
1234 * @param levelString
1235 * @param next
1236 * @return
1237 */
1238 protected NamedAreaLevel makeNamedAreaLevel(MarkupImportState state, String levelString, XMLEvent next) {
1239 NamedAreaLevel level;
1240 try {
1241 level = state.getTransformer().getNamedAreaLevelByKey(levelString);
1242 if (level == null) {
1243 UUID levelUuid = state.getTransformer().getNamedAreaLevelUuid(levelString);
1244 if (levelUuid == null) {
1245 String message = "Unknown distribution locality class (named area level): %s. Create new level instead.";
1246 message = String.format(message, levelString);
1247 fireWarningEvent(message, next, 6);
1248 }
1249 level = getNamedAreaLevel(state, levelUuid, levelString, levelString, levelString, null);
1250 }
1251 } catch (UndefinedTransformerMethodException e) {
1252 throw new RuntimeException(e);
1253 }
1254 return level;
1255 }
1256
1257
1258 /**
1259 * @param state
1260 * @param areaName
1261 * @param level
1262 * @return
1263 */
1264 protected NamedArea makeArea(MarkupImportState state, String areaName, NamedAreaLevel level) {
1265
1266 //TODO FM vocabulary
1267 TermVocabulary<NamedArea> voc = null;
1268 NamedAreaType areaType = null;
1269
1270 NamedArea area = null;
1271 try {
1272 area = state.getTransformer().getNamedAreaByKey(areaName);
1273 } catch (UndefinedTransformerMethodException e) {
1274 throw new RuntimeException(e);
1275 }
1276 if (area == null){
1277 boolean isNewInState = false;
1278 UUID uuid = state.getAreaUuid(areaName);
1279 if (uuid == null){
1280 isNewInState = true;
1281 try {
1282 uuid = state.getTransformer().getNamedAreaUuid(areaName);
1283 if (uuid == null){
1284 uuid = UUID.randomUUID();
1285 state.putAreaUuid(areaName, uuid);
1286 }
1287 } catch (UndefinedTransformerMethodException e) {
1288 throw new RuntimeException(e);
1289 }
1290 }
1291
1292 CdmImportBase.TermMatchMode matchMode = CdmImportBase.TermMatchMode.UUID_LABEL;
1293 area = getNamedArea(state, uuid, areaName, areaName, areaName, areaType, level, voc, matchMode);
1294 if (isNewInState){
1295 state.putAreaUuid(areaName, area.getUuid());
1296
1297 //TODO just for testing -> make generic and move to better place
1298 String geoServiceLayer="vmap0_as_bnd_political_boundary_a";
1299 String layerFieldName ="nam";
1300
1301 if ("Bangka".equals(areaName)){
1302 String areaValue = "PULAU BANGKA#SUMATERA SELATAN";
1303 GeoServiceArea geoServiceArea = new GeoServiceArea();
1304 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1305 this.editGeoService.setMapping(area, geoServiceArea);
1306 // save(area, state);
1307 }
1308 if ("Luzon".equals(areaName)){
1309 GeoServiceArea geoServiceArea = new GeoServiceArea();
1310
1311 List<String> list = Arrays.asList("HERMANA MAYOR ISLAND#CENTRAL LUZON",
1312 "HERMANA MENOR ISLAND#CENTRAL LUZON",
1313 "CENTRAL LUZON");
1314 for (String areaValue : list){
1315 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1316 }
1317
1318 this.editGeoService.setMapping(area, geoServiceArea);
1319 // save(area, state);
1320 }
1321 if ("Mindanao".equals(areaName)){
1322 GeoServiceArea geoServiceArea = new GeoServiceArea();
1323
1324 List<String> list = Arrays.asList("NORTHERN MINDANAO",
1325 "SOUTHERN MINDANAO",
1326 "WESTERN MINDANAO");
1327 //TODO to be continued
1328 for (String areaValue : list){
1329 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1330 }
1331
1332 this.editGeoService.setMapping(area, geoServiceArea);
1333 // save(area, state);
1334 }
1335 if ("Palawan".equals(areaName)){
1336 GeoServiceArea geoServiceArea = new GeoServiceArea();
1337
1338 List<String> list = Arrays.asList("PALAWAN#SOUTHERN TAGALOG");
1339 for (String areaValue : list){
1340 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1341 }
1342
1343 this.editGeoService.setMapping(area, geoServiceArea);
1344 // save(area, state);
1345 }
1346
1347 }
1348 }
1349 return area;
1350 }
1351
1352
1353
1354 /**
1355 * Reads character data. Any element other than character data or the ending
1356 * tag will fire an unexpected element event.
1357 *
1358 * @see #getCData(MarkupImportState, XMLEventReader, XMLEvent, boolean)
1359 * @param state
1360 * @param reader
1361 * @param next
1362 * @return
1363 * @throws XMLStreamException
1364 */
1365 protected String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next) throws XMLStreamException {
1366 return getCData(state, reader, next, true);
1367 }
1368
1369 /**
1370 * Reads character data. Any element other than character data or the ending
1371 * tag will fire an unexpected element event.
1372 *
1373 * @param state
1374 * @param reader
1375 * @param next
1376 * @param inlineMarkup map for inline markup, this is used for e.g. the locality markup within a subheading
1377 * The map will be filled by the markup element name as key. The value may be a String, a CdmBase or any other object.
1378 * If null any markup text will be neglected but a warning will be fired if they exist.
1379 * @param removeInlineMarkupText if true the markedup text will be removed from the returned String
1380 * @param checkAttributes
1381 * @return
1382 * @throws XMLStreamException
1383 */
1384 protected String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent parent, /*Map<String, Object> inlineMarkup, *boolean removeInlineMarkupText,*/ boolean checkAttributes) throws XMLStreamException {
1385 if (checkAttributes){
1386 checkNoAttributes(parent);
1387 }
1388
1389 String text = "";
1390 while (reader.hasNext()) {
1391 XMLEvent next = readNoWhitespace(reader);
1392 if (isMyEndingElement(next, parent)) {
1393 return text;
1394 } else if (next.isCharacters()) {
1395 text += next.asCharacters().getData();
1396 } else if (isStartingElement(next, FOOTNOTE_REF)){
1397 handleNotYetImplementedElement(next);
1398 // } else if (isStartingElement(next, LOCALITY)){
1399 // handleCDataLocality(state, reader, parent);
1400 } else {
1401 handleUnexpectedElement(next);
1402 }
1403 }
1404 throw new IllegalStateException("Event has no closing tag");
1405
1406 }
1407
1408 // private void handleCDataLocality(MarkupImportState state, XMLEventReader reader, XMLEvent parent) {
1409 // checkAndRemoveAttributeValue(attributes, attrName, value)
1410 //
1411 // }
1412
1413
1414
1415 /**
1416 * For it returns a pure CData annotation string. This behaviour may change in future. More complex annotations
1417 * should be handled differently.
1418 * @param state
1419 * @param reader
1420 * @param parentEvent
1421 * @return
1422 * @throws XMLStreamException
1423 */
1424 protected String handleSimpleAnnotation(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1425 String annotation = getCData(state, reader, parentEvent);
1426 return annotation;
1427 }
1428
1429 /**
1430 * True if text is single "." oder "," or ";" or ":"
1431 * @param text
1432 * @return
1433 */
1434 protected boolean isPunctuation(String text) {
1435 return text == null ? false : text.trim().matches("^[\\.,;:]$");
1436 }
1437
1438
1439 /**
1440 * Text indicating that type information is following but no information about the type of the type
1441 * @param text
1442 * @return
1443 */
1444 protected boolean charIsSimpleType(String text) {
1445 return text.matches("(?i)Type:");
1446 }
1447
1448 protected String getXmlTag(XMLEvent event) {
1449 String result;
1450 if (event.isStartElement()) {
1451 result = "<" + event.asStartElement().getName().getLocalPart()
1452 + ">";
1453 } else if (event.isEndElement()) {
1454 result = "</" + event.asEndElement().getName().getLocalPart() + ">";
1455 } else {
1456 String message = "Only start or end elements are allowed as Html tags";
1457 throw new IllegalStateException(message);
1458 }
1459 return result;
1460 }
1461
1462 protected WriterDataHolder handleWriter(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1463 String text = "";
1464 checkNoAttributes(parentEvent);
1465 WriterDataHolder dataHolder = new WriterDataHolder();
1466 List<FootnoteDataHolder> footnotes = new ArrayList<>();
1467
1468 // TODO handle attributes
1469 while (reader.hasNext()) {
1470 XMLEvent next = readNoWhitespace(reader);
1471 if (isMyEndingElement(next, parentEvent)) {
1472 text = CdmUtils.removeBrackets(text);
1473 if (checkMandatoryText(text, parentEvent)) {
1474 text = normalize(text);
1475 dataHolder.writer = text;
1476 dataHolder.footnotes = footnotes;
1477
1478 // Extension
1479 UUID uuidWriterExtension = MarkupTransformer.uuidWriterExtension;
1480 ExtensionType writerExtensionType =
1481 this.getExtensionType(state, uuidWriterExtension,"Writer", "writer", "writer");
1482 Extension extension = Extension.NewInstance();
1483 extension.setType(writerExtensionType);
1484 extension.setValue(text);
1485 dataHolder.extension = extension;
1486
1487 // Annotation
1488 UUID uuidWriterAnnotation = MarkupTransformer.uuidWriterAnnotation;
1489 AnnotationType writerAnnotationType = this.getAnnotationType(state, uuidWriterAnnotation, "Writer", "writer", "writer", null);
1490 Annotation annotation = Annotation.NewInstance(text, writerAnnotationType, getDefaultLanguage(state));
1491 dataHolder.annotation = annotation;
1492
1493 return dataHolder;
1494 } else {
1495 return null;
1496 }
1497 } else if (isStartingElement(next, FOOTNOTE_REF)) {
1498 FootnoteDataHolder footNote = handleFootnoteRef(state, reader, next);
1499 if (footNote.isRef()) {
1500 footnotes.add(footNote);
1501 } else {
1502 logger.warn("Non ref footnotes not yet impelemnted");
1503 }
1504 } else if (next.isCharacters()) {
1505 text += next.asCharacters().getData();
1506
1507 } else {
1508 handleUnexpectedElement(next);
1509 state.setUnsuccessfull();
1510 }
1511 }
1512 throw new IllegalStateException("<writer> has no end tag");
1513 }
1514
1515
1516 protected void registerFootnotes(MarkupImportState state, AnnotatableEntity entity, List<FootnoteDataHolder> footnotes) {
1517 for (FootnoteDataHolder footNote : footnotes) {
1518 registerFootnoteDemand(state, entity, footNote);
1519 }
1520 }
1521
1522
1523 private void registerFootnoteDemand(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
1524 FootnoteDataHolder existingFootnote = state.getFootnote(footnote.ref);
1525 if (existingFootnote != null) {
1526 attachFootnote(state, entity, existingFootnote);
1527 } else {
1528 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.ref);
1529 if (demands == null) {
1530 demands = new HashSet<>();
1531 state.putFootnoteDemands(footnote.ref, demands);
1532 }
1533 demands.add(entity);
1534 }
1535 }
1536
1537
1538 protected void attachFootnote(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
1539 AnnotationType annotationType = this.getAnnotationType(state, MarkupTransformer.uuidFootnote, "Footnote", "An e-flora footnote", "fn", null);
1540 Annotation annotation = Annotation.NewInstance(footnote.string, annotationType, getDefaultLanguage(state));
1541 // TODO transient objects
1542 entity.addAnnotation(annotation);
1543 save(entity, state);
1544 }
1545
1546
1547 protected void attachFigure(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, Media figure) {
1548 // IdentifiableEntity<?> toSave;
1549 if (entity.isInstanceOf(TextData.class)) {
1550 TextData deb = CdmBase.deproxy(entity, TextData.class);
1551 deb.addMedia(figure);
1552 // toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
1553 } else if (entity.isInstanceOf(SpecimenOrObservationBase.class)) {
1554 String message = "figures for specimen should be handled as Textdata";
1555 fireWarningEvent(message, next, 4);
1556 // toSave = ime;
1557 } else if (entity.isInstanceOf(IdentifiableMediaEntity.class)) {
1558 IdentifiableMediaEntity<?> ime = CdmBase.deproxy(entity, IdentifiableMediaEntity.class);
1559 ime.addMedia(figure);
1560 // toSave = ime;
1561 } else {
1562 String message = "Unsupported entity to attach media: %s";
1563 message = String.format(message, entity.getClass().getName());
1564 // toSave = null;
1565 }
1566 save(entity, state);
1567 }
1568
1569
1570 protected void registerGivenFootnote(MarkupImportState state, FootnoteDataHolder footnote) {
1571 state.registerFootnote(footnote);
1572 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.id);
1573 if (demands != null) {
1574 for (AnnotatableEntity entity : demands) {
1575 attachFootnote(state, entity, footnote);
1576 }
1577 }
1578 }
1579
1580
1581 protected FootnoteDataHolder handleFootnote(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
1582 MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1583 FootnoteDataHolder result = new FootnoteDataHolder();
1584 Map<String, Attribute> attributes = getAttributes(parentEvent);
1585 result.id = getAndRemoveAttributeValue(attributes, ID);
1586 // result.ref = getAndRemoveAttributeValue(attributes, REF);
1587 checkNoAttributes(attributes, parentEvent);
1588
1589 while (reader.hasNext()) {
1590 XMLEvent next = readNoWhitespace(reader);
1591 if (isStartingElement(next, FOOTNOTE_STRING)) {
1592 String string = handleFootnoteString(state, reader, next, specimenImport, nomenclatureImport);
1593 result.string = string;
1594 } else if (isMyEndingElement(next, parentEvent)) {
1595 return result;
1596 } else {
1597 fireUnexpectedEvent(next, 0);
1598 }
1599 }
1600 return result;
1601 }
1602
1603
1604 protected Media handleFigure(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
1605 MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1606 // FigureDataHolder result = new FigureDataHolder();
1607
1608 Map<String, Attribute> attributes = getAttributes(parentEvent);
1609 String id = getAndRemoveAttributeValue(attributes, ID);
1610 String type = getAndRemoveAttributeValue(attributes, TYPE);
1611 String urlAttr = getAndRemoveAttributeValue(attributes, URL);
1612 checkNoAttributes(attributes, parentEvent);
1613
1614 String urlString = null;
1615 String legendString = null;
1616 String titleString = null;
1617 String numString = null;
1618 String text = null;
1619 if (isNotBlank(urlAttr)){
1620 urlString = CdmUtils.Nz(state.getBaseMediaUrl()) + urlAttr;
1621 }
1622 while (reader.hasNext()) {
1623 XMLEvent next = readNoWhitespace(reader);
1624 if (isMyEndingElement(next, parentEvent)) {
1625 if (isNotBlank(text)){
1626 if (isNeglectableFigureText(text)){
1627 fireWarningEvent("Text not yet handled for figures: " + text, next, 4);
1628 }
1629 }
1630 Media media = makeFigure(state, id, type, urlString, legendString, titleString, numString, next);
1631 return media;
1632 } else if (isStartingElement(next, FIGURE_LEGEND)) {
1633 // TODO same as figure string ?
1634 legendString = handleFootnoteString(state, reader, next, specimenImport, nomenclatureImport);
1635 } else if (isStartingElement(next, FIGURE_TITLE)) {
1636 titleString = getCData(state, reader, next);
1637 } else if (isStartingElement(next, URL)) {
1638 String localUrl = getCData(state, reader, next);
1639 String url = CdmUtils.Nz(state.getBaseMediaUrl()) + localUrl;
1640 if (isBlank(urlString)){
1641 urlString = url;
1642 }
1643 if (! url.equals(urlString)){
1644 String message = "URL attribute and URL element differ. Attribute: %s, Element: %s";
1645 fireWarningEvent(String.format(message, urlString, url), next, 2);
1646 }
1647 } else if (isStartingElement(next, NUM)) {
1648 numString = getCData(state, reader, next);
1649 } else if (next.isCharacters()) {
1650 text = CdmUtils.concat("", text, next.asCharacters().getData());
1651 } else {
1652 fireUnexpectedEvent(next, 0);
1653 }
1654 }
1655 throw new IllegalStateException("<figure> has no end tag");
1656 }
1657
1658
1659 /**
1660 * @param text2
1661 * @return
1662 */
1663 private boolean isNeglectableFigureText(String text) {
1664 if (text.matches("Fig\\.*")){
1665 return true;
1666 }else{
1667 return false;
1668 }
1669 }
1670
1671
1672 /**
1673 * @param state
1674 * @param id
1675 * @param type
1676 * @param urlString
1677 * @param legendString
1678 * @param titleString
1679 * @param numString
1680 * @param next
1681 */
1682 private Media makeFigure(MarkupImportState state, String id, String type, String urlString,
1683 String legendString, String titleString, String numString, XMLEvent next) {
1684 Media media = null;
1685 // boolean isFigure = false; //no difference between figure and media since v3.3
1686 try {
1687 //TODO maybe everything is a figure as it is all taken from a book
1688 if ("lineart".equals(type)) {
1689 // isFigure = true;
1690 // media = Figure.NewInstance(url.toURI(), null, null, null);
1691 } else if (type == null || "photo".equals(type)
1692 || "signature".equals(type)
1693 || "others".equals(type)) {
1694 //TODO
1695 } else {
1696 String message = "Unknown figure type '%s'";
1697 message = String.format(message, type);
1698 fireWarningEvent(message, next, 2);
1699 }
1700 media = docImport.getImageMedia(urlString, docImport.getReadMediaData());
1701
1702 if (media != null){
1703 // title
1704 if (StringUtils.isNotBlank(titleString)) {
1705 media.putTitle(getDefaultLanguage(state), titleString);
1706 }
1707 // legend
1708 if (StringUtils.isNotBlank(legendString)) {
1709 media.putDescription(getDefaultLanguage(state), legendString);
1710 }
1711 if (StringUtils.isNotBlank(numString)) {
1712 // TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
1713 // vol.13)
1714 Reference citation = state.getConfig().getSourceReference();
1715 media.addSource(OriginalSourceType.Import, numString, "num", citation, null);
1716 // TODO name used in source if available
1717 }
1718 // TODO which citation
1719 if (StringUtils.isNotBlank(id)) {
1720 media.addSource(OriginalSourceType.Import, id, null, state.getConfig().getSourceReference(), null);
1721 } else {
1722 String message = "Figure id should never be empty or null";
1723 fireWarningEvent(message, next, 6);
1724 }
1725
1726 // text
1727 // do nothing
1728 registerGivenFigure(state, next, id, media);
1729
1730 }else{
1731 String message = "No media found: ";
1732 fireWarningEvent(message, next, 4);
1733 }
1734 } catch (MalformedURLException e) {
1735 String message = "Media uri has incorrect syntax: %s";
1736 message = String.format(message, urlString);
1737 fireWarningEvent(message, next, 4);
1738 // } catch (URISyntaxException e) {
1739 // String message = "Media uri has incorrect syntax: %s";
1740 // message = String.format(message, urlString);
1741 // fireWarningEvent(message, next, 4);
1742 }
1743
1744 return media;
1745 }
1746
1747
1748 private void registerGivenFigure(MarkupImportState state, XMLEvent next, String id, Media figure) {
1749 state.registerFigure(id, figure);
1750 Set<AnnotatableEntity> demands = state.getFigureDemands(id);
1751 if (demands != null) {
1752 for (AnnotatableEntity entity : demands) {
1753 attachFigure(state, next, entity, figure);
1754 }
1755 }
1756 save(figure, state);
1757 }
1758
1759
1760 private FootnoteDataHolder handleFootnoteRef(MarkupImportState state,
1761 XMLEventReader reader, XMLEvent parentEvent)
1762 throws XMLStreamException {
1763 FootnoteDataHolder result = new FootnoteDataHolder();
1764 Map<String, Attribute> attributes = getAttributes(parentEvent);
1765 result.ref = getAndRemoveAttributeValue(attributes, REF);
1766 checkNoAttributes(attributes, parentEvent);
1767
1768 // text is not handled, needed only for debugging purposes
1769 String text = "";
1770 while (reader.hasNext()) {
1771 XMLEvent next = readNoWhitespace(reader);
1772 // if (isStartingElement(next, FOOTNOTE_STRING)){
1773 // String string = handleFootnoteString(state, reader, next);
1774 // result.string = string;
1775 // }else
1776 if (isMyEndingElement(next, parentEvent)) {
1777 if (StringUtils.isNotBlank(text)){
1778 fireWarningEvent("text is not empty but not handled during import", parentEvent, 4);
1779 }
1780 return result;
1781 } else if (next.isCharacters() && unhandledElements.isEmpty()) {
1782 text += next.asCharacters().getData();
1783 } else if (isStartingElement(next, NUM)) {
1784 //ignore numbering of footnotes as they are numbered differently in the CDM
1785 handleIgnoreElement(next);
1786 } else {
1787 handleUnexpectedElement(next);
1788 }
1789 }
1790 return result;
1791 }
1792
1793
1794
1795 private String handleFootnoteString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1796 boolean isTextMode = true;
1797 String text = "";
1798 while (reader.hasNext()) {
1799 XMLEvent next = readNoWhitespace(reader);
1800 if (isMyEndingElement(next, parentEvent)) {
1801 return text;
1802 } else if (next.isEndElement()) {
1803 if (isEndingElement(next, FULL_NAME)) {
1804 popUnimplemented(next.asEndElement());
1805 } else if (isEndingElement(next, BR)) {
1806 isTextMode = true;
1807 } else if (isHtml(next)) {
1808 text += getXmlTag(next);
1809 } else {
1810 handleUnexpectedEndElement(next.asEndElement());
1811 }
1812 } else if (next.isStartElement()) {
1813 if (isStartingElement(next, FULL_NAME)) {
1814 handleNotYetImplementedElement(next);
1815 } else if (isStartingElement(next, GATHERING)) {
1816 text += specimenImport.handleInLineGathering(state, reader, next);
1817 } else if (isStartingElement(next, REFERENCES)) {
1818 text += " " + handleInLineReferences(state, reader, next, nomenclatureImport) + " ";
1819 } else if (isStartingElement(next, BR)) {
1820 text += "<br/>";
1821 isTextMode = false;
1822 } else if (isStartingElement(next, NOMENCLATURE)) {
1823 handleNotYetImplementedElement(next);
1824 } else if (isHtml(next)) {
1825 text += getXmlTag(next);
1826 } else {
1827 handleUnexpectedStartElement(next.asStartElement());
1828 }
1829 } else if (next.isCharacters()) {
1830 if (!isTextMode) {
1831 String message = "footnoteString is not in text mode";
1832 fireWarningEvent(message, next, 6);
1833 } else {
1834 text += next.asCharacters().getData().trim();
1835 // getCData(state, reader, next); does not work as we have inner tags like <references>
1836 }
1837 } else {
1838 handleUnexpectedEndElement(next.asEndElement());
1839 }
1840 }
1841 throw new IllegalStateException("<footnoteString> has no closing tag");
1842
1843 }
1844
1845 private static final List<String> htmlList = Arrays.asList("sub", "sup",
1846 "ol", "ul", "li", "i", "b", "table", "br","tr","td","th");
1847
1848 protected boolean isHtml(XMLEvent event) {
1849 if (event.isStartElement()) {
1850 String tag = event.asStartElement().getName().getLocalPart();
1851 return htmlList.contains(tag);
1852 } else if (event.isEndElement()) {
1853 String tag = event.asEndElement().getName().getLocalPart();
1854 return htmlList.contains(tag);
1855 } else {
1856 return false;
1857 }
1858
1859 }
1860
1861
1862 private String handleInLineReferences(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent,
1863 MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1864 checkNoAttributes(parentEvent);
1865
1866 boolean hasReference = false;
1867 String text = "";
1868 while (reader.hasNext()) {
1869 XMLEvent next = readNoWhitespace(reader);
1870 if (isMyEndingElement(next, parentEvent)) {
1871 checkMandatoryElement(hasReference, parentEvent.asStartElement(), REFERENCE);
1872 return text;
1873 } else if (isStartingElement(next, REFERENCE)) {
1874 text += handleInLineReference(state, reader, next, nomenclatureImport);
1875 hasReference = true;
1876 } else {
1877 handleUnexpectedElement(next);
1878 }
1879 }
1880 throw new IllegalStateException("<References> has no closing tag");
1881 }
1882
1883 private String handleInLineReference(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent, MarkupNomenclatureImport nomenclatureImport)throws XMLStreamException {
1884 Reference reference = nomenclatureImport.handleReference(state, reader, parentEvent);
1885 fireWarningEvent("Check correct usage of inline reference", parentEvent, 3);
1886 IntextReference intext = IntextReference.NewInstance(reference, null, 0, 0);
1887 save(reference, state);
1888 return intext.toInlineString(reference.getTitleCache());
1889 }
1890
1891 protected class SubheadingResult{
1892 String text;
1893 StringReferences references;
1894 List<IntextReference> inlineReferences;
1895 }
1896
1897 /**
1898 * Handle < string > .
1899 * @param state
1900 * @param reader
1901 * @param parentEvent
1902 * @param feature only needed for distributionLocalities
1903 * @return
1904 * @throws XMLStreamException
1905 */
1906 protected Map<String, SubheadingResult> handleString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature feature)throws XMLStreamException {
1907 // attributes
1908 String classValue = getClassOnlyAttribute(parentEvent, false);
1909 if (StringUtils.isNotBlank(classValue)) {
1910 String message = "class attribute for <string> not yet implemented";
1911 fireWarningEvent(message, parentEvent, 2);
1912 }
1913 boolean isHabitat = false;
1914
1915 // subheadings
1916 Map<String, SubheadingResult> subHeadingMap = new HashMap<>();
1917 String currentSubheading = null;
1918
1919 boolean isTextMode = true;
1920 String text = "";
1921 StringReferences currentReferences = null;
1922 List<IntextReference> inlineReferences = new ArrayList<>();
1923 boolean lastWasReference = false;
1924 while (reader.hasNext()) {
1925 XMLEvent next = readNoWhitespace(reader);
1926 if (isMyEndingElement(next, parentEvent)) {
1927 putCurrentSubheading(subHeadingMap, currentSubheading, text, currentReferences, inlineReferences);
1928 return subHeadingMap;
1929 }
1930 //check if last event was reference
1931 if (lastWasReference && !isStartingElement(next, BR) && !isEndingElement(next, BR)
1932 && !isStartingElement(next, SUB_HEADING)){
1933 for (LabeledReference labeledRef : currentReferences.content){
1934 if (labeledRef.ref != null){
1935 IntextReference intext = IntextReference.NewInstance(labeledRef.ref, null, 0, 0);
1936 inlineReferences.add(intext);
1937 text += intext.toInlineString(labeledRef.label);
1938 }else{
1939 text += labeledRef.label;
1940 }
1941 }
1942 lastWasReference = false;
1943 }
1944 if (isStartingElement(next, BR)) {
1945 text += "<br/>";
1946 isTextMode = false;
1947 } else if (isEndingElement(next, BR)) {
1948 isTextMode = true;
1949 } else if (isHtml(next)) {
1950 text += getXmlTag(next);
1951 } else if (isStartingElement(next, SUB_HEADING)) {
1952 text = putCurrentSubheading(subHeadingMap, currentSubheading, text, currentReferences, inlineReferences);
1953 currentReferences = null;
1954 inlineReferences = new ArrayList<>();
1955 lastWasReference = false;
1956 // TODO footnotes
1957 currentSubheading = getCData(state, reader, next).trim();
1958 } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
1959 if (feature != null && !feature.equals(Feature.DISTRIBUTION())) {
1960 String message = "Distribution locality only allowed for feature of type 'distribution'";
1961 fireWarningEvent(message, next, 4);
1962 }
1963 text += handleDistributionLocality(state, reader, next);
1964 } else if (next.isCharacters()) {
1965 if (! isTextMode) {
1966 String message = "String is not in text mode";
1967 fireWarningEvent(message, next, 6);
1968 } else {
1969 text += next.asCharacters().getData();
1970 }
1971 } else if (isStartingElement(next, HEADING)) {
1972 //TODO
1973 handleNotYetImplementedElement(next);
1974 } else if (isStartingElement(next, VERNACULAR_NAMES)) {
1975 //TODO
1976 handleNotYetImplementedElement(next);
1977 } else if (isStartingElement(next, QUOTE)) {
1978 //TODO
1979 handleNotYetImplementedElement(next);
1980 } else if (isStartingElement(next, DEDICATION)) {
1981 //TODO
1982 handleNotYetImplementedElement(next);
1983 } else if (isStartingElement(next, TAXONTYPE)) {
1984 //TODO
1985 handleNotYetImplementedElement(next);
1986 } else if (isStartingElement(next, FULL_NAME)) {
1987 //TODO
1988 handleNotYetImplementedElement(next);
1989 }else if (isStartingElement(next, REFERENCES)) {
1990 if (currentReferences != null){
1991 fireWarningEvent("References do already exist", next, 2);
1992 }
1993 currentReferences = handleStringReferences(state, reader, next);
1994 lastWasReference = true;
1995 }else if (isStartingElement(next, REFERENCE)) {
1996 //TODO
1997 handleNotYetImplementedElement(next);
1998 } else if (isStartingElement(next, GATHERING)) {
1999 //TODO
2000 handleNotYetImplementedElement(next);
2001 } else if (isStartingElement(next, ANNOTATION)) {
2002 //TODO //TODO test handleSimpleAnnotation
2003 handleNotYetImplementedElement(next);
2004 } else if (isStartingElement(next, HABITAT)) {
2005 text += featureImport.handleHabitat(state, reader, next);
2006 isHabitat = true;
2007 } else if (isStartingElement(next, FIGURE_REF)) {
2008 //TODO
2009 handleNotYetImplementedElement(next);
2010 } else if (isStartingElement(next, FIGURE)) {
2011 //TODO
2012 handleNotYetImplementedElement(next);
2013 } else if (isStartingElement(next, FOOTNOTE_REF)) {
2014 //TODO
2015 handleNotYetImplementedElement(next);
2016 } else if (isStartingElement(next, FOOTNOTE)) {
2017 //TODO
2018 handleNotYetImplementedElement(next);
2019 } else if (isStartingElement(next, WRITER)) {
2020 //TODO
2021 handleNotYetImplementedElement(next);
2022 } else if (isStartingElement(next, DATES)) {
2023 //TODO
2024 handleNotYetImplementedElement(next);
2025 } else if (isStartingElement(next, TO_KEY)) {
2026 handleNotYetImplementedElement(next);
2027 } else {
2028 handleUnexpectedElement(next);
2029 }
2030 }
2031 throw new IllegalStateException("<String> has no closing tag");
2032 }
2033
2034
2035 /**
2036 * container class more or less representing a list of labeled references
2037 */
2038 protected class StringReferences{
2039 String subheading;
2040 List<LabeledReference> content = new ArrayList<>() ; //either String or LabeledReference
2041 @Override
2042 public String toString(){
2043 String result = null;
2044 for (LabeledReference labRef : content){
2045 result = CdmUtils.concat("", labRef.label);
2046 }
2047 return result;
2048 }
2049 public List<LabeledReference> getReferences() {
2050 List<LabeledReference> result = new ArrayList<>();
2051 for (LabeledReference labRef : content){
2052 if (labRef.ref != null){
2053 result.add(labRef);
2054 }
2055 }
2056 return result;
2057 }
2058 }
2059
2060 protected class LabeledReference{
2061 public LabeledReference(Reference ref, String detail, String label) {
2062 this.ref = ref; this.detail = detail; this.label = label;
2063 }
2064 protected Reference ref; //if null, this LabeledReference represents only a string in between references
2065 protected String detail; //micro reference
2066 protected String label;
2067 }
2068
2069 private StringReferences handleStringReferences(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
2070 checkNoAttributes(parentEvent);
2071 StringReferences result = new StringReferences();
2072 while (reader.hasNext()) {
2073 XMLEvent next = readNoWhitespace(reader);
2074 if (isMyEndingElement(next, parentEvent)) {
2075 return result;
2076 } else if (isStartingElement(next, SUB_HEADING)) {
2077 String subheading = getCData(state, reader, next);
2078 if (!subheading.matches("(References?|Literature):?")){
2079 fireWarningEvent("Subheading for references not recognized: " + subheading, next, 4);
2080 }
2081 result.subheading = subheading;
2082 } else if (isStartingElement(next, REFERENCE)) {
2083 handleInlineReference(state, reader, next, result);
2084 } else {
2085 handleUnexpectedElement(next);
2086 }
2087 }
2088 throw new IllegalStateException("<References> has no closing tag");
2089 }
2090
2091 private void handleInlineReference(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
2092 StringReferences result) throws XMLStreamException {
2093 checkNoAttributes(parentEvent);
2094 boolean hasRefPart = false;
2095 Map<String, String> refMap = new HashMap<>();
2096 String label = "";
2097 while (reader.hasNext()) {
2098 XMLEvent next = readNoWhitespace(reader);
2099 if (isMyEndingElement(next, parentEvent)) {
2100 checkMandatoryElement(hasRefPart, parentEvent.asStartElement(), REF_PART);
2101 String details = refMap.get(DETAILS);
2102 // String label = makeLabel(state, refMap, next);
2103 Reference ref = createReference(state, refMap, next);
2104 ref = state.getDeduplicationHelper(docImport).getExistingReference(state, ref);
2105
2106 String label2 = ref.getTitleCache(); //TODO preliminary for debugging and testing
2107 result.content.add(new LabeledReference(ref, details, label));
2108 return;
2109 } else if (isStartingElement(next, REF_PART)) {
2110 String classValue = handleRefPart(state, reader, next, refMap);
2111 String text = refMap.get(classValue);
2112 if (classValue.equals(YEAR)){
2113 text = "("+text+")";
2114 }
2115 hasRefPart = true;
2116 label = CdmUtils.concat(" ", label, text);
2117 } else {
2118 handleUnexpectedElement(next);
2119 }
2120 }
2121 throw new IllegalStateException("<References> has no closing tag");
2122
2123 }
2124
2125
2126 // this is more or less a duplicate Nomenclature import, maybe merge later
2127 private Reference createReference(MarkupImportState state,
2128 Map<String, String> refMap, XMLEvent parentEvent) {
2129
2130 Reference reference;
2131
2132 String type = getAndRemoveMapKey(refMap, PUBTYPE);
2133 String authorStr = getAndRemoveMapKey(refMap, AUTHOR);
2134 String titleStr = getAndRemoveMapKey(refMap, PUBTITLE);
2135 String titleCache = getAndRemoveMapKey(refMap, PUBFULLNAME);
2136 String volume = getAndRemoveMapKey(refMap, VOLUME);
2137 String edition = getAndRemoveMapKey(refMap, EDITION);
2138 String editors = getAndRemoveMapKey(refMap, EDITORS);
2139 String year = getAndRemoveMapKey(refMap, YEAR);
2140 String pubName = getAndRemoveMapKey(refMap, PUBNAME);
2141 String pages = getAndRemoveMapKey(refMap, PAGES);
2142 String publication = getAndRemoveMapKey(refMap, PUBLOCATION);
2143 String publisher = getAndRemoveMapKey(refMap, PUBLISHER);
2144 String appendix = getAndRemoveMapKey(refMap, APPENDIX);
2145 String issue = getAndRemoveMapKey(refMap, ISSUE);
2146
2147 reference = handleNonCitationSpecific(state, type, authorStr, titleStr,
2148 titleCache, volume, issue, edition, editors, pubName, appendix, pages, parentEvent);
2149
2150 //year
2151 TimePeriod timeperiod = TimePeriodParser.parseString(year);
2152 if (reference.getType().equals(ReferenceType.BookSection)){
2153 reference.getInBook().setDatePublished(timeperiod);
2154 }
2155 reference.setDatePublished(timeperiod);
2156
2157 //Quickfix for these 2 attributes (publication, publisher) used in feature.references
2158 Reference inRef = reference.getInReference() == null ? reference : reference.getInReference();
2159 //publication
2160 if (isNotBlank(publisher)){
2161 inRef.setPublisher(publisher);
2162 }
2163
2164 //publisher
2165 if (isNotBlank(publication)){
2166 inRef.setPlacePublished(publication);
2167 }
2168
2169 // TODO
2170 String[] unhandledList = new String[] { ALTERNATEPUBTITLE, NOTES, STATUS };
2171 for (String unhandled : unhandledList) {
2172 String value = getAndRemoveMapKey(refMap, unhandled);
2173 if (isNotBlank(value)) {
2174 this.handleNotYetImplementedAttributeValue(parentEvent, CLASS, unhandled);
2175 }
2176 }
2177
2178 for (String key : refMap.keySet()) {
2179 if (!DETAILS.equalsIgnoreCase(key)) {
2180 this.fireUnexpectedAttributeValue(parentEvent, CLASS, key);
2181 }
2182 }
2183
2184 return reference;
2185 }
2186
2187
2188 /**
2189 * Create reference for non nomenclatural references
2190 * @return
2191 */
2192 protected Reference handleNonCitationSpecific(MarkupImportState state, String type, String authorStr,
2193 String titleStr, String titleCache, String volume, String issue, String edition,
2194 String editors, String pubName, String appendix, String pages, XMLEvent parentEvent) {
2195
2196 Reference reference;
2197
2198 //volume / issue
2199 if (isBlank(volume) && isNotBlank(issue)){
2200 String message = "Issue ('"+issue+"') exists but no volume";
2201 fireWarningEvent(message, parentEvent, 4);
2202 volume = issue;
2203 }else if (isNotBlank(issue)){
2204 volume = volume + "("+ issue + ")";
2205 }
2206
2207 //pubName / appendix
2208 if (isNotBlank(appendix)){
2209 pubName = pubName == null ? appendix : (pubName + " " + appendix).replaceAll(" ", " ");
2210 }
2211
2212 if (isArticleNonCitation(type, pubName, volume, editors)) {
2213 IArticle article = ReferenceFactory.newArticle();
2214 if (pubName != null) {
2215 IJournal journal = ReferenceFactory.newJournal();
2216 journal.setTitle(pubName);
2217 article.setInJournal(journal);
2218 }else{
2219 fireWarningEvent("Article has no journal", parentEvent, 4);
2220 }
2221 reference = (Reference) article;
2222 } else {
2223 if (isBookSection(type, authorStr, titleStr, editors, pubName, volume)){
2224 IBookSection bookSection = ReferenceFactory.newBookSection();
2225 if (pubName != null) {
2226 IBook book = ReferenceFactory.newBook();
2227 book.setTitle(pubName);
2228 bookSection.setInBook(book);
2229 }
2230 reference = (Reference)bookSection;
2231 }else{
2232 //??
2233 Reference bookOrPartOf = ReferenceFactory.newGeneric();
2234 if (pubName != null && titleStr != null) {
2235 Reference inReference = ReferenceFactory.newGeneric();
2236 inReference.setTitle(pubName);
2237 bookOrPartOf.setInReference(inReference);
2238 }
2239 reference = bookOrPartOf;
2240 }
2241 }
2242
2243 //author
2244 TeamOrPersonBase<?> author = createAuthor(state, authorStr);
2245 reference.setAuthorship(author);
2246
2247 //title
2248 reference.setTitle(titleStr);
2249 if (StringUtils.isNotBlank(titleCache)) {
2250 reference.setTitleCache(titleCache, true);
2251 }
2252
2253 //edition
2254 if(reference.getInReference() != null){
2255 reference.getInReference().setEdition(edition);
2256 reference.getInReference().setEditor(editors);
2257 }else{
2258 //edition
2259 reference.setEdition(edition);
2260 reference.setEditor(editors);
2261 }
2262
2263 //volume
2264 reference.setVolume(volume);
2265
2266 //pages
2267 reference.setPages(pages);
2268
2269 return reference;
2270 }
2271
2272 private boolean isBookSection(String type, String authorStr, String pubTitle,
2273 String editors, String pubName, String volume) {
2274 //type not yet handled
2275 if (authorStr != null && editors != null
2276 && pubTitle != null && pubName != null){
2277 return true;
2278 }else if (pubTitle != null && pubName != null && volume == null){
2279 return true;
2280 }else{
2281 return false;
2282 }
2283 }
2284
2285
2286 private boolean isArticleNonCitation(String type, String pubName, String volume, String editors) {
2287 if ("journal".equalsIgnoreCase(type)){
2288 return true;
2289 }else if (volume != null && editors == null){
2290 if (pubName != null && IJournal.guessIsJournalName(pubName)){
2291 return true;
2292 }else{
2293 return false; //unclear
2294 }
2295 }else{
2296 return false;
2297 }
2298 }
2299
2300 protected String handleRefPart(MarkupImportState state, XMLEventReader reader,
2301 XMLEvent parentEvent, Map<String, String> refMap)
2302 throws XMLStreamException {
2303 String classValue = getClassOnlyAttribute(parentEvent);
2304
2305 String text = "";
2306 while (reader.hasNext()) {
2307 XMLEvent next = readNoWhitespace(reader);
2308 if (isMyEndingElement(next, parentEvent)) {
2309 refMap.put(classValue, text);
2310 return classValue;
2311 } else if (next.isStartElement()) {
2312 if (isStartingElement(next, ANNOTATION)) {
2313 handleNotYetImplementedElement(next); // TODO test handleSimpleAnnotation
2314 } else if (isStartingElement(next, ITALICS)) {
2315 handleNotYetImplementedElement(next);
2316 } else if (isStartingElement(next, BOLD)) {
2317 handleNotYetImplementedElement(next);
2318 } else {
2319 handleUnexpectedStartElement(next.asStartElement());
2320 }
2321 } else if (next.isCharacters()) {
2322 text += next.asCharacters().getData();
2323 } else {
2324 handleUnexpectedEndElement(next.asEndElement());
2325 }
2326 }
2327 throw new IllegalStateException("RefPart has no closing tag");
2328 }
2329
2330
2331 private boolean isBlankOrPunctuation(String text) {
2332 if (text == null){
2333 return true;
2334 } else {
2335 return text.matches("^[\\s\\.,;:]*$");
2336 }
2337 }
2338
2339
2340 /**
2341 *Is heading an "habitat" type heading
2342 * @param heading
2343 * @return true if heading matches something like Eco(logy), Habitat(s) or Habitat & Ecology
2344 */
2345 private boolean isHabitatHeading(String heading) {
2346 return heading.trim().matches("(Ecol(ogy)?|Habitat|Habitat\\s&\\sEcology)\\.?");
2347 }
2348
2349
2350 private String putCurrentSubheading(Map<String, SubheadingResult> subHeadingMap, String currentSubheading,
2351 String text, StringReferences fullReferences, List<IntextReference> inlineReferences) {
2352 if (isNotBlank(text) || (fullReferences != null && isNotEmptyCollection(fullReferences.content))
2353 ||isNotEmptyCollection(inlineReferences)) {
2354 SubheadingResult result = new SubheadingResult();
2355 text = removeStartingMinus(text);
2356 result.text = text.trim();
2357 result.references = fullReferences == null ? new StringReferences() : fullReferences;
2358 result.inlineReferences = inlineReferences;
2359 subHeadingMap.put(currentSubheading, result);
2360 }
2361 return "";
2362 }
2363
2364 /**
2365 * @param references2
2366 * @return
2367 */
2368 protected boolean isNotEmptyCollection(Collection<?> list) {
2369 return list != null && !list.isEmpty();
2370 }
2371
2372
2373 private String removeStartingMinus(String string) {
2374 string = replaceStart(string, "-");
2375 string = replaceStart(string, "\u002d");
2376 string = replaceStart(string, "\u2013");
2377 string = replaceStart(string, "\u2014");
2378 string = replaceStart(string, "--");
2379 return string;
2380 }
2381
2382
2383 /**
2384 * @param value
2385 * @param replacementString
2386 */
2387 private String replaceStart(String value, String replacementString) {
2388 if (value.startsWith(replacementString) ){
2389 value = value.substring(replacementString.length()).trim();
2390 }
2391 while (value.startsWith("-") || value.startsWith("\u2014") ){
2392 value = value.substring("-".length()).trim();
2393 }
2394 return value;
2395 }
2396
2397
2398 private String handleDistributionLocality(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
2399 Map<String, Attribute> attributes = getAttributes(parentEvent);
2400 String classValue = getAndRemoveRequiredAttributeValue(parentEvent, attributes, CLASS);
2401 String statusValue =getAndRemoveAttributeValue(attributes, STATUS);
2402 String frequencyValue =getAndRemoveAttributeValue(attributes, FREQUENCY);
2403
2404 Taxon taxon = state.getCurrentTaxon();
2405 // TODO which ref to take?
2406 Reference sourceReference = state.getConfig().getSourceReference();
2407
2408 String text = "";
2409 while (reader.hasNext()) {
2410 XMLEvent next = readNoWhitespace(reader);
2411 if (isMyEndingElement(next, parentEvent)) {
2412 if (StringUtils.isNotBlank(text)) {
2413 String label = CdmUtils.removeTrailingDot(normalize(text));
2414 TaxonDescription description = getExtractedMarkupMarkedDescription(state, taxon, sourceReference);
2415 NamedAreaLevel level = makeNamedAreaLevel(state,classValue, next);
2416
2417 //status
2418 PresenceAbsenceTerm status = null;
2419 if (isNotBlank(statusValue)){
2420 try {
2421 status = state.getTransformer().getPresenceTermByKey(statusValue);
2422 if (status == null){
2423 UUID uuid = state.getTransformer().getPresenceTermUuid(statusValue);
2424 if (uuid != null){
2425 status = this.getPresenceAbsenceTerm(state, uuid, statusValue, statusValue, statusValue, false, null);
2426 }
2427 }
2428 if (status == null){
2429 //TODO
2430 String message = "The presence/absence status '%s' could not be transformed to an CDM status";
2431 fireWarningEvent(String.format(message, statusValue), next, 4);
2432 }
2433 } catch (UndefinedTransformerMethodException e) {
2434 throw new RuntimeException(e);
2435 }
2436 }else{
2437 status = PresenceAbsenceTerm.PRESENT();
2438 }
2439 //frequency
2440 if (isNotBlank(frequencyValue)){
2441 if (frequencyValue.equalsIgnoreCase("absent") && PresenceAbsenceTerm.PRESENT().equals(status)){ //to be on the safe side that not real status has been defined yet.
2442 status = PresenceAbsenceTerm.ABSENT();
2443 }else{
2444 String message = "The frequency attribute is currently not yet available in CDM";
2445 fireWarningEvent(message, parentEvent, 6);
2446 }
2447 }
2448
2449 NamedArea higherArea = null;
2450 List<NamedArea> areas = new ArrayList<>();
2451
2452 String patSingleArea = "([^,\\(]{3,})";
2453 String patSeparator = "(,|\\sand\\s)";
2454 String hierarchiePattern = String.format("%s\\((%s(%s%s)*)\\)", patSingleArea, patSingleArea, patSeparator, patSingleArea);
2455 Pattern patHierarchie = Pattern.compile(hierarchiePattern, Pattern.CASE_INSENSITIVE);
2456 Matcher matcher = patHierarchie.matcher(label);
2457 if (matcher.matches()){
2458 String higherAreaStr = matcher.group(1).trim();
2459 higherArea = makeArea(state, higherAreaStr, level);
2460 String[] innerAreas = matcher.group(2).split(patSeparator);
2461 for (String innerArea : innerAreas){
2462 if (isNotBlank(innerArea)){
2463 NamedArea singleArea = makeArea(state, innerArea.trim(), level);
2464 areas.add(singleArea);
2465 NamedArea partOf = singleArea.getPartOf();
2466 // if (partOf == null){
2467 // singleArea.setPartOf(higherArea);
2468 // }
2469 }
2470 }
2471 }else{
2472 NamedArea singleArea = makeArea(state, label, level);
2473 areas.add(singleArea);
2474 }
2475
2476 for (NamedArea area : areas){
2477 //create distribution
2478 Distribution distribution = Distribution.NewInstance(area,status);
2479 distribution.addPrimaryTaxonomicSource(sourceReference);
2480 description.addElement(distribution);
2481 }
2482 } else {
2483 String message = "Empty distribution locality";
2484 fireWarningEvent(message, next, 4);
2485 }
2486 return text;
2487 } else if (isStartingElement(next, COORDINATES)) {
2488 //TODO
2489 handleNotYetImplementedElement(next);
2490 } else if (isEndingElement(next, COORDINATES)) {
2491 //TODO
2492 popUnimplemented(next.asEndElement());
2493 } else if (next.isCharacters()) {
2494 text += next.asCharacters().getData();
2495 } else {
2496 handleUnexpectedElement(next);
2497 }
2498 }
2499 throw new IllegalStateException("<DistributionLocality> has no closing tag");
2500 }
2501
2502 /**
2503 * @param state
2504 * @param taxon
2505 * @param ref
2506 * @return
2507 */
2508 protected TaxonDescription getExtractedMarkupMarkedDescription(MarkupImportState state, Taxon taxon, Reference sourceReference) {
2509 MarkerType markerType = getMarkerType(
2510 state,
2511 MarkupTransformer.uuidMarkerExtractedMarkupData,
2512 "Extracted factual data", "Marker type for factual data imported from markup where the markup for this data was included in parent markup that was also imported including the text from this markup.",
2513 "Extr. data",
2514 null);
2515 String title = "Extracted markup data for " + taxon.getName().getTitleCache();
2516 TaxonDescription description = getMarkedTaxonDescription(taxon, markerType, false, true, sourceReference, title);
2517 return description;
2518 }
2519
2520 }