43c13337374c5c59c38244a25cc8b609669ffcdf
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupImportBase.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.net.MalformedURLException;
13 import java.util.ArrayList;
14 import java.util.Arrays;
15 import java.util.Collection;
16 import java.util.HashMap;
17 import java.util.HashSet;
18 import java.util.Iterator;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Set;
22 import java.util.Stack;
23 import java.util.UUID;
24 import java.util.regex.Matcher;
25 import java.util.regex.Pattern;
26
27 import javax.xml.namespace.QName;
28 import javax.xml.stream.Location;
29 import javax.xml.stream.XMLEventReader;
30 import javax.xml.stream.XMLStreamConstants;
31 import javax.xml.stream.XMLStreamException;
32 import javax.xml.stream.events.Attribute;
33 import javax.xml.stream.events.Characters;
34 import javax.xml.stream.events.EndElement;
35 import javax.xml.stream.events.StartElement;
36 import javax.xml.stream.events.XMLEvent;
37
38 import org.apache.commons.lang.StringUtils;
39 import org.apache.commons.lang.WordUtils;
40 import org.apache.log4j.Logger;
41
42 import eu.etaxonomy.cdm.api.service.IClassificationService;
43 import eu.etaxonomy.cdm.api.service.ITermService;
44 import eu.etaxonomy.cdm.common.CdmUtils;
45 import eu.etaxonomy.cdm.ext.geo.GeoServiceArea;
46 import eu.etaxonomy.cdm.ext.geo.IEditGeoService;
47 import eu.etaxonomy.cdm.io.common.CdmImportBase;
48 import eu.etaxonomy.cdm.io.common.CdmImportBase.TermMatchMode;
49 import eu.etaxonomy.cdm.io.common.events.IIoEvent;
50 import eu.etaxonomy.cdm.io.common.events.IoProblemEvent;
51 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
52 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
53 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
54 import eu.etaxonomy.cdm.model.common.Annotation;
55 import eu.etaxonomy.cdm.model.common.AnnotationType;
56 import eu.etaxonomy.cdm.model.common.CdmBase;
57 import eu.etaxonomy.cdm.model.common.DefinedTerm;
58 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
59 import eu.etaxonomy.cdm.model.common.Extension;
60 import eu.etaxonomy.cdm.model.common.ExtensionType;
61 import eu.etaxonomy.cdm.model.common.IntextReference;
62 import eu.etaxonomy.cdm.model.common.Language;
63 import eu.etaxonomy.cdm.model.common.MarkerType;
64 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
65 import eu.etaxonomy.cdm.model.common.TermVocabulary;
66 import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
67 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
68 import eu.etaxonomy.cdm.model.description.Distribution;
69 import eu.etaxonomy.cdm.model.description.Feature;
70 import eu.etaxonomy.cdm.model.description.PolytomousKey;
71 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
72 import eu.etaxonomy.cdm.model.description.TaxonDescription;
73 import eu.etaxonomy.cdm.model.description.TextData;
74 import eu.etaxonomy.cdm.model.location.NamedArea;
75 import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
76 import eu.etaxonomy.cdm.model.location.NamedAreaType;
77 import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;
78 import eu.etaxonomy.cdm.model.media.Media;
79 import eu.etaxonomy.cdm.model.name.INonViralName;
80 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
81 import eu.etaxonomy.cdm.model.name.Rank;
82 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
83 import eu.etaxonomy.cdm.model.reference.IArticle;
84 import eu.etaxonomy.cdm.model.reference.IBook;
85 import eu.etaxonomy.cdm.model.reference.IBookSection;
86 import eu.etaxonomy.cdm.model.reference.IJournal;
87 import eu.etaxonomy.cdm.model.reference.Reference;
88 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
89 import eu.etaxonomy.cdm.model.reference.ReferenceType;
90 import eu.etaxonomy.cdm.model.taxon.Classification;
91 import eu.etaxonomy.cdm.model.taxon.Taxon;
92 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
93 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
94 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
95 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
96
97 /**
98 * @author a.mueller
99 * @since 04.08.2008
100 */
101 public abstract class MarkupImportBase {
102 private static final Logger logger = Logger.getLogger(MarkupImportBase.class);
103
104 //Base
105 protected static final String ALTITUDE = "altitude";
106 protected static final String ANNOTATION = "annotation";
107 protected static final String BOLD = "bold";
108 protected static final String BR = "br";
109 protected static final String DOUBTFUL = "doubtful";
110 protected static final String CITATION = "citation";
111 protected static final String CLASS = "class";
112 protected static final String COORDINATES = "coordinates";
113 protected static final String DATES = "dates";
114 protected static final String GATHERING = "gathering";
115 protected static final String GATHERING_GROUP = "gatheringGroup";
116 protected static final String GENUS_ABBREVIATION = "genus abbreviation";
117 protected static final String FOOTNOTE = "footnote";
118 protected static final String FOOTNOTE_REF = "footnoteRef";
119 protected static final String FULL_NAME = "fullName";
120 protected static final String ITALICS = "italics";
121 protected static final String NUM = "num";
122 protected static final String NOTES = "notes";
123 protected static final String PUBLICATION = "publication";
124 protected static final String SPECIMEN_TYPE = "specimenType";
125 protected static final String STATUS = "status";
126 protected static final String SUB_HEADING = "subHeading";
127 protected static final String TYPE = "type";
128 protected static final String TYPE_STATUS = "typeStatus";
129 protected static final String UNKNOWN = "unknown";
130
131
132 protected static final boolean CREATE_NEW = true;
133 protected static final boolean NO_IMAGE_GALLERY = false;
134 protected static final boolean IMAGE_GALLERY = true;
135
136 protected static final String ADDENDA = "addenda";
137 protected static final String BIBLIOGRAPHY = "bibliography";
138 protected static final String BIOGRAPHIES = "biographies";
139 protected static final String CHAR = "char";
140 protected static final String DEDICATION = "dedication";
141 protected static final String DEFAULT_MEDIA_URL = "defaultMediaUrl";
142 protected static final String DISTRIBUTION_LIST = "distributionList";
143 protected static final String DISTRIBUTION_LOCALITY = "distributionLocality";
144 protected static final String FEATURE = "feature";
145 protected static final String FIGURE = "figure";
146 protected static final String FIGURE_LEGEND = "figureLegend";
147 protected static final String FIGURE_PART = "figurePart";
148 protected static final String FIGURE_REF = "figureRef";
149 protected static final String FIGURE_TITLE = "figureTitle";
150 protected static final String FOOTNOTE_STRING = "footnoteString";
151 protected static final String FREQUENCY = "frequency";
152 protected static final String HEADING = "heading";
153 protected static final String HABITAT = "habitat";
154 protected static final String HABITAT_LIST = "habitatList";
155 protected static final String IS_FREETEXT = "isFreetext";
156 protected static final String ID = "id";
157 protected static final String KEY = "key";
158 protected static final String LIFE_CYCLE_PERIODS = "lifeCyclePeriods";
159 protected static final String META_DATA = "metaData";
160 protected static final String MODS = "mods";
161
162 protected static final String NOMENCLATURE = "nomenclature";
163 protected static final String QUOTE = "quote";
164 protected static final String RANK = "rank";
165 protected static final String REF = "ref";
166 protected static final String REF_NUM = "refNum";
167 protected static final String REFERENCE = "reference";
168 protected static final String REFERENCES = "references";
169 protected static final String SUB_CHAR = "subChar";
170 protected static final String TAXON = "taxon";
171 protected static final String TAXONTITLE = "taxontitle";
172 protected static final String TAXONTYPE = "taxontype";
173 protected static final String TEXT_SECTION = "textSection";
174 protected static final String TREATMENT = "treatment";
175 protected static final String SERIALS_ABBREVIATIONS = "serialsAbbreviations";
176 protected static final String STRING = "string";
177 protected static final String URL = "url";
178 protected static final String WRITER = "writer";
179
180 protected static final String LOCALITY = "locality";
181
182
183
184 //Nomenclature
185 protected static final String ACCEPTED = "accepted";
186 protected static final String ACCEPTED_NAME = "acceptedName";
187 protected static final String ALTERNATEPUBTITLE = "alternatepubtitle";
188 protected static final String APPENDIX = "appendix";
189 protected static final String AUTHOR = "author";
190 protected static final String DETAILS = "details";
191 protected static final String EDITION = "edition";
192 protected static final String EDITORS = "editors";
193 protected static final String HOMONYM = "homonym";
194 protected static final String HOMOTYPES = "homotypes";
195 protected static final String NOMENCLATURAL_NOTES = "nomenclaturalNotes";
196 protected static final String INFRANK = "infrank";
197 protected static final String INFRAUT = "infraut";
198 protected static final String INFRPARAUT = "infrparaut";
199 protected static final String ISSUE = "issue";
200 protected static final String NAME_STATUS = "namestatus";
201 protected static final String NAME = "name";
202 protected static final String NAME_TYPE = "nameType";
203 protected static final String NOM = "nom";
204 protected static final String PAGES = "pages";
205 protected static final String PARAUT = "paraut";
206 protected static final String PUBFULLNAME = "pubfullname";
207 protected static final String PUBLOCATION = "publocation";
208 protected static final String PUBLISHER = "publisher";
209 protected static final String PUBNAME = "pubname";
210 protected static final String PUBTITLE = "pubtitle";
211 protected static final String PUBTYPE = "pubtype";
212 protected static final String REF_PART = "refPart";
213 protected static final String SYNONYM = "synonym";
214 protected static final String USAGE = "usage";
215 protected static final String VOLUME = "volume";
216 protected static final String YEAR = "year";
217
218
219 //keys
220 protected static final String COUPLET = "couplet";
221 protected static final String IS_SPOTCHARACTERS = "isSpotcharacters";
222 protected static final String ONLY_NUMBERED_TAXA_EXIST = "onlyNumberedTaxaExist";
223 protected static final String EXISTS = "exists";
224 protected static final String KEYNOTES = "keynotes";
225 protected static final String KEY_TITLE = "keyTitle";
226 protected static final String QUESTION = "question";
227 protected static final String TEXT = "text";
228 protected static final String TO_COUPLET = "toCouplet";
229 protected static final String TO_KEY = "toKey";
230 protected static final String TO_TAXON = "toTaxon";
231
232
233 //Feature
234 protected static final String VERNACULAR_NAMES = "vernacularNames";
235 protected static final String VERNACULAR_NAME = "vernacularName";
236 protected static final String TRANSLATION = "translation";
237 protected static final String LOCAL_LANGUAGE = "localLanguage";
238
239
240
241 protected MarkupDocumentImport docImport;
242
243 private final IEditGeoService editGeoService;
244 protected MarkupFeatureImport featureImport;
245
/**
 * Creates a new import base bound to the given document import.
 * The edit geo service is fetched once from the document import and kept for later use.
 *
 * @param docImport the document import this instance delegates to
 */
public MarkupImportBase(MarkupDocumentImport docImport) {
    this.docImport = docImport;
    this.editGeoService = docImport.getEditGeoService();
}
251
252 private final Stack<QName> unhandledElements = new Stack<QName>();
253 private final Stack<QName> handledElements = new Stack<QName>();
254
255
256 protected <T extends CdmBase> void save(Collection<T> collection, MarkupImportState state) {
257 if (state.isCheck() || collection.isEmpty()){
258 return;
259 }
260 T example = collection.iterator().next();
261 if (example.isInstanceOf(TaxonBase.class)){
262 Collection<TaxonBase> typedCollection = (Collection<TaxonBase>)collection;
263 docImport.getTaxonService().saveOrUpdate(typedCollection);
264 }else if (example.isInstanceOf(Classification.class)){
265 Collection<Classification> typedCollection = (Collection<Classification>)collection;
266 docImport.getClassificationService().saveOrUpdate(typedCollection);
267 }else if (example.isInstanceOf(PolytomousKey.class)){
268 Collection<PolytomousKey> typedCollection = (Collection<PolytomousKey>)collection;
269 docImport.getPolytomousKeyService().saveOrUpdate(typedCollection);
270 }else if (example.isInstanceOf(DefinedTermBase.class)){
271 Collection<DefinedTermBase> typedCollection = (Collection<DefinedTermBase>)collection;
272 getTermService().saveOrUpdate(typedCollection);
273 }
274
275 }
276
277
278 //TODO move to service layer for all IdentifiableEntities
279 protected void save(CdmBase cdmBase, MarkupImportState state) {
280 if (state.isCheck()){
281 return;
282 }
283 cdmBase = CdmBase.deproxy(cdmBase, CdmBase.class);
284 if (cdmBase == null){
285 String message = "Tried to save a null object.";
286 fireWarningEvent(message, "--location ?? --", 6,1);
287 } else if (cdmBase.isInstanceOf(TaxonBase.class)){
288 docImport.getTaxonService().saveOrUpdate((TaxonBase<?>)cdmBase);
289 }else if (cdmBase.isInstanceOf(Classification.class)){
290 docImport.getClassificationService().saveOrUpdate((Classification)cdmBase);
291 }else if (cdmBase.isInstanceOf(PolytomousKey.class)){
292 docImport.getPolytomousKeyService().saveOrUpdate((PolytomousKey)cdmBase);
293 }else if (cdmBase.isInstanceOf(DefinedTermBase.class)){
294 docImport.getTermService().saveOrUpdate((DefinedTermBase<?>)cdmBase);
295 }else if (cdmBase.isInstanceOf(Media.class)){
296 docImport.getMediaService().saveOrUpdate((Media)cdmBase);
297 }else if (cdmBase.isInstanceOf(SpecimenOrObservationBase.class)){
298 docImport.getOccurrenceService().saveOrUpdate((SpecimenOrObservationBase<?>)cdmBase);
299 }else if (cdmBase.isInstanceOf(DescriptionElementBase.class)){
300 docImport.getDescriptionService().saveDescriptionElement((DescriptionElementBase)cdmBase);
301 }else if (cdmBase.isInstanceOf(Reference.class)){
302 docImport.getReferenceService().saveOrUpdate((Reference)cdmBase);
303 }else{
304 String message = "Unknown cdmBase type to save: " + cdmBase.getClass();
305 fireWarningEvent(message, "Unknown location", 8);
306 }
307 //logger.warn("Saved " + cdmBase);
308 }
309
310
311 protected ITermService getTermService() {
312 return docImport.getTermService();
313 }
314
315 protected IClassificationService getClassificationService() {
316 return docImport.getClassificationService();
317 }
318
319 //*********************** Attribute methods *************************************/
320
321 /**
322 * Returns a map for all attributes of an start element
323 * @param event
324 * @return
325 */
326 protected Map<String, Attribute> getAttributes(XMLEvent event) {
327 Map<String, Attribute> result = new HashMap<>();
328 if (!event.isStartElement()){
329 fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event.getLocation()), 1, 1);
330 return result;
331 }
332 StartElement element = event.asStartElement();
333 @SuppressWarnings("unchecked")
334 Iterator<Attribute> attributes = element.getAttributes();
335 while (attributes.hasNext()){
336 Attribute attribute = attributes.next();
337 //TODO namespaces
338 result.put(attribute.getName().getLocalPart(), attribute);
339 }
340 return result;
341 }
342
343 /**
344 * Throws an unexpected attributes event if the event has any attributes.
345 * @param event
346 */
347 protected void checkNoAttributes(Map<String, Attribute> attributes, XMLEvent event) {
348 String[] exceptions = new String[]{};
349 handleUnexpectedAttributes(event.getLocation(), attributes, 1, exceptions);
350 }
351
352
353
354 /**
355 * Throws an unexpected attributes event if the event has any attributes.
356 * @param event
357 */
358 protected void checkNoAttributes(XMLEvent event) {
359 String[] exceptions = new String[]{};
360 checkNoAttributes(event, 1, exceptions);
361 }
362
363 /**
364 * Throws an unexpected attributes event if the event has any attributes except those mentioned in "exceptions".
365 * @param event
366 * @param exceptions
367 */
368 protected void checkNoAttributes(XMLEvent event, int stackDepth, String... exceptions) {
369 if (! event.isStartElement()){
370 fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event.getLocation()), 1, 1);
371 return;
372 }
373 StartElement startElement = event.asStartElement();
374 Map<String, Attribute> attributes = getAttributes(startElement);
375 handleUnexpectedAttributes(startElement.getLocation(), attributes, stackDepth+1, exceptions);
376 }
377
378
379 /**
380 * Checks if the given attribute exists and has the given value.
381 * If yes, true is returned and the attribute is removed from the attributes map.
382 * Otherwise false is returned.
383 * @param attributes
384 * @param attrName
385 * @param value
386 * @return <code>true</code> if attribute has given value, <code>false</code> otherwise
387 */
388 protected boolean checkAndRemoveAttributeValue( Map<String, Attribute> attributes, String attrName, String value) {
389 Attribute attr = attributes.get(attrName);
390 if (attr == null ||value == null ){
391 return false;
392 }else{
393 if (value.equals(attr.getValue())){
394 attributes.remove(attrName);
395 return true;
396 }else{
397 return false;
398 }
399 }
400 }
401
402
403 /**
404 * Returns the value of a given attribute name and removes the attribute from the attributes map.
405 * Returns <code>null</code> if attribute does not exist.
406 * @param attributes the list of all attributes
407 * @param attrName the requested attribute name
408 * @return the value for the attribute
409 */
410 protected String getAndRemoveAttributeValue(Map<String, Attribute> attributes, String attrName) {
411 return getAndRemoveAttributeValue(null, attributes, attrName, false, 1);
412 }
413
414 /**
415 * Returns the value of a boolean attribute with the given name and removes the attribute from the attributes map.
416 * Returns <code>defaultValue</code> if the attribute does not exist. ALso returns <code>defaultValue</code> and throws a warning if the
417 * attribute has no boolean value (true, false).
418 * @param
419 * @param attributes the
420 * @param attrName the name of the attribute
421 * @param defaultValue the default value to return if attribute does not exist or can not be defined
422 * @return
423 */
424 protected Boolean getAndRemoveBooleanAttributeValue(XMLEvent event, Map<String, Attribute> attributes, String attrName, Boolean defaultValue) {
425 String value = getAndRemoveAttributeValue(null, attributes, attrName, false, 1);
426 Boolean result = defaultValue;
427 if (value != null){
428 if (value.equalsIgnoreCase("true")){
429 result = true;
430 }else if (value.equalsIgnoreCase("false")){
431 result = false;
432 }else{
433 String message = "Boolean attribute has no boolean value ('true', 'false') but '%s'";
434 fireWarningEvent(String.format(message, value), makeLocationStr(event.getLocation()), 6, 1);
435 }
436 }
437 return result;
438 }
439
440
441 /**
442 * Returns the value of a given attribute name and returns the attribute from the attributes map.
443 * Fires a mandatory field is missing event if the attribute does not exist.
444 * @param xmlEvent
445 * @param attributes
446 * @param attrName
447 * @return
448 */
449 protected String getAndRemoveRequiredAttributeValue(XMLEvent xmlEvent, Map<String, Attribute> attributes, String attrName) {
450 return getAndRemoveAttributeValue(xmlEvent, attributes, attrName, true, 1);
451 }
452
453 /**
454 * Returns the value of a given attribute name and returns the attribute from the attributes map.
455 * If required is <code>true</code> and the attribute does not exist a mandatory field is missing event is fired.
456 * @param xmlEvent
457 * @param attributes
458 * @param attrName
459 * @param isRequired
460 * @return
461 */
462 private String getAndRemoveAttributeValue(XMLEvent xmlEvent, Map<String, Attribute> attributes, String attrName, boolean isRequired, int stackDepth) {
463 Attribute attr = attributes.get(attrName);
464 if (attr == null ){
465 if (isRequired){
466 fireMandatoryElementIsMissing(xmlEvent, attrName, 8, stackDepth+1);
467 }
468 return null;
469 }else{
470 attributes.remove(attrName);
471 return attr.getValue();
472 }
473 }
474
475 /**
476 * Fires an not yet implemented event if the given attribute exists in attributes.
477 * @param attributes
478 * @param attrName
479 * @param event
480 */
481 protected void handleNotYetImplementedAttribute(Map<String, Attribute> attributes,
482 String attrName, XMLEvent event) {
483 Attribute attr = attributes.get(attrName);
484 if (attr != null){
485 attributes.remove(attrName);
486 QName qName = attr.getName();
487 fireNotYetImplementedAttribute(event.getLocation(), qName, attr.getValue(), 1);
488 }
489 }
490
491 /**
492 * Fires an unhandled attributes event, if attributes exist in attributes map not covered by the exceptions.
493 * No event is fired if the unhandled elements stack is not empty.
494 * @param location
495 * @param attributes
496 * @param exceptions
497 */
498 protected void handleUnexpectedAttributes(Location location,Map<String, Attribute> attributes, String... exceptions) {
499 handleUnexpectedAttributes(location, attributes, 1, exceptions);
500 }
501
502 /**
503 * see {@link #handleUnexpectedAttributes(Location, Map, String...)}
504 *
505 * @param location
506 * @param attributes
507 * @param stackDepth the stack trace depth
508 * @param exceptions
509 */
510 private void handleUnexpectedAttributes(Location location,Map<String, Attribute> attributes, int stackDepth, String... exceptions) {
511 if (attributes.size() > 0){
512 if (this.unhandledElements.size() == 0 ){
513 boolean hasUnhandledAttributes = false;
514 for (String key : attributes.keySet()){
515 boolean isException = false;
516 for (String exception : exceptions){
517 if(key.equals(exception)){
518 isException = true;
519 }
520 }
521 if (!isException){
522 hasUnhandledAttributes = true;
523 }
524 }
525 if (hasUnhandledAttributes){
526 fireUnexpectedAttributes(location, attributes, stackDepth+1);
527 }
528 }
529 }
530 }
531
532
533 private void fireUnexpectedAttributes(Location location, Map<String, Attribute> attributes, int stackDepth) {
534 String attributesString = "";
535 for (String key : attributes.keySet()){
536 Attribute attribute = attributes.get(key);
537 attributesString = CdmUtils.concat(",", attributesString, attribute.getName().getLocalPart() + ":" + attribute.getValue());
538 }
539 String message = "Unexpected attributes: %s";
540 IoProblemEvent event = makeProblemEvent(location, String.format(message, attributesString), 1 , stackDepth +1 );
541 fire(event);
542 }
543
544
545 protected void fireUnexpectedAttributeValue(XMLEvent parentEvent, String attrName, String attrValue) {
546 String message = "Unexpected attribute value %s='%s'";
547 message = String.format(message, attrName, attrValue);
548 IoProblemEvent event = makeProblemEvent(parentEvent.getLocation(), message, 1 , 1 );
549 fire(event);
550 }
551
552 protected void handleNotYetImplementedAttributeValue(XMLEvent xmlEvent, String attrName, String attrValue) {
553 String message = "Attribute %s not yet implemented for value '%s'";
554 message = String.format(message, attrName, attrValue);
555 IIoEvent event = makeProblemEvent(xmlEvent.getLocation(), message, 1, 1 );
556 fire(event);
557 }
558
559 protected void fireNotYetImplementedAttribute(Location location, QName qName,
560 String value, int stackDepth) {
561 String message = "Attribute not yet implemented: %s (%s)";
562 IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart(), value), 1, stackDepth+1 );
563 fire(event);
564 }
565
566
567 protected void fireUnexpectedEvent(XMLEvent xmlEvent, int stackDepth) {
568 Location location = xmlEvent.getLocation();
569 String message = "Unexpected event: %s";
570 IIoEvent event = makeProblemEvent(location, String.format(message, xmlEvent.toString()), 2, stackDepth +1);
571 fire(event);
572 }
573
574 protected void fireUnexpectedStartElement(Location location, StartElement startElement, int stackDepth) {
575 QName qName = startElement.getName();
576 String message = "Unexpected start element: %s";
577 IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 2, stackDepth +1);
578 fire(event);
579 }
580
581
582 protected void fireUnexpectedEndElement(Location location, EndElement endElement, int stackDepth) {
583 QName qName = endElement.getName();
584 String message = "Unexpected end element: %s";
585 IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 16, stackDepth+1);
586 fire(event);
587 }
588
589 protected void fireNotYetImplementedElement(Location location, QName qName, int stackDepth) {
590 String message = "Element not yet implemented: %s";
591 IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 1, stackDepth+1 );
592 fire(event);
593 }
594
595 protected void fireNotYetImplementedCharacters(Location location, Characters chars, int stackDepth) {
596 String message = "Characters not yet handled: %s";
597 IIoEvent event = makeProblemEvent(location, String.format(message, chars.getData()), 1, stackDepth+1 );
598 fire(event);
599 }
600
601 /**
602 * Creates a problem event.
603 * Be aware of the right depths of the stack trace !
604 * @param location
605 * @param message
606 * @param severity
607 * @return
608 */
609 private IoProblemEvent makeProblemEvent(Location location, String message, int severity, int stackDepth) {
610 stackDepth++;
611 StackTraceElement[] stackTrace = new Exception().getStackTrace();
612 int lineNumber = stackTrace[stackDepth].getLineNumber();
613 String methodName = stackTrace[stackDepth].getMethodName();
614 String locationStr = makeLocationStr(location);
615 String className = stackTrace[stackDepth].getClassName();
616 Class<?> declaringClass;
617 try {
618 declaringClass = Class.forName(className);
619 } catch (ClassNotFoundException e) {
620 declaringClass = this.getClass();
621 }
622 IoProblemEvent event = IoProblemEvent.NewInstance(declaringClass, message,
623 locationStr, lineNumber, severity, methodName);
624 return event;
625 }
626
627 /**
628 * Creates a string from a location
629 * @param location
630 * @return
631 */
632 protected String makeLocationStr(Location location) {
633 String locationStr = location == null ? " - no location - " : "l." + location.getLineNumber() + "/c."+ location.getColumnNumber();
634 return locationStr;
635 }
636
637
638 /**
639 * Fires an unexpected element event if the unhandled elements stack is empty.
640 * Otherwise adds the element to the stack.
641 * @param event
642 */
643 protected void handleUnexpectedStartElement(XMLEvent event) {
644 handleUnexpectedStartElement(event, 1);
645 }
646
647 /**
648 * Fires an unexpected element event if the unhandled elements stack is empty.
649 * Otherwise adds the element to the stack.
650 * @param event
651 */
652 protected void handleUnexpectedStartElement(XMLEvent event, int stackDepth) {
653 QName qName = event.asStartElement().getName();
654 if (! unhandledElements.empty()){
655 unhandledElements.push(qName);
656 }else{
657 fireUnexpectedStartElement(event.getLocation(), event.asStartElement(), stackDepth + 1);
658 }
659 }
660
661
662 protected void handleUnexpectedEndElement(EndElement event) {
663 handleUnexpectedEndElement(event, 1);
664 }
665
666 /**
667 * Fires an unexpected element event if the event is not the last on the stack.
668 * Otherwise removes last stack element.
669 * @param event
670 */
671 protected void handleUnexpectedEndElement(EndElement event, int stackDepth) {
672 QName qName = event.asEndElement().getName();
673 if (!unhandledElements.isEmpty() && unhandledElements.peek().equals(qName)){
674 unhandledElements.pop();
675 }else{
676 fireUnexpectedEndElement(event.getLocation(), event.asEndElement(), stackDepth + 1);
677 }
678 }
679
680 /**
681 *
682 * @param endElement
683 */
684 protected void popUnimplemented(EndElement endElement) {
685 QName qName = endElement.asEndElement().getName();
686 if (unhandledElements.peek().equals(qName)){
687 unhandledElements.pop();
688 }else{
689 String message = "End element is not last on stack: %s";
690 message = String.format(message, qName.getLocalPart());
691 IIoEvent event = makeProblemEvent(endElement.getLocation(), message, 16, 1);
692 fire(event);
693 }
694
695 }
696
697
698 /**
699 * Fires an unexpected element event if the unhandled element stack is empty.
700 * @param event
701 */
702 protected void handleUnexpectedElement(XMLEvent event) {
703 if (event.isStartElement()){
704 handleUnexpectedStartElement(event, 2);
705 }else if (event.isEndElement()){
706 handleUnexpectedEndElement(event.asEndElement(), 2);
707 }else if (event.getEventType() == XMLStreamConstants.COMMENT){
708 //do nothing
709 }else if (! unhandledElements.empty()){
710 //do nothing
711 }else{
712 fireUnexpectedEvent(event, 1);
713 }
714 }
715
716 /**
717 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
718 * @param event
719 */
720 protected void handleNotYetImplementedCharacters(XMLEvent event) {
721 Characters chars = event.asCharacters();
722 fireNotYetImplementedCharacters(event.getLocation(), chars, 1);
723 }
724
725 /**
726 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
727 * @param event
728 */
729 protected void handleNotYetImplementedElement(XMLEvent event) {
730 QName qName = event.asStartElement().getName();
731 boolean isTopLevel = unhandledElements.isEmpty();
732 unhandledElements.push(qName);
733 if (isTopLevel){
734 fireNotYetImplementedElement(event.getLocation(), qName, 1);
735 }
736 }
737
738 /**
739 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
740 * @param event
741 */
742 protected void handleIgnoreElement(XMLEvent event) {
743 QName qName = event.asStartElement().getName();
744 unhandledElements.push(qName);
745 }
746
747 protected void handleAmbigousManually(MarkupImportState state,
748 XMLEventReader reader, StartElement startElement) {
749 QName qName = startElement.getName();
750 unhandledElements.push(qName);
751 fireWarningEvent(
752 "Handle manually: " + qName.getLocalPart() + " is ambigous and should therefore be handled manually",
753 makeLocationStr(startElement.getLocation()), 2, 2);
754 }
755
756 /**
757 * Checks if a mandatory text is not empty or null.
758 * Returns true if text is given.
759 * Fires an mandatory element is missing event otherwise and returns <code>null</code>.
760 * @param text
761 * @param parentEvent
762 * @return
763 */
764 protected boolean checkMandatoryText(String text, XMLEvent parentEvent) {
765 if (! StringUtils.isNotBlank(text)){
766 fireMandatoryElementIsMissing(parentEvent, "CData", 4, 1);
767 return false;
768 }
769 return true;
770 }
771
772 /**
773 * Fires an mandatory element is missing event if exists is <code>false</code>.
774 * @param hasMandatory
775 * @param parentEvent
776 * @param string
777 */
778 protected void checkMandatoryElement(boolean exists, StartElement parentEvent, String attrName) {
779 if (! exists){
780 fireMandatoryElementIsMissing(parentEvent, attrName, 5, 1);
781 }
782 }
783
784
785 /**
786 * Fires an element is missing event.
787 * @param xmlEvent
788 * @param string
789 * @param severity
790 * @param stackDepth
791 * @throws IllegalStateException if xmlEvent is not a StartElement and not an Attribute
792 */
793 private void fireMandatoryElementIsMissing(XMLEvent xmlEvent, String missingEventName, int severity, int stackDepth) throws IllegalStateException{
794 Location location = xmlEvent.getLocation();
795 String typeName;
796 QName qName;
797 if (xmlEvent.isAttribute()){
798 Attribute attribute = ((Attribute)xmlEvent);
799 typeName = "attribute";
800 qName = attribute.getName();
801 }else if (xmlEvent.isStartElement()){
802 typeName = "element";
803 qName = xmlEvent.asStartElement().getName();
804 }else{
805 throw new IllegalStateException("mandatory element only allowed for attributes and start tags in " + makeLocationStr(location));
806 }
807 String message = "Mandatory %s '%s' is missing in %s";
808 message = String.format(message, typeName , missingEventName, qName.getLocalPart());
809 IIoEvent event = makeProblemEvent(location, message, severity, stackDepth +1);
810 fire(event);
811 }
812
813
814
815
816 /**
817 * Returns <code>true</code> if the "next" event is the ending tag for the "parent" event.
818 * @param next end element to test, must not be null
819 * @param parentEvent start element to test
820 * @return true if the "next" event is the ending tag for the "parent" event.
821 * @throws XMLStreamException
822 */
823 protected boolean isMyEndingElement(XMLEvent next, XMLEvent parentEvent) throws XMLStreamException {
824 if (! parentEvent.isStartElement()){
825 String message = "Parent event should be start tag";
826 fireWarningEvent(message, makeLocationStr(next.getLocation()), 6);
827 return false;
828 }
829 return isEndingElement(next, parentEvent.asStartElement().getName().getLocalPart());
830 }
831
832 /**
833 * Trims the text and removes turns all whitespaces into single empty space.
834 * @param text
835 * @return
836 */
837 protected String normalize(String text) {
838 text = StringUtils.trimToEmpty(text);
839 text = text.replaceAll("\\s+", " ");
840 return text;
841 }
842
843
844
845 /**
846 * Removes whitespaces at beginning and end and makes the first letter
847 * a capital letter and all other letters small letters.
848 * @param value
849 * @return
850 */
851 protected String toFirstCapital(String value) {
852 if (StringUtils.isBlank(value)){
853 return value;
854 }else{
855 String result = "";
856 value = value.trim();
857 result += value.trim().substring(0,1).toUpperCase();
858 if (value.length()>1){
859 result += value.substring(1).toLowerCase();
860 }
861 return result;
862 }
863 }
864
865 /**
866 * Currently not used.
867 * @param str
868 * @param allowedNumberOfCharacters
869 * @param onlyFirstCapital
870 * @return
871 */
872 protected boolean isAbbreviation(String str, int allowedNumberOfCharacters, boolean onlyFirstCapital){
873 if (isBlank(str)){
874 return false;
875 }
876 str = str.trim();
877 if (! str.endsWith(".")){
878 return false;
879 }
880 str = str.substring(0, str.length() -1);
881 if (str.length() > allowedNumberOfCharacters){
882 return false;
883 }
884 final String re = "^\\p{javaUpperCase}\\p{javaLowerCase}*$";
885 if (str.matches(re)){
886 return true;
887 }else{
888 return false;
889 }
890 }
891
892 /**
893 * Checks if <code>abbrev</code> is the short form for the genus name (strGenusName).
894 * Usually this is the case if <code>abbrev</code> is the first letter (optional with ".")
895 * of strGenusName. But in older floras it may also be the first 2 or 3 letters (optional with dot).
896 * However, we allow only a maximum of 2 letters to be anambigous. In cases with 3 letters better
897 * change the original markup data.
898 * @param single
899 * @param strGenusName
900 * @return
901 */
902 protected boolean isGenusAbbrev(String abbrev, String strGenusName) {
903 if (! abbrev.matches("[A-Z][a-z]?\\.?")) {
904 return false;
905 }else if (abbrev.length() == 0 || strGenusName == null || strGenusName.length() == 0){
906 return false;
907 }else{
908 abbrev = abbrev.replace(".", "");
909 return strGenusName.startsWith(abbrev);
910 // boolean result = true;
911 // for (int i = 0 ; i < abbrev.length(); i++){
912 // result &= ( abbrev.charAt(i) == strGenusName.charAt(i));
913 // }
914 // return result;
915 }
916 }
917
918
919 /**
920 * Checks if all words in the given string start with a capital letter but do not have any further capital letter.
921 * @param word the string to be checekd. Usually should be a single word.
922 * @return true if the above is the case, false otherwise
923 */
924 protected boolean isFirstCapitalWord(String word) {
925 if (WordUtils.capitalizeFully(word).equals(word)){
926 return true;
927 }else if (WordUtils.capitalizeFully(word,new char[]{'-'}).equals(word)){
928 //for words like Le-Testui (which is a species epithet)
929 return true;
930 }else{
931 return false;
932 }
933 }
934
935
936 /**
937 * Read next event. Ignore whitespace events.
938 * @param reader
939 * @return
940 * @throws XMLStreamException
941 */
942 protected XMLEvent readNoWhitespace(XMLEventReader reader) throws XMLStreamException {
943 XMLEvent event = reader.nextEvent();
944 while (!unhandledElements.isEmpty()){
945 if (event.isStartElement()){
946 handleNotYetImplementedElement(event);
947 }else if (event.isEndElement()){
948 popUnimplemented(event.asEndElement());
949 }
950 event = reader.nextEvent();
951 }
952 while (event.isCharacters() && event.asCharacters().isWhiteSpace()){
953 event = reader.nextEvent();
954 }
955 return event;
956 }
957
958 /**
959 * Returns the REQUIRED "class" attribute for a given event and checks that it is the only attribute.
960 * @param parentEvent
961 * @return
962 */
963 protected String getClassOnlyAttribute(XMLEvent parentEvent) {
964 return getClassOnlyAttribute(parentEvent, true);
965 }
966
967
968 /**
969 * Returns the "class" attribute for a given event and checks that it is the only attribute.
970 * @param parentEvent
971 * @return
972 */
973 protected String getClassOnlyAttribute(XMLEvent parentEvent, boolean required) {
974 return getOnlyAttribute(parentEvent, CLASS, required);
975 }
976
977 /**
978 * Returns the value for the only attribute for a given event and checks that it is the only attribute.
979 * @param parentEvent
980 * @return
981 */
982 protected String getOnlyAttribute(XMLEvent parentEvent, String attrName, boolean required) {
983 Map<String, Attribute> attributes = getAttributes(parentEvent);
984 String classValue =getAndRemoveAttributeValue(parentEvent, attributes, attrName, required, 1);
985 checkNoAttributes(attributes, parentEvent);
986 return classValue;
987 }
988
989
/**
 * Fires a warning by delegating to <code>docImport.fireWarningEvent</code>
 * with unchanged arguments.
 * @param message the warning message
 * @param locationStr the location string
 * @param severity passed on unchanged
 * @param depth passed on unchanged
 */
protected void fireWarningEvent(String message, String locationStr, Integer severity, Integer depth) {
    docImport.fireWarningEvent(message, locationStr, severity, depth);
}
993
994 protected void fireWarningEvent(String message, XMLEvent event, Integer severity) {
995 docImport.fireWarningEvent(message, makeLocationStr(event.getLocation()), severity, 1);
996 }
997
/**
 * Delegates to <code>docImport.fireSchemaConflictEventExpectedStartTag</code>
 * with unchanged arguments.
 * @param elName the element name passed on
 * @param reader the event reader passed on
 * @throws XMLStreamException
 */
protected void fireSchemaConflictEventExpectedStartTag(String elName, XMLEventReader reader) throws XMLStreamException {
    docImport.fireSchemaConflictEventExpectedStartTag(elName, reader);
}
1001
1002
1003 protected void fireWarningEvent(String message, String locationStr, int severity) {
1004 docImport.fireWarningEvent(message, locationStr, severity, 1);
1005 }
1006
/**
 * Fires the given event by delegating to <code>docImport.fire</code>.
 * @param event the event to fire
 */
protected void fire(IIoEvent event) {
    docImport.fire(event);
}
1010
/**
 * Shortcut for <code>StringUtils.isNotBlank(str)</code>.
 * @param str the string to test
 * @return the result of <code>StringUtils.isNotBlank(str)</code>
 */
protected boolean isNotBlank(String str){
    return StringUtils.isNotBlank(str);
}
1014
/**
 * Shortcut for <code>StringUtils.isBlank(str)</code>.
 * @param str the string to test
 * @return the result of <code>StringUtils.isBlank(str)</code>
 */
protected boolean isBlank(String str){
    return StringUtils.isBlank(str);
}
1018
1019 protected TaxonDescription getTaxonDescription(Taxon taxon, Reference ref, boolean isImageGallery, boolean createNewIfNotExists) {
1020 return docImport.getTaxonDescription(taxon, ref, isImageGallery, createNewIfNotExists);
1021 }
1022
1023 protected TaxonDescription getDefaultTaxonDescription(Taxon taxon, boolean isImageGallery, boolean createNewIfNotExists, Reference source) {
1024 return docImport.getDefaultTaxonDescription(taxon, isImageGallery, createNewIfNotExists, source);
1025 }
1026
1027 /**
1028 * Returns the taxon description with marked as <code>true</code> with the given marker type.
1029 * If createNewIfNotExists a new description is created if it does not yet exist.
1030 * For the new description the source and the title are set if not <code>null</code>.
1031 * @param taxon
1032 * @param markerType
1033 * @param isImageGallery
1034 * @param createNewIfNotExists
1035 * @param source
1036 * @param title
1037 * @return the existing or new taxon description
1038 */
1039 protected TaxonDescription getMarkedTaxonDescription(Taxon taxon, MarkerType markerType, boolean isImageGallery, boolean createNewIfNotExists, Reference source, String title) {
1040 return docImport.getMarkedTaxonDescription(taxon, markerType, isImageGallery, createNewIfNotExists, source, title);
1041 }
1042
1043
1044 /**
1045 * Returns the default language defined in the state. If no default language is defined in the state,
1046 * the CDM default language is returned.
1047 * @param state
1048 * @return
1049 */
1050 protected Language getDefaultLanguage(MarkupImportState state) {
1051 Language result = state.getDefaultLanguage();
1052 if (result == null){
1053 result = Language.DEFAULT();
1054 }
1055 return result;
1056 }
1057
1058
1059 //*********************** FROM XML IMPORT BASE ****************************************
1060 protected boolean isEndingElement(XMLEvent event, String elName) throws XMLStreamException {
1061 return docImport.isEndingElement(event, elName);
1062 }
1063
1064 protected boolean isStartingElement(XMLEvent event, String elName) throws XMLStreamException {
1065 return docImport.isStartingElement(event, elName);
1066 }
1067
1068
/**
 * Delegates to <code>docImport.fillMissingEpithetsForTaxa</code> with unchanged arguments.
 * @param parentTaxon the parent taxon passed on
 * @param childTaxon the child taxon passed on
 */
protected void fillMissingEpithetsForTaxa(Taxon parentTaxon, Taxon childTaxon) {
    docImport.fillMissingEpithetsForTaxa(parentTaxon, childTaxon);
}
1072
1073 protected Feature getFeature(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<Feature> voc){
1074 return docImport.getFeature(state, uuid, label, text, labelAbbrev, voc);
1075 }
1076
1077 protected PresenceAbsenceTerm getPresenceAbsenceTerm(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, boolean isAbsenceTerm, TermVocabulary<PresenceAbsenceTerm> voc){
1078 return docImport.getPresenceTerm(state, uuid, label, text, labelAbbrev, isAbsenceTerm, voc);
1079 }
1080
1081 protected ExtensionType getExtensionType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev){
1082 return docImport.getExtensionType(state, uuid, label, text, labelAbbrev);
1083 }
1084
1085 protected DefinedTerm getIdentifierType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<DefinedTerm> voc){
1086 return docImport.getIdentifierType(state, uuid, label, text, labelAbbrev, voc);
1087 }
1088
1089 protected AnnotationType getAnnotationType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<AnnotationType> voc){
1090 return docImport.getAnnotationType(state, uuid, label, text, labelAbbrev, voc);
1091 }
1092
1093 protected MarkerType getMarkerType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<MarkerType> voc){
1094 return docImport.getMarkerType(state, uuid, label, text, labelAbbrev, voc);
1095 }
1096
1097 protected NamedAreaLevel getNamedAreaLevel(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<NamedAreaLevel> voc){
1098 return docImport.getNamedAreaLevel(state, uuid, label, text, labelAbbrev, voc);
1099 }
1100
1101 protected NamedArea getNamedArea(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, NamedAreaType areaType, NamedAreaLevel level, TermVocabulary voc, TermMatchMode matchMode){
1102 return docImport.getNamedArea(state, uuid, label, text, labelAbbrev, areaType, level, voc, matchMode);
1103 }
1104
1105 protected Language getLanguage(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<?> voc){
1106 return docImport.getLanguage(state, uuid, label, text, labelAbbrev, voc);
1107 }
1108
1109 // *************************************** Concrete methods **********************************************/
1110
1111
1112 /**
1113 * @param state
1114 * @param classValue
1115 * @param byAbbrev
1116 * @return
1117 */
1118 protected Rank makeRank(MarkupImportState state, String value, boolean byAbbrev) {
1119 Rank rank = null;
1120 if (StringUtils.isBlank(value)) {
1121 return null;
1122 }
1123 try {
1124 boolean useUnknown = true;
1125 NomenclaturalCode nc = makeNomenclaturalCode(state);
1126 if (value.equals(GENUS_ABBREVIATION)){
1127 rank = Rank.GENUS();
1128 }else if (byAbbrev) {
1129 rank = Rank.getRankByIdInVoc(value.toLowerCase(), nc, useUnknown);
1130 if (value.equalsIgnoreCase("forma")){
1131 return Rank.FORM();
1132 }else if (value.toLowerCase().matches("(sub)?(section|genus|series|tribe)")){
1133 return Rank.getRankByEnglishName(value, nc, useUnknown);
1134 }else if (value.equals("§")){
1135 return Rank.SECTION_BOTANY(); //Special case in Flora Malesiana
1136 }
1137 } else {
1138 rank = Rank.getRankByEnglishName(value, nc, useUnknown);
1139 }
1140 if (rank.equals(Rank.UNKNOWN_RANK())) {
1141 rank = null;
1142 }
1143 if (rank == null && "sous-genre".equalsIgnoreCase(value)){
1144 rank = Rank.SUBGENUS();
1145 }
1146 } catch (UnknownCdmTypeException e) {
1147 // doNothing
1148 }
1149 return rank;
1150 }
1151
1152 NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
1153 protected TeamOrPersonBase<?> createAuthor(MarkupImportState state, String authorTitle) {
1154 TeamOrPersonBase<?> result = parser.author(authorTitle);
1155 return state.getDeduplicationHelper(docImport).getExistingAuthor(state, result);
1156 }
1157
1158 protected String getAndRemoveMapKey(Map<String, String> map, String key) {
1159 String result = map.get(key);
1160 map.remove(key);
1161 if (result != null) {
1162 result = normalize(result);
1163 }
1164 return StringUtils.stripToNull(result);
1165 }
1166
1167
1168 /**
1169 * Creates a {@link INonViralName} object depending on the defined {@link NomenclaturalCode}
1170 * and the given parameters.
1171 * @param state
1172 * @param rank
1173 * @return
1174 */
1175 protected INonViralName createNameByCode(MarkupImportState state, Rank rank) {
1176 NomenclaturalCode nc = makeNomenclaturalCode(state);
1177 INonViralName name = nc.getNewTaxonNameInstance(rank);
1178 return name;
1179 }
1180
1181 protected void handleFullName(MarkupImportState state, XMLEventReader reader,
1182 INonViralName name, XMLEvent event) throws XMLStreamException {
1183 String fullNameStr;
1184 Map<String, Attribute> attrs = getAttributes(event);
1185 String rankStr = getAndRemoveRequiredAttributeValue(event, attrs, "rank");
1186 String hybridClass = getAndRemoveAttributeValue(attrs, "hybridClass");
1187
1188 Rank rank = makeRank(state, rankStr, false);
1189 name.setRank(rank);
1190 if (rank == null) {
1191 String message = "Rank was computed as null. This must not be.";
1192 fireWarningEvent(message, event, 6);
1193 name.setRank(Rank.UNKNOWN_RANK());
1194 }
1195 if (!attrs.isEmpty()) {
1196 handleUnexpectedAttributes(event.getLocation(), attrs);
1197 }
1198 fullNameStr = getCData(state, reader, event, false);
1199 NonViralNameParserImpl.NewInstance().parseFullName(name, fullNameStr, rank, false);
1200 if (hybridClass != null ){
1201 if ("hybrid formula".equals(hybridClass)){
1202 if (!name.isHybridFormula()){
1203 fireWarningEvent("Hybrid formula is not set though requested: " + fullNameStr, event, 4);
1204 }
1205 }else if ("hybrid".equals(hybridClass)){
1206 if (!name.isHybridName()){
1207 fireWarningEvent("Hybrid name is recognized: " + fullNameStr, event, 4);
1208 }
1209 }else{
1210 handleNotYetImplementedAttributeValue(event, "hybridClass", hybridClass);
1211 }
1212 }
1213 }
1214
1215
1216 /**
1217 * Returns the {@link NomenclaturalCode} for this import. Default is {@link NomenclaturalCode#ICBN} if
1218 * no code is defined.
1219 * @param state
1220 * @return
1221 */
1222 protected NomenclaturalCode makeNomenclaturalCode(MarkupImportState state) {
1223 NomenclaturalCode nc = state.getConfig().getNomenclaturalCode();
1224 if (nc == null) {
1225 nc = NomenclaturalCode.ICNAFP; // default;
1226 }
1227 return nc;
1228 }
1229
1230
1231 /**
1232 * @param state
1233 * @param levelString
1234 * @param next
1235 * @return
1236 */
1237 protected NamedAreaLevel makeNamedAreaLevel(MarkupImportState state, String levelString, XMLEvent next) {
1238 NamedAreaLevel level;
1239 try {
1240 level = state.getTransformer().getNamedAreaLevelByKey(levelString);
1241 if (level == null) {
1242 UUID levelUuid = state.getTransformer().getNamedAreaLevelUuid(levelString);
1243 if (levelUuid == null) {
1244 String message = "Unknown distribution locality class (named area level): %s. Create new level instead.";
1245 message = String.format(message, levelString);
1246 fireWarningEvent(message, next, 6);
1247 }
1248 level = getNamedAreaLevel(state, levelUuid, levelString, levelString, levelString, null);
1249 }
1250 } catch (UndefinedTransformerMethodException e) {
1251 throw new RuntimeException(e);
1252 }
1253 return level;
1254 }
1255
1256
1257 /**
1258 * @param state
1259 * @param areaName
1260 * @param level
1261 * @return
1262 */
1263 protected NamedArea makeArea(MarkupImportState state, String areaName, NamedAreaLevel level) {
1264
1265 //TODO FM vocabulary
1266 TermVocabulary<NamedArea> voc = null;
1267 NamedAreaType areaType = null;
1268
1269 NamedArea area = null;
1270 try {
1271 area = state.getTransformer().getNamedAreaByKey(areaName);
1272 } catch (UndefinedTransformerMethodException e) {
1273 throw new RuntimeException(e);
1274 }
1275 if (area == null){
1276 boolean isNewInState = false;
1277 UUID uuid = state.getAreaUuid(areaName);
1278 if (uuid == null){
1279 isNewInState = true;
1280 try {
1281 uuid = state.getTransformer().getNamedAreaUuid(areaName);
1282 if (uuid == null){
1283 uuid = UUID.randomUUID();
1284 state.putAreaUuid(areaName, uuid);
1285 }
1286 } catch (UndefinedTransformerMethodException e) {
1287 throw new RuntimeException(e);
1288 }
1289 }
1290
1291 CdmImportBase.TermMatchMode matchMode = CdmImportBase.TermMatchMode.UUID_LABEL;
1292 area = getNamedArea(state, uuid, areaName, areaName, areaName, areaType, level, voc, matchMode);
1293 if (isNewInState){
1294 state.putAreaUuid(areaName, area.getUuid());
1295
1296 //TODO just for testing -> make generic and move to better place
1297 String geoServiceLayer="vmap0_as_bnd_political_boundary_a";
1298 String layerFieldName ="nam";
1299
1300 if ("Bangka".equals(areaName)){
1301 String areaValue = "PULAU BANGKA#SUMATERA SELATAN";
1302 GeoServiceArea geoServiceArea = new GeoServiceArea();
1303 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1304 this.editGeoService.setMapping(area, geoServiceArea);
1305 // save(area, state);
1306 }
1307 if ("Luzon".equals(areaName)){
1308 GeoServiceArea geoServiceArea = new GeoServiceArea();
1309
1310 List<String> list = Arrays.asList("HERMANA MAYOR ISLAND#CENTRAL LUZON",
1311 "HERMANA MENOR ISLAND#CENTRAL LUZON",
1312 "CENTRAL LUZON");
1313 for (String areaValue : list){
1314 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1315 }
1316
1317 this.editGeoService.setMapping(area, geoServiceArea);
1318 // save(area, state);
1319 }
1320 if ("Mindanao".equals(areaName)){
1321 GeoServiceArea geoServiceArea = new GeoServiceArea();
1322
1323 List<String> list = Arrays.asList("NORTHERN MINDANAO",
1324 "SOUTHERN MINDANAO",
1325 "WESTERN MINDANAO");
1326 //TODO to be continued
1327 for (String areaValue : list){
1328 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1329 }
1330
1331 this.editGeoService.setMapping(area, geoServiceArea);
1332 // save(area, state);
1333 }
1334 if ("Palawan".equals(areaName)){
1335 GeoServiceArea geoServiceArea = new GeoServiceArea();
1336
1337 List<String> list = Arrays.asList("PALAWAN#SOUTHERN TAGALOG");
1338 for (String areaValue : list){
1339 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1340 }
1341
1342 this.editGeoService.setMapping(area, geoServiceArea);
1343 // save(area, state);
1344 }
1345
1346 }
1347 }
1348 return area;
1349 }
1350
1351
1352
1353 /**
1354 * Reads character data. Any element other than character data or the ending
1355 * tag will fire an unexpected element event.
1356 *
1357 * @see #getCData(MarkupImportState, XMLEventReader, XMLEvent, boolean)
1358 * @param state
1359 * @param reader
1360 * @param next
1361 * @return
1362 * @throws XMLStreamException
1363 */
1364 protected String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next) throws XMLStreamException {
1365 return getCData(state, reader, next, true);
1366 }
1367
1368 /**
1369 * Reads character data. Any element other than character data or the ending
1370 * tag will fire an unexpected element event.
1371 *
1372 * @param state
1373 * @param reader
1374 * @param next
1375 * @param inlineMarkup map for inline markup, this is used for e.g. the locality markup within a subheading
1376 * The map will be filled by the markup element name as key. The value may be a String, a CdmBase or any other object.
1377 * If null any markup text will be neglected but a warning will be fired if they exist.
1378 * @param removeInlineMarkupText if true the markedup text will be removed from the returned String
1379 * @param checkAttributes
1380 * @return
1381 * @throws XMLStreamException
1382 */
1383 protected String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent parent, /*Map<String, Object> inlineMarkup, *boolean removeInlineMarkupText,*/ boolean checkAttributes) throws XMLStreamException {
1384 if (checkAttributes){
1385 checkNoAttributes(parent);
1386 }
1387
1388 String text = "";
1389 while (reader.hasNext()) {
1390 XMLEvent next = readNoWhitespace(reader);
1391 if (isMyEndingElement(next, parent)) {
1392 return text;
1393 } else if (next.isCharacters()) {
1394 text += next.asCharacters().getData();
1395 } else if (isStartingElement(next, FOOTNOTE_REF)){
1396 handleNotYetImplementedElement(next);
1397 // } else if (isStartingElement(next, LOCALITY)){
1398 // handleCDataLocality(state, reader, parent);
1399 } else {
1400 handleUnexpectedElement(next);
1401 }
1402 }
1403 throw new IllegalStateException("Event has no closing tag");
1404
1405 }
1406
1407 // private void handleCDataLocality(MarkupImportState state, XMLEventReader reader, XMLEvent parent) {
1408 // checkAndRemoveAttributeValue(attributes, attrName, value)
1409 //
1410 // }
1411
1412
1413
1414 /**
1415 * For it returns a pure CData annotation string. This behaviour may change in future. More complex annotations
1416 * should be handled differently.
1417 * @param state
1418 * @param reader
1419 * @param parentEvent
1420 * @return
1421 * @throws XMLStreamException
1422 */
1423 protected String handleSimpleAnnotation(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1424 String annotation = getCData(state, reader, parentEvent);
1425 return annotation;
1426 }
1427
1428 /**
1429 * True if text is single "." oder "," or ";" or ":"
1430 * @param text
1431 * @return
1432 */
1433 protected boolean isPunctuation(String text) {
1434 return text == null ? false : text.trim().matches("^[\\.,;:]$");
1435 }
1436
1437
1438 /**
1439 * Text indicating that type information is following but no information about the type of the type
1440 * @param text
1441 * @return
1442 */
1443 protected boolean charIsSimpleType(String text) {
1444 return text.matches("(?i)Type:");
1445 }
1446
1447 protected String getXmlTag(XMLEvent event) {
1448 String result;
1449 if (event.isStartElement()) {
1450 result = "<" + event.asStartElement().getName().getLocalPart()
1451 + ">";
1452 } else if (event.isEndElement()) {
1453 result = "</" + event.asEndElement().getName().getLocalPart() + ">";
1454 } else {
1455 String message = "Only start or end elements are allowed as Html tags";
1456 throw new IllegalStateException(message);
1457 }
1458 return result;
1459 }
1460
1461 protected WriterDataHolder handleWriter(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1462 String text = "";
1463 checkNoAttributes(parentEvent);
1464 WriterDataHolder dataHolder = new WriterDataHolder();
1465 List<FootnoteDataHolder> footnotes = new ArrayList<>();
1466
1467 // TODO handle attributes
1468 while (reader.hasNext()) {
1469 XMLEvent next = readNoWhitespace(reader);
1470 if (isMyEndingElement(next, parentEvent)) {
1471 text = CdmUtils.removeBrackets(text);
1472 if (checkMandatoryText(text, parentEvent)) {
1473 text = normalize(text);
1474 dataHolder.writer = text;
1475 dataHolder.footnotes = footnotes;
1476
1477 // Extension
1478 UUID uuidWriterExtension = MarkupTransformer.uuidWriterExtension;
1479 ExtensionType writerExtensionType =
1480 this.getExtensionType(state, uuidWriterExtension,"Writer", "writer", "writer");
1481 Extension extension = Extension.NewInstance();
1482 extension.setType(writerExtensionType);
1483 extension.setValue(text);
1484 dataHolder.extension = extension;
1485
1486 // Annotation
1487 UUID uuidWriterAnnotation = MarkupTransformer.uuidWriterAnnotation;
1488 AnnotationType writerAnnotationType = this.getAnnotationType(state, uuidWriterAnnotation, "Writer", "writer", "writer", null);
1489 Annotation annotation = Annotation.NewInstance(text, writerAnnotationType, getDefaultLanguage(state));
1490 dataHolder.annotation = annotation;
1491
1492 return dataHolder;
1493 } else {
1494 return null;
1495 }
1496 } else if (isStartingElement(next, FOOTNOTE_REF)) {
1497 FootnoteDataHolder footNote = handleFootnoteRef(state, reader, next);
1498 if (footNote.isRef()) {
1499 footnotes.add(footNote);
1500 } else {
1501 logger.warn("Non ref footnotes not yet impelemnted");
1502 }
1503 } else if (next.isCharacters()) {
1504 text += next.asCharacters().getData();
1505
1506 } else {
1507 handleUnexpectedElement(next);
1508 state.setUnsuccessfull();
1509 }
1510 }
1511 throw new IllegalStateException("<writer> has no end tag");
1512 }
1513
1514
1515 protected void registerFootnotes(MarkupImportState state, AnnotatableEntity entity, List<FootnoteDataHolder> footnotes) {
1516 for (FootnoteDataHolder footNote : footnotes) {
1517 registerFootnoteDemand(state, entity, footNote);
1518 }
1519 }
1520
1521
1522 private void registerFootnoteDemand(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
1523 FootnoteDataHolder existingFootnote = state.getFootnote(footnote.ref);
1524 if (existingFootnote != null) {
1525 attachFootnote(state, entity, existingFootnote);
1526 } else {
1527 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.ref);
1528 if (demands == null) {
1529 demands = new HashSet<>();
1530 state.putFootnoteDemands(footnote.ref, demands);
1531 }
1532 demands.add(entity);
1533 }
1534 }
1535
1536
1537 protected void attachFootnote(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
1538 AnnotationType annotationType = this.getAnnotationType(state, MarkupTransformer.uuidFootnote, "Footnote", "An e-flora footnote", "fn", null);
1539 Annotation annotation = Annotation.NewInstance(footnote.string, annotationType, getDefaultLanguage(state));
1540 // TODO transient objects
1541 entity.addAnnotation(annotation);
1542 save(entity, state);
1543 }
1544
1545
1546 protected void attachFigure(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, Media figure) {
1547 // IdentifiableEntity<?> toSave;
1548 if (entity.isInstanceOf(TextData.class)) {
1549 TextData deb = CdmBase.deproxy(entity, TextData.class);
1550 deb.addMedia(figure);
1551 // toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
1552 } else if (entity.isInstanceOf(SpecimenOrObservationBase.class)) {
1553 String message = "figures for specimen should be handled as Textdata";
1554 fireWarningEvent(message, next, 4);
1555 // toSave = ime;
1556 } else if (entity.isInstanceOf(IdentifiableMediaEntity.class)) {
1557 IdentifiableMediaEntity<?> ime = CdmBase.deproxy(entity, IdentifiableMediaEntity.class);
1558 ime.addMedia(figure);
1559 // toSave = ime;
1560 } else {
1561 String message = "Unsupported entity to attach media: %s";
1562 message = String.format(message, entity.getClass().getName());
1563 // toSave = null;
1564 }
1565 save(entity, state);
1566 }
1567
1568
1569 protected void registerGivenFootnote(MarkupImportState state, FootnoteDataHolder footnote) {
1570 state.registerFootnote(footnote);
1571 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.id);
1572 if (demands != null) {
1573 for (AnnotatableEntity entity : demands) {
1574 attachFootnote(state, entity, footnote);
1575 }
1576 }
1577 }
1578
1579
1580 protected FootnoteDataHolder handleFootnote(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
1581 MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1582 FootnoteDataHolder result = new FootnoteDataHolder();
1583 Map<String, Attribute> attributes = getAttributes(parentEvent);
1584 result.id = getAndRemoveAttributeValue(attributes, ID);
1585 // result.ref = getAndRemoveAttributeValue(attributes, REF);
1586 checkNoAttributes(attributes, parentEvent);
1587
1588 while (reader.hasNext()) {
1589 XMLEvent next = readNoWhitespace(reader);
1590 if (isStartingElement(next, FOOTNOTE_STRING)) {
1591 String string = handleFootnoteString(state, reader, next, specimenImport, nomenclatureImport);
1592 result.string = string;
1593 } else if (isMyEndingElement(next, parentEvent)) {
1594 return result;
1595 } else {
1596 fireUnexpectedEvent(next, 0);
1597 }
1598 }
1599 return result;
1600 }
1601
1602
1603 protected Media handleFigure(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
1604 MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1605 // FigureDataHolder result = new FigureDataHolder();
1606
1607 Map<String, Attribute> attributes = getAttributes(parentEvent);
1608 String id = getAndRemoveAttributeValue(attributes, ID);
1609 String type = getAndRemoveAttributeValue(attributes, TYPE);
1610 String urlAttr = getAndRemoveAttributeValue(attributes, URL);
1611 checkNoAttributes(attributes, parentEvent);
1612
1613 String urlString = null;
1614 String legendString = null;
1615 String titleString = null;
1616 String numString = null;
1617 String text = null;
1618 if (isNotBlank(urlAttr)){
1619 urlString = CdmUtils.Nz(state.getBaseMediaUrl()) + urlAttr;
1620 }
1621 while (reader.hasNext()) {
1622 XMLEvent next = readNoWhitespace(reader);
1623 if (isMyEndingElement(next, parentEvent)) {
1624 if (isNotBlank(text)){
1625 if (isNeglectableFigureText(text)){
1626 fireWarningEvent("Text not yet handled for figures: " + text, next, 4);
1627 }
1628 }
1629 Media media = makeFigure(state, id, type, urlString, legendString, titleString, numString, next);
1630 return media;
1631 } else if (isStartingElement(next, FIGURE_LEGEND)) {
1632 // TODO same as figure string ?
1633 legendString = handleFootnoteString(state, reader, next, specimenImport, nomenclatureImport);
1634 } else if (isStartingElement(next, FIGURE_TITLE)) {
1635 titleString = getCData(state, reader, next);
1636 } else if (isStartingElement(next, URL)) {
1637 String localUrl = getCData(state, reader, next);
1638 String url = CdmUtils.Nz(state.getBaseMediaUrl()) + localUrl;
1639 if (isBlank(urlString)){
1640 urlString = url;
1641 }
1642 if (! url.equals(urlString)){
1643 String message = "URL attribute and URL element differ. Attribute: %s, Element: %s";
1644 fireWarningEvent(String.format(message, urlString, url), next, 2);
1645 }
1646 } else if (isStartingElement(next, NUM)) {
1647 numString = getCData(state, reader, next);
1648 } else if (next.isCharacters()) {
1649 text = CdmUtils.concat("", text, next.asCharacters().getData());
1650 } else {
1651 fireUnexpectedEvent(next, 0);
1652 }
1653 }
1654 throw new IllegalStateException("<figure> has no end tag");
1655 }
1656
1657
1658 /**
1659 * @param text2
1660 * @return
1661 */
1662 private boolean isNeglectableFigureText(String text) {
1663 if (text.matches("Fig\\.*")){
1664 return true;
1665 }else{
1666 return false;
1667 }
1668 }
1669
1670
1671 /**
1672 * @param state
1673 * @param id
1674 * @param type
1675 * @param urlString
1676 * @param legendString
1677 * @param titleString
1678 * @param numString
1679 * @param next
1680 */
1681 private Media makeFigure(MarkupImportState state, String id, String type, String urlString,
1682 String legendString, String titleString, String numString, XMLEvent next) {
1683 Media media = null;
1684 // boolean isFigure = false; //no difference between figure and media since v3.3
1685 try {
1686 //TODO maybe everything is a figure as it is all taken from a book
1687 if ("lineart".equals(type)) {
1688 // isFigure = true;
1689 // media = Figure.NewInstance(url.toURI(), null, null, null);
1690 } else if (type == null || "photo".equals(type)
1691 || "signature".equals(type)
1692 || "others".equals(type)) {
1693 //TODO
1694 } else {
1695 String message = "Unknown figure type '%s'";
1696 message = String.format(message, type);
1697 fireWarningEvent(message, next, 2);
1698 }
1699 media = docImport.getImageMedia(urlString, docImport.getReadMediaData());
1700
1701 if (media != null){
1702 // title
1703 if (StringUtils.isNotBlank(titleString)) {
1704 media.putTitle(getDefaultLanguage(state), titleString);
1705 }
1706 // legend
1707 if (StringUtils.isNotBlank(legendString)) {
1708 media.putDescription(getDefaultLanguage(state), legendString);
1709 }
1710 if (StringUtils.isNotBlank(numString)) {
1711 // TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
1712 // vol.13)
1713 Reference citation = state.getConfig().getSourceReference();
1714 media.addSource(OriginalSourceType.Import, numString, "num", citation, null);
1715 // TODO name used in source if available
1716 }
1717 // TODO which citation
1718 if (StringUtils.isNotBlank(id)) {
1719 media.addSource(OriginalSourceType.Import, id, null, state.getConfig().getSourceReference(), null);
1720 } else {
1721 String message = "Figure id should never be empty or null";
1722 fireWarningEvent(message, next, 6);
1723 }
1724
1725 // text
1726 // do nothing
1727 registerGivenFigure(state, next, id, media);
1728
1729 }else{
1730 String message = "No media found: ";
1731 fireWarningEvent(message, next, 4);
1732 }
1733 } catch (MalformedURLException e) {
1734 String message = "Media uri has incorrect syntax: %s";
1735 message = String.format(message, urlString);
1736 fireWarningEvent(message, next, 4);
1737 // } catch (URISyntaxException e) {
1738 // String message = "Media uri has incorrect syntax: %s";
1739 // message = String.format(message, urlString);
1740 // fireWarningEvent(message, next, 4);
1741 }
1742
1743 return media;
1744 }
1745
1746
1747 private void registerGivenFigure(MarkupImportState state, XMLEvent next, String id, Media figure) {
1748 state.registerFigure(id, figure);
1749 Set<AnnotatableEntity> demands = state.getFigureDemands(id);
1750 if (demands != null) {
1751 for (AnnotatableEntity entity : demands) {
1752 attachFigure(state, next, entity, figure);
1753 }
1754 }
1755 save(figure, state);
1756 }
1757
1758
1759 private FootnoteDataHolder handleFootnoteRef(MarkupImportState state,
1760 XMLEventReader reader, XMLEvent parentEvent)
1761 throws XMLStreamException {
1762 FootnoteDataHolder result = new FootnoteDataHolder();
1763 Map<String, Attribute> attributes = getAttributes(parentEvent);
1764 result.ref = getAndRemoveAttributeValue(attributes, REF);
1765 checkNoAttributes(attributes, parentEvent);
1766
1767 // text is not handled, needed only for debugging purposes
1768 String text = "";
1769 while (reader.hasNext()) {
1770 XMLEvent next = readNoWhitespace(reader);
1771 // if (isStartingElement(next, FOOTNOTE_STRING)){
1772 // String string = handleFootnoteString(state, reader, next);
1773 // result.string = string;
1774 // }else
1775 if (isMyEndingElement(next, parentEvent)) {
1776 if (StringUtils.isNotBlank(text)){
1777 fireWarningEvent("text is not empty but not handled during import", parentEvent, 4);
1778 }
1779 return result;
1780 } else if (next.isCharacters() && unhandledElements.isEmpty()) {
1781 text += next.asCharacters().getData();
1782 } else if (isStartingElement(next, NUM)) {
1783 //ignore numbering of footnotes as they are numbered differently in the CDM
1784 handleIgnoreElement(next);
1785 } else {
1786 handleUnexpectedElement(next);
1787 }
1788 }
1789 return result;
1790 }
1791
1792
1793
1794 private String handleFootnoteString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1795 boolean isTextMode = true;
1796 String text = "";
1797 while (reader.hasNext()) {
1798 XMLEvent next = readNoWhitespace(reader);
1799 if (isMyEndingElement(next, parentEvent)) {
1800 return text;
1801 } else if (next.isEndElement()) {
1802 if (isEndingElement(next, FULL_NAME)) {
1803 popUnimplemented(next.asEndElement());
1804 } else if (isEndingElement(next, BR)) {
1805 isTextMode = true;
1806 } else if (isHtml(next)) {
1807 text += getXmlTag(next);
1808 } else {
1809 handleUnexpectedEndElement(next.asEndElement());
1810 }
1811 } else if (next.isStartElement()) {
1812 if (isStartingElement(next, FULL_NAME)) {
1813 handleNotYetImplementedElement(next);
1814 } else if (isStartingElement(next, GATHERING)) {
1815 text += specimenImport.handleInLineGathering(state, reader, next);
1816 } else if (isStartingElement(next, REFERENCES)) {
1817 text += " " + handleInLineReferences(state, reader, next, nomenclatureImport) + " ";
1818 } else if (isStartingElement(next, BR)) {
1819 text += "<br/>";
1820 isTextMode = false;
1821 } else if (isStartingElement(next, NOMENCLATURE)) {
1822 handleNotYetImplementedElement(next);
1823 } else if (isHtml(next)) {
1824 text += getXmlTag(next);
1825 } else {
1826 handleUnexpectedStartElement(next.asStartElement());
1827 }
1828 } else if (next.isCharacters()) {
1829 if (!isTextMode) {
1830 String message = "footnoteString is not in text mode";
1831 fireWarningEvent(message, next, 6);
1832 } else {
1833 text += next.asCharacters().getData().trim();
1834 // getCData(state, reader, next); does not work as we have inner tags like <references>
1835 }
1836 } else {
1837 handleUnexpectedEndElement(next.asEndElement());
1838 }
1839 }
1840 throw new IllegalStateException("<footnoteString> has no closing tag");
1841
1842 }
1843
1844 private static final List<String> htmlList = Arrays.asList("sub", "sup",
1845 "ol", "ul", "li", "i", "b", "table", "br","tr","td","th");
1846
1847 protected boolean isHtml(XMLEvent event) {
1848 if (event.isStartElement()) {
1849 String tag = event.asStartElement().getName().getLocalPart();
1850 return htmlList.contains(tag);
1851 } else if (event.isEndElement()) {
1852 String tag = event.asEndElement().getName().getLocalPart();
1853 return htmlList.contains(tag);
1854 } else {
1855 return false;
1856 }
1857
1858 }
1859
1860
1861 private String handleInLineReferences(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent,
1862 MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1863 checkNoAttributes(parentEvent);
1864
1865 boolean hasReference = false;
1866 String text = "";
1867 while (reader.hasNext()) {
1868 XMLEvent next = readNoWhitespace(reader);
1869 if (isMyEndingElement(next, parentEvent)) {
1870 checkMandatoryElement(hasReference, parentEvent.asStartElement(), REFERENCE);
1871 return text;
1872 } else if (isStartingElement(next, REFERENCE)) {
1873 text += handleInLineReference(state, reader, next, nomenclatureImport);
1874 hasReference = true;
1875 } else {
1876 handleUnexpectedElement(next);
1877 }
1878 }
1879 throw new IllegalStateException("<References> has no closing tag");
1880 }
1881
1882 private String handleInLineReference(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent, MarkupNomenclatureImport nomenclatureImport)throws XMLStreamException {
1883 Reference reference = nomenclatureImport.handleReference(state, reader, parentEvent);
1884 fireWarningEvent("Check correct usage of inline reference", parentEvent, 3);
1885 IntextReference intext = IntextReference.NewInstance(reference, null, 0, 0);
1886 save(reference, state);
1887 return intext.toInlineString(reference.getTitleCache());
1888 }
1889
/**
 * Result holder for the content collected for one subheading of a string element.
 * Instances are filled in putCurrentSubheading.
 */
protected class SubheadingResult{
    //the text of the subheading section, trimmed and without starting dashes
    String text;
    //the references block of the section, never null when filled by putCurrentSubheading
    StringReferences references;
    //the intext references which were added to the text
    List<IntextReference> inlineReferences;
}
1895
/**
 * Handles a {@code <string>} element. The content is split by subheadings and the
 * text, references and inline references collected for each subheading are returned.
 * Content which appears before the first subheading is stored under the key <code>null</code>.
 *
 * @param state the import state
 * @param reader the event reader positioned directly after the start element
 * @param parentEvent the start element of the string
 * @param feature only needed for distributionLocalities, a warning is fired if a
 *  distribution locality appears for a feature other than distribution
 * @return map from subheading to the result collected for this subheading
 * @throws XMLStreamException if reading the xml stream fails
 * @throws IllegalStateException if the element is never closed
 */
protected Map<String, SubheadingResult> handleString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature feature)throws XMLStreamException {
    // attributes
    String classValue = getClassOnlyAttribute(parentEvent, false);
    if (StringUtils.isNotBlank(classValue)) {
        String message = "class attribute for <string> not yet implemented";
        fireWarningEvent(message, parentEvent, 2);
    }
    // NOTE(review): isHabitat is assigned below but never read within this method
    boolean isHabitat = false;

    // subheadings
    Map<String, SubheadingResult> subHeadingMap = new HashMap<>();
    String currentSubheading = null;

    // false while the reader is in between a starting and an ending <br>
    boolean isTextMode = true;
    String text = "";
    StringReferences currentReferences = null;
    List<IntextReference> inlineReferences = new ArrayList<>();
    // true if the previously handled element was a references element
    boolean lastWasReference = false;
    while (reader.hasNext()) {
        XMLEvent next = readNoWhitespace(reader);
        if (isMyEndingElement(next, parentEvent)) {
            putCurrentSubheading(subHeadingMap, currentSubheading, text, currentReferences, inlineReferences);
            return subHeadingMap;
        }
        //check if last event was reference
        //if references are followed by further content (other than br or a new subheading)
        //they are handled as inline references and are added to the text
        if (lastWasReference && !isStartingElement(next, BR) && !isEndingElement(next, BR)
                && !isStartingElement(next, SUB_HEADING)){
            for (LabeledReference labeledRef : currentReferences.content){
                if (labeledRef.ref != null){
                    IntextReference intext = IntextReference.NewInstance(labeledRef.ref, null, 0, 0);
                    inlineReferences.add(intext);
                    text += intext.toInlineString(labeledRef.label);
                }else{
                    text += labeledRef.label;
                }
            }
            lastWasReference = false;
        }
        if (isStartingElement(next, BR)) {
            text += "<br/>";
            isTextMode = false;
        } else if (isEndingElement(next, BR)) {
            isTextMode = true;
        } else if (isHtml(next)) {
            text += getXmlTag(next);
        } else if (isStartingElement(next, SUB_HEADING)) {
            //store what was collected for the previous subheading and start a new section
            text = putCurrentSubheading(subHeadingMap, currentSubheading, text, currentReferences, inlineReferences);
            currentReferences = null;
            inlineReferences = new ArrayList<>();
            lastWasReference = false;
            // TODO footnotes
            currentSubheading = getCData(state, reader, next).trim();
        } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
            if (feature != null && !feature.equals(Feature.DISTRIBUTION())) {
                String message = "Distribution locality only allowed for feature of type 'distribution'";
                fireWarningEvent(message, next, 4);
            }
            text += handleDistributionLocality(state, reader, next);
        } else if (next.isCharacters()) {
            if (! isTextMode) {
                String message = "String is not in text mode";
                fireWarningEvent(message, next, 6);
            } else {
                text += next.asCharacters().getData();
            }
        } else if (isStartingElement(next, HEADING)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, VERNACULAR_NAMES)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, QUOTE)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, DEDICATION)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, TAXONTYPE)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, FULL_NAME)) {
            //TODO
            handleNotYetImplementedElement(next);
        }else if (isStartingElement(next, REFERENCES)) {
            if (currentReferences != null){
                fireWarningEvent("References do already exist", next, 2);
            }
            currentReferences = handleStringReferences(state, reader, next);
            lastWasReference = true;
        }else if (isStartingElement(next, REFERENCE)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, GATHERING)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, ANNOTATION)) {
            //TODO //TODO test handleSimpleAnnotation
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, HABITAT)) {
            text += featureImport.handleHabitat(state, reader, next);
            isHabitat = true;
        } else if (isStartingElement(next, FIGURE_REF)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, FIGURE)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, FOOTNOTE_REF)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, FOOTNOTE)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, WRITER)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, DATES)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isStartingElement(next, TO_KEY)) {
            handleNotYetImplementedElement(next);
        } else {
            handleUnexpectedElement(next);
        }
    }
    throw new IllegalStateException("<String> has no closing tag");
}
2032
2033
2034 /**
2035 * container class more or less representing a list of labeled references
2036 */
2037 protected class StringReferences{
2038 String subheading;
2039 List<LabeledReference> content = new ArrayList<>() ; //either String or LabeledReference
2040 @Override
2041 public String toString(){
2042 String result = null;
2043 for (LabeledReference labRef : content){
2044 result = CdmUtils.concat("", labRef.label);
2045 }
2046 return result;
2047 }
2048 public List<LabeledReference> getReferences() {
2049 List<LabeledReference> result = new ArrayList<>();
2050 for (LabeledReference labRef : content){
2051 if (labRef.ref != null){
2052 result.add(labRef);
2053 }
2054 }
2055 return result;
2056 }
2057 }
2058
2059 protected class LabeledReference{
2060 public LabeledReference(Reference ref, String detail, String label) {
2061 this.ref = ref; this.detail = detail; this.label = label;
2062 }
2063 protected Reference ref; //if null, this LabeledReference represents only a string in between references
2064 protected String detail; //micro reference
2065 protected String label;
2066 }
2067
2068 private StringReferences handleStringReferences(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
2069 checkNoAttributes(parentEvent);
2070 StringReferences result = new StringReferences();
2071 while (reader.hasNext()) {
2072 XMLEvent next = readNoWhitespace(reader);
2073 if (isMyEndingElement(next, parentEvent)) {
2074 return result;
2075 } else if (isStartingElement(next, SUB_HEADING)) {
2076 String subheading = getCData(state, reader, next);
2077 if (!subheading.matches("(References?|Literature):?")){
2078 fireWarningEvent("Subheading for references not recognized: " + subheading, next, 4);
2079 }
2080 result.subheading = subheading;
2081 } else if (isStartingElement(next, REFERENCE)) {
2082 handleInlineReference(state, reader, next, result);
2083 } else {
2084 handleUnexpectedElement(next);
2085 }
2086 }
2087 throw new IllegalStateException("<References> has no closing tag");
2088 }
2089
2090 private void handleInlineReference(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
2091 StringReferences result) throws XMLStreamException {
2092 checkNoAttributes(parentEvent);
2093 boolean hasRefPart = false;
2094 Map<String, String> refMap = new HashMap<>();
2095 String label = "";
2096 while (reader.hasNext()) {
2097 XMLEvent next = readNoWhitespace(reader);
2098 if (isMyEndingElement(next, parentEvent)) {
2099 checkMandatoryElement(hasRefPart, parentEvent.asStartElement(), REF_PART);
2100 String details = refMap.get(DETAILS);
2101 // String label = makeLabel(state, refMap, next);
2102 Reference ref = createReference(state, refMap, next);
2103 ref = state.getDeduplicationHelper(docImport).getExistingReference(state, ref);
2104
2105 String label2 = ref.getTitleCache(); //TODO preliminary for debugging and testing
2106 result.content.add(new LabeledReference(ref, details, label));
2107 return;
2108 } else if (isStartingElement(next, REF_PART)) {
2109 String classValue = handleRefPart(state, reader, next, refMap);
2110 String text = refMap.get(classValue);
2111 if (classValue.equals(YEAR)){
2112 text = "("+text+")";
2113 }
2114 hasRefPart = true;
2115 label = CdmUtils.concat(" ", label, text);
2116 } else {
2117 handleUnexpectedElement(next);
2118 }
2119 }
2120 throw new IllegalStateException("<References> has no closing tag");
2121
2122 }
2123
2124
2125 // this is more or less a duplicate Nomenclature import, maybe merge later
2126 private Reference createReference(MarkupImportState state,
2127 Map<String, String> refMap, XMLEvent parentEvent) {
2128
2129 Reference reference;
2130
2131 String type = getAndRemoveMapKey(refMap, PUBTYPE);
2132 String authorStr = getAndRemoveMapKey(refMap, AUTHOR);
2133 String titleStr = getAndRemoveMapKey(refMap, PUBTITLE);
2134 String titleCache = getAndRemoveMapKey(refMap, PUBFULLNAME);
2135 String volume = getAndRemoveMapKey(refMap, VOLUME);
2136 String edition = getAndRemoveMapKey(refMap, EDITION);
2137 String editors = getAndRemoveMapKey(refMap, EDITORS);
2138 String year = getAndRemoveMapKey(refMap, YEAR);
2139 String pubName = getAndRemoveMapKey(refMap, PUBNAME);
2140 String pages = getAndRemoveMapKey(refMap, PAGES);
2141 String publication = getAndRemoveMapKey(refMap, PUBLOCATION);
2142 String publisher = getAndRemoveMapKey(refMap, PUBLISHER);
2143 String appendix = getAndRemoveMapKey(refMap, APPENDIX);
2144 String issue = getAndRemoveMapKey(refMap, ISSUE);
2145
2146 reference = handleNonCitationSpecific(state, type, authorStr, titleStr,
2147 titleCache, volume, issue, edition, editors, pubName, appendix, pages, parentEvent);
2148
2149 //year
2150 VerbatimTimePeriod timeperiod = TimePeriodParser.parseStringVerbatim(year);
2151 if (reference.getType().equals(ReferenceType.BookSection)){
2152 reference.getInBook().setDatePublished(timeperiod);
2153 }
2154 reference.setDatePublished(timeperiod);
2155
2156 //Quickfix for these 2 attributes (publication, publisher) used in feature.references
2157 Reference inRef = reference.getInReference() == null ? reference : reference.getInReference();
2158 //publication
2159 if (isNotBlank(publisher)){
2160 inRef.setPublisher(publisher);
2161 }
2162
2163 //publisher
2164 if (isNotBlank(publication)){
2165 inRef.setPlacePublished(publication);
2166 }
2167
2168 // TODO
2169 String[] unhandledList = new String[] { ALTERNATEPUBTITLE, NOTES, STATUS };
2170 for (String unhandled : unhandledList) {
2171 String value = getAndRemoveMapKey(refMap, unhandled);
2172 if (isNotBlank(value)) {
2173 this.handleNotYetImplementedAttributeValue(parentEvent, CLASS, unhandled);
2174 }
2175 }
2176
2177 for (String key : refMap.keySet()) {
2178 if (!DETAILS.equalsIgnoreCase(key)) {
2179 this.fireUnexpectedAttributeValue(parentEvent, CLASS, key);
2180 }
2181 }
2182
2183 return reference;
2184 }
2185
2186
2187 /**
2188 * Create reference for non nomenclatural references
2189 * @return
2190 */
2191 protected Reference handleNonCitationSpecific(MarkupImportState state, String type, String authorStr,
2192 String titleStr, String titleCache, String volume, String issue, String edition,
2193 String editors, String pubName, String appendix, String pages, XMLEvent parentEvent) {
2194
2195 Reference reference;
2196
2197 //volume / issue
2198 if (isBlank(volume) && isNotBlank(issue)){
2199 String message = "Issue ('"+issue+"') exists but no volume";
2200 fireWarningEvent(message, parentEvent, 4);
2201 volume = issue;
2202 }else if (isNotBlank(issue)){
2203 volume = volume + "("+ issue + ")";
2204 }
2205
2206 //pubName / appendix
2207 if (isNotBlank(appendix)){
2208 pubName = pubName == null ? appendix : (pubName + " " + appendix).replaceAll(" ", " ");
2209 }
2210
2211 if (isArticleNonCitation(type, pubName, volume, editors)) {
2212 IArticle article = ReferenceFactory.newArticle();
2213 if (pubName != null) {
2214 IJournal journal = ReferenceFactory.newJournal();
2215 journal.setTitle(pubName);
2216 article.setInJournal(journal);
2217 }else{
2218 fireWarningEvent("Article has no journal", parentEvent, 4);
2219 }
2220 reference = (Reference) article;
2221 } else {
2222 if (isBookSection(type, authorStr, titleStr, editors, pubName, volume)){
2223 IBookSection bookSection = ReferenceFactory.newBookSection();
2224 if (pubName != null) {
2225 IBook book = ReferenceFactory.newBook();
2226 book.setTitle(pubName);
2227 bookSection.setInBook(book);
2228 }
2229 reference = (Reference)bookSection;
2230 }else{
2231 //??
2232 Reference bookOrPartOf = ReferenceFactory.newGeneric();
2233 if (pubName != null && titleStr != null) {
2234 Reference inReference = ReferenceFactory.newGeneric();
2235 inReference.setTitle(pubName);
2236 bookOrPartOf.setInReference(inReference);
2237 }
2238 reference = bookOrPartOf;
2239 }
2240 }
2241
2242 //author
2243 TeamOrPersonBase<?> author = createAuthor(state, authorStr);
2244 reference.setAuthorship(author);
2245
2246 //title
2247 reference.setTitle(titleStr);
2248 if (StringUtils.isNotBlank(titleCache)) {
2249 reference.setTitleCache(titleCache, true);
2250 }
2251
2252 //edition
2253 if(reference.getInReference() != null){
2254 reference.getInReference().setEdition(edition);
2255 reference.getInReference().setEditor(editors);
2256 }else{
2257 //edition
2258 reference.setEdition(edition);
2259 reference.setEditor(editors);
2260 }
2261
2262 //volume
2263 reference.setVolume(volume);
2264
2265 //pages
2266 reference.setPages(pages);
2267
2268 return reference;
2269 }
2270
2271 private boolean isBookSection(String type, String authorStr, String pubTitle,
2272 String editors, String pubName, String volume) {
2273 //type not yet handled
2274 if (authorStr != null && editors != null
2275 && pubTitle != null && pubName != null){
2276 return true;
2277 }else if (pubTitle != null && pubName != null && volume == null){
2278 return true;
2279 }else{
2280 return false;
2281 }
2282 }
2283
2284
2285 private boolean isArticleNonCitation(String type, String pubName, String volume, String editors) {
2286 if ("journal".equalsIgnoreCase(type)){
2287 return true;
2288 }else if (volume != null && editors == null){
2289 if (pubName != null && IJournal.guessIsJournalName(pubName)){
2290 return true;
2291 }else{
2292 return false; //unclear
2293 }
2294 }else{
2295 return false;
2296 }
2297 }
2298
2299 protected String handleRefPart(MarkupImportState state, XMLEventReader reader,
2300 XMLEvent parentEvent, Map<String, String> refMap)
2301 throws XMLStreamException {
2302 String classValue = getClassOnlyAttribute(parentEvent);
2303
2304 String text = "";
2305 while (reader.hasNext()) {
2306 XMLEvent next = readNoWhitespace(reader);
2307 if (isMyEndingElement(next, parentEvent)) {
2308 refMap.put(classValue, text);
2309 return classValue;
2310 } else if (next.isStartElement()) {
2311 if (isStartingElement(next, ANNOTATION)) {
2312 handleNotYetImplementedElement(next); // TODO test handleSimpleAnnotation
2313 } else if (isStartingElement(next, ITALICS)) {
2314 handleNotYetImplementedElement(next);
2315 } else if (isStartingElement(next, BOLD)) {
2316 handleNotYetImplementedElement(next);
2317 } else {
2318 handleUnexpectedStartElement(next.asStartElement());
2319 }
2320 } else if (next.isCharacters()) {
2321 text += next.asCharacters().getData();
2322 } else {
2323 handleUnexpectedEndElement(next.asEndElement());
2324 }
2325 }
2326 throw new IllegalStateException("RefPart has no closing tag");
2327 }
2328
2329
2330 private boolean isBlankOrPunctuation(String text) {
2331 if (text == null){
2332 return true;
2333 } else {
2334 return text.matches("^[\\s\\.,;:]*$");
2335 }
2336 }
2337
2338
2339 /**
2340 *Is heading an "habitat" type heading
2341 * @param heading
2342 * @return true if heading matches something like Eco(logy), Habitat(s) or Habitat & Ecology
2343 */
2344 private boolean isHabitatHeading(String heading) {
2345 return heading.trim().matches("(Ecol(ogy)?|Habitat|Habitat\\s&\\sEcology)\\.?");
2346 }
2347
2348
2349 private String putCurrentSubheading(Map<String, SubheadingResult> subHeadingMap, String currentSubheading,
2350 String text, StringReferences fullReferences, List<IntextReference> inlineReferences) {
2351 if (isNotBlank(text) || (fullReferences != null && isNotEmptyCollection(fullReferences.content))
2352 ||isNotEmptyCollection(inlineReferences)) {
2353 SubheadingResult result = new SubheadingResult();
2354 text = removeStartingMinus(text);
2355 result.text = text.trim();
2356 result.references = fullReferences == null ? new StringReferences() : fullReferences;
2357 result.inlineReferences = inlineReferences;
2358 subHeadingMap.put(currentSubheading, result);
2359 }
2360 return "";
2361 }
2362
2363 /**
2364 * @param references2
2365 * @return
2366 */
2367 protected boolean isNotEmptyCollection(Collection<?> list) {
2368 return list != null && !list.isEmpty();
2369 }
2370
2371
2372 private String removeStartingMinus(String string) {
2373 string = replaceStart(string, "-");
2374 string = replaceStart(string, "\u002d");
2375 string = replaceStart(string, "\u2013");
2376 string = replaceStart(string, "\u2014");
2377 string = replaceStart(string, "--");
2378 return string;
2379 }
2380
2381
2382 /**
2383 * @param value
2384 * @param replacementString
2385 */
2386 private String replaceStart(String value, String replacementString) {
2387 if (value.startsWith(replacementString) ){
2388 value = value.substring(replacementString.length()).trim();
2389 }
2390 while (value.startsWith("-") || value.startsWith("\u2014") ){
2391 value = value.substring("-".length()).trim();
2392 }
2393 return value;
2394 }
2395
2396
/**
 * Handles a distribution locality element. For the area(s) given by the text of the
 * element a distribution is created and added to the "extracted markup data"
 * description of the current taxon.
 * The text may be a single area or a higher area followed by a list of inner areas
 * in brackets, e.g. "A (b, c and d)". In the latter case distributions are created
 * for the inner areas only.
 *
 * @param state the import state
 * @param reader the event reader positioned directly after the start element
 * @param parentEvent the start element of the distribution locality
 * @return the unchanged text of the element
 * @throws XMLStreamException if reading the xml stream fails
 * @throws IllegalStateException if the element is never closed
 */
private String handleDistributionLocality(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
    Map<String, Attribute> attributes = getAttributes(parentEvent);
    String classValue = getAndRemoveRequiredAttributeValue(parentEvent, attributes, CLASS);
    String statusValue =getAndRemoveAttributeValue(attributes, STATUS);
    String frequencyValue =getAndRemoveAttributeValue(attributes, FREQUENCY);

    Taxon taxon = state.getCurrentTaxon();
    // TODO which ref to take?
    Reference sourceReference = state.getConfig().getSourceReference();

    String text = "";
    while (reader.hasNext()) {
        XMLEvent next = readNoWhitespace(reader);
        if (isMyEndingElement(next, parentEvent)) {
            if (StringUtils.isNotBlank(text)) {
                String label = CdmUtils.removeTrailingDot(normalize(text));
                TaxonDescription description = getExtractedMarkupMarkedDescription(state, taxon, sourceReference);
                //the class attribute defines the level of the area(s)
                NamedAreaLevel level = makeNamedAreaLevel(state,classValue, next);

                //status
                PresenceAbsenceTerm status = null;
                if (isNotBlank(statusValue)){
                    try {
                        //first try to find the term by key, then by uuid
                        status = state.getTransformer().getPresenceTermByKey(statusValue);
                        if (status == null){
                            UUID uuid = state.getTransformer().getPresenceTermUuid(statusValue);
                            if (uuid != null){
                                status = this.getPresenceAbsenceTerm(state, uuid, statusValue, statusValue, statusValue, false, null);
                            }
                        }
                        if (status == null){
                            //TODO
                            String message = "The presence/absence status '%s' could not be transformed to an CDM status";
                            fireWarningEvent(String.format(message, statusValue), next, 4);
                        }
                    } catch (UndefinedTransformerMethodException e) {
                        throw new RuntimeException(e);
                    }
                }else{
                    //without status attribute "present" is the default
                    status = PresenceAbsenceTerm.PRESENT();
                }
                //frequency
                if (isNotBlank(frequencyValue)){
                    if (frequencyValue.equalsIgnoreCase("absent") && PresenceAbsenceTerm.PRESENT().equals(status)){ //to be on the safe side that not real status has been defined yet.
                        status = PresenceAbsenceTerm.ABSENT();
                    }else{
                        String message = "The frequency attribute is currently not yet available in CDM";
                        fireWarningEvent(message, parentEvent, 6);
                    }
                }

                //NOTE(review): higherArea is assigned below but not used afterwards
                NamedArea higherArea = null;
                List<NamedArea> areas = new ArrayList<>();

                //pattern for "higher area (inner area 1, inner area 2 and inner area 3)"
                String patSingleArea = "([^,\\(]{3,})";
                String patSeparator = "(,|\\sand\\s)";
                String hierarchiePattern = String.format("%s\\((%s(%s%s)*)\\)", patSingleArea, patSingleArea, patSeparator, patSingleArea);
                Pattern patHierarchie = Pattern.compile(hierarchiePattern, Pattern.CASE_INSENSITIVE);
                Matcher matcher = patHierarchie.matcher(label);
                if (matcher.matches()){
                    //group 1 is the higher area, group 2 the list of inner areas
                    String higherAreaStr = matcher.group(1).trim();
                    higherArea =  makeArea(state, higherAreaStr, level);
                    String[] innerAreas = matcher.group(2).split(patSeparator);
                    for (String innerArea : innerAreas){
                        if (isNotBlank(innerArea)){
                            NamedArea singleArea = makeArea(state, innerArea.trim(), level);
                            areas.add(singleArea);
                            //NOTE(review): partOf is currently not used, see the commented code below
                            NamedArea partOf = singleArea.getPartOf();
//                          if (partOf == null){
//                              singleArea.setPartOf(higherArea);
//                          }
                        }
                    }
                }else{
                    NamedArea singleArea = makeArea(state, label, level);
                    areas.add(singleArea);
                }

                for (NamedArea area : areas){
                    //create distribution
                    Distribution distribution = Distribution.NewInstance(area,status);
                    distribution.addPrimaryTaxonomicSource(sourceReference);
                    description.addElement(distribution);
                }
            } else {
                String message = "Empty distribution locality";
                fireWarningEvent(message, next, 4);
            }
            return text;
        } else if (isStartingElement(next, COORDINATES)) {
            //TODO
            handleNotYetImplementedElement(next);
        } else if (isEndingElement(next, COORDINATES)) {
            //TODO
            popUnimplemented(next.asEndElement());
        } else if (next.isCharacters()) {
            text += next.asCharacters().getData();
        } else {
            handleUnexpectedElement(next);
        }
    }
    throw new IllegalStateException("<DistributionLocality> has no closing tag");
}
2500
2501 /**
2502 * @param state
2503 * @param taxon
2504 * @param ref
2505 * @return
2506 */
2507 protected TaxonDescription getExtractedMarkupMarkedDescription(MarkupImportState state, Taxon taxon, Reference sourceReference) {
2508 MarkerType markerType = getMarkerType(
2509 state,
2510 MarkupTransformer.uuidMarkerExtractedMarkupData,
2511 "Extracted factual data", "Marker type for factual data imported from markup where the markup for this data was included in parent markup that was also imported including the text from this markup.",
2512 "Extr. data",
2513 null);
2514 String title = "Extracted markup data for " + taxon.getName().getTitleCache();
2515 TaxonDescription description = getMarkedTaxonDescription(taxon, markerType, false, true, sourceReference, title);
2516 return description;
2517 }
2518
2519 }