Project

General

Profile

Download (90.8 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.io.markup;
11

    
12
import java.net.MalformedURLException;
13
import java.util.ArrayList;
14
import java.util.Arrays;
15
import java.util.Collection;
16
import java.util.HashMap;
17
import java.util.HashSet;
18
import java.util.Iterator;
19
import java.util.List;
20
import java.util.Map;
21
import java.util.Set;
22
import java.util.Stack;
23
import java.util.UUID;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26

    
27
import javax.xml.namespace.QName;
28
import javax.xml.stream.Location;
29
import javax.xml.stream.XMLEventReader;
30
import javax.xml.stream.XMLStreamConstants;
31
import javax.xml.stream.XMLStreamException;
32
import javax.xml.stream.events.Attribute;
33
import javax.xml.stream.events.Characters;
34
import javax.xml.stream.events.EndElement;
35
import javax.xml.stream.events.StartElement;
36
import javax.xml.stream.events.XMLEvent;
37

    
38
import org.apache.commons.lang.StringUtils;
39
import org.apache.commons.lang.WordUtils;
40
import org.apache.log4j.Logger;
41

    
42
import eu.etaxonomy.cdm.api.service.IClassificationService;
43
import eu.etaxonomy.cdm.api.service.ITermService;
44
import eu.etaxonomy.cdm.common.CdmUtils;
45
import eu.etaxonomy.cdm.ext.geo.GeoServiceArea;
46
import eu.etaxonomy.cdm.ext.geo.IEditGeoService;
47
import eu.etaxonomy.cdm.io.common.CdmImportBase;
48
import eu.etaxonomy.cdm.io.common.CdmImportBase.TermMatchMode;
49
import eu.etaxonomy.cdm.io.common.events.IIoEvent;
50
import eu.etaxonomy.cdm.io.common.events.IoProblemEvent;
51
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
52
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
53
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
54
import eu.etaxonomy.cdm.model.common.Annotation;
55
import eu.etaxonomy.cdm.model.common.AnnotationType;
56
import eu.etaxonomy.cdm.model.common.CdmBase;
57
import eu.etaxonomy.cdm.model.common.Extension;
58
import eu.etaxonomy.cdm.model.common.ExtensionType;
59
import eu.etaxonomy.cdm.model.common.IntextReference;
60
import eu.etaxonomy.cdm.model.common.Language;
61
import eu.etaxonomy.cdm.model.common.MarkerType;
62
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
63
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
64
import eu.etaxonomy.cdm.model.description.Distribution;
65
import eu.etaxonomy.cdm.model.description.Feature;
66
import eu.etaxonomy.cdm.model.description.PolytomousKey;
67
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
68
import eu.etaxonomy.cdm.model.description.TaxonDescription;
69
import eu.etaxonomy.cdm.model.description.TextData;
70
import eu.etaxonomy.cdm.model.location.NamedArea;
71
import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
72
import eu.etaxonomy.cdm.model.location.NamedAreaType;
73
import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;
74
import eu.etaxonomy.cdm.model.media.Media;
75
import eu.etaxonomy.cdm.model.name.INonViralName;
76
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
77
import eu.etaxonomy.cdm.model.name.Rank;
78
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
79
import eu.etaxonomy.cdm.model.reference.IArticle;
80
import eu.etaxonomy.cdm.model.reference.IBook;
81
import eu.etaxonomy.cdm.model.reference.IBookSection;
82
import eu.etaxonomy.cdm.model.reference.IJournal;
83
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
84
import eu.etaxonomy.cdm.model.reference.Reference;
85
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
86
import eu.etaxonomy.cdm.model.reference.ReferenceType;
87
import eu.etaxonomy.cdm.model.taxon.Classification;
88
import eu.etaxonomy.cdm.model.taxon.Taxon;
89
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
90
import eu.etaxonomy.cdm.model.term.DefinedTerm;
91
import eu.etaxonomy.cdm.model.term.DefinedTermBase;
92
import eu.etaxonomy.cdm.model.term.TermVocabulary;
93
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
94
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
95
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
96

    
97
/**
98
 * @author a.mueller
99
 * @since 04.08.2008
100
 */
101
public abstract class MarkupImportBase  {
102
	private static final Logger logger = Logger.getLogger(MarkupImportBase.class);
103

    
104
	//Base
105
	protected static final String ALTITUDE = "altitude";
106
	protected static final String ANNOTATION = "annotation";
107
	protected static final String BOLD = "bold";
108
	protected static final String BR = "br";
109
	protected static final String DOUBTFUL = "doubtful";
110
	protected static final String CITATION = "citation";
111
	protected static final String CLASS = "class";
112
	protected static final String COORDINATES = "coordinates";
113
	protected static final String DATES = "dates";
114
	protected static final String GATHERING = "gathering";
115
	protected static final String GATHERING_GROUP = "gatheringGroup";
116
	protected static final String GENUS_ABBREVIATION = "genus abbreviation";
117
	protected static final String FOOTNOTE = "footnote";
118
	protected static final String FOOTNOTE_REF = "footnoteRef";
119
	protected static final String FULL_NAME = "fullName";
120
	protected static final String ITALICS = "italics";
121
	protected static final String NUM = "num";
122
	protected static final String NOTES = "notes";
123
	protected static final String PUBLICATION = "publication";
124
	protected static final String SPECIMEN_TYPE = "specimenType";
125
	protected static final String STATUS = "status";
126
	protected static final String SUB_HEADING = "subHeading";
127
	protected static final String TYPE = "type";
128
	protected static final String TYPE_STATUS = "typeStatus";
129
	protected static final String UNKNOWN = "unknown";
130

    
131

    
132
	protected static final boolean CREATE_NEW = true;
133
	protected static final boolean NO_IMAGE_GALLERY = false;
134
	protected static final boolean IMAGE_GALLERY = true;
135

    
136
	protected static final String ADDENDA = "addenda";
137
	protected static final String BIBLIOGRAPHY = "bibliography";
138
	protected static final String BIOGRAPHIES = "biographies";
139
	protected static final String CHAR = "char";
140
	protected static final String DEDICATION = "dedication";
141
	protected static final String DEFAULT_MEDIA_URL = "defaultMediaUrl";
142
	protected static final String DISTRIBUTION_LIST = "distributionList";
143
	protected static final String DISTRIBUTION_LOCALITY = "distributionLocality";
144
	protected static final String FEATURE = "feature";
145
	protected static final String FIGURE = "figure";
146
	protected static final String FIGURE_LEGEND = "figureLegend";
147
	protected static final String FIGURE_PART = "figurePart";
148
	protected static final String FIGURE_REF = "figureRef";
149
	protected static final String FIGURE_TITLE = "figureTitle";
150
	protected static final String FOOTNOTE_STRING = "footnoteString";
151
	protected static final String FREQUENCY = "frequency";
152
	protected static final String HEADING = "heading";
153
	protected static final String HABITAT = "habitat";
154
	protected static final String HABITAT_LIST = "habitatList";
155
	protected static final String IS_FREETEXT = "isFreetext";
156
	protected static final String ID = "id";
157
	protected static final String KEY = "key";
158
	protected static final String LIFE_CYCLE_PERIODS = "lifeCyclePeriods";
159
	protected static final String META_DATA = "metaData";
160
	protected static final String MODS = "mods";
161

    
162
	protected static final String NOMENCLATURE = "nomenclature";
163
	protected static final String QUOTE = "quote";
164
	protected static final String RANK = "rank";
165
	protected static final String REF = "ref";
166
	protected static final String REF_NUM = "refNum";
167
	protected static final String REFERENCE = "reference";
168
	protected static final String REFERENCES = "references";
169
	protected static final String SUB_CHAR = "subChar";
170
	protected static final String TAXON = "taxon";
171
	protected static final String TAXONTITLE = "taxontitle";
172
	protected static final String TAXONTYPE = "taxontype";
173
	protected static final String TEXT_SECTION = "textSection";
174
	protected static final String TREATMENT = "treatment";
175
	protected static final String SERIALS_ABBREVIATIONS = "serialsAbbreviations";
176
	protected static final String STRING = "string";
177
	protected static final String URL = "url";
178
	protected static final String WRITER = "writer";
179

    
180
	protected static final String LOCALITY = "locality";
181

    
182

    
183

    
184
	//Nomenclature
185
	protected static final String ACCEPTED = "accepted";
186
	protected static final String ACCEPTED_NAME = "acceptedName";
187
	protected static final String ALTERNATEPUBTITLE = "alternatepubtitle";
188
	protected static final String APPENDIX = "appendix";
189
	protected static final String AUTHOR = "author";
190
	protected static final String DETAILS = "details";
191
	protected static final String EDITION = "edition";
192
	protected static final String EDITORS = "editors";
193
	protected static final String HOMONYM = "homonym";
194
	protected static final String HOMOTYPES = "homotypes";
195
	protected static final String NOMENCLATURAL_NOTES = "nomenclaturalNotes";
196
	protected static final String INFRANK = "infrank";
197
	protected static final String INFRAUT = "infraut";
198
	protected static final String INFRPARAUT = "infrparaut";
199
	protected static final String ISSUE = "issue";
200
	protected static final String NAME_STATUS = "namestatus";
201
	protected static final String NAME = "name";
202
	protected static final String NAME_TYPE = "nameType";
203
	protected static final String NOM = "nom";
204
	protected static final String PAGES = "pages";
205
	protected static final String PARAUT = "paraut";
206
	protected static final String PUBFULLNAME = "pubfullname";
207
	protected static final String PUBLOCATION = "publocation";
208
	protected static final String PUBLISHER = "publisher";
209
	protected static final String PUBNAME = "pubname";
210
	protected static final String PUBTITLE = "pubtitle";
211
	protected static final String PUBTYPE = "pubtype";
212
	protected static final String REF_PART = "refPart";
213
	protected static final String SYNONYM = "synonym";
214
	protected static final String USAGE = "usage";
215
	protected static final String VOLUME = "volume";
216
	protected static final String YEAR = "year";
217

    
218

    
219
	//keys
220
	protected static final String COUPLET = "couplet";
221
	protected static final String IS_SPOTCHARACTERS = "isSpotcharacters";
222
	protected static final String ONLY_NUMBERED_TAXA_EXIST = "onlyNumberedTaxaExist";
223
	protected static final String EXISTS = "exists";
224
	protected static final String KEYNOTES = "keynotes";
225
	protected static final String KEY_TITLE = "keyTitle";
226
	protected static final String QUESTION = "question";
227
	protected static final String TEXT = "text";
228
	protected static final String TO_COUPLET = "toCouplet";
229
	protected static final String TO_KEY = "toKey";
230
	protected static final String TO_TAXON = "toTaxon";
231

    
232

    
233
	//Feature
234
	protected static final String VERNACULAR_NAMES = "vernacularNames";
235
	protected static final String VERNACULAR_NAME = "vernacularName";
236
	protected static final String TRANSLATION = "translation";
237
	protected static final String LOCAL_LANGUAGE = "localLanguage";
238

    
239

    
240

    
241
	protected MarkupDocumentImport docImport;
242

    
243
	private final IEditGeoService editGeoService;
244
	protected MarkupFeatureImport featureImport;
245

    
246
	/**
	 * Creates the import base for the given document import and
	 * takes over the geo service offered by that document import.
	 * @param docImport the document import this import works for
	 */
	public MarkupImportBase(MarkupDocumentImport docImport) {
		this.editGeoService = docImport.getEditGeoService();
		this.docImport = docImport;
	}
251

    
252
	private final Stack<QName> unhandledElements = new Stack<QName>();
253
	private final Stack<QName> handledElements = new Stack<QName>();
254

    
255

    
256
	protected <T extends CdmBase> void  save(Collection<T> collection, MarkupImportState state) {
257
		if (state.isCheck() || collection.isEmpty()){
258
			return;
259
		}
260
		T example = collection.iterator().next();
261
		if (example.isInstanceOf(TaxonBase.class)){
262
			Collection<TaxonBase> typedCollection = (Collection<TaxonBase>)collection;
263
			docImport.getTaxonService().saveOrUpdate(typedCollection);
264
		}else if (example.isInstanceOf(Classification.class)){
265
			Collection<Classification> typedCollection = (Collection<Classification>)collection;
266
			docImport.getClassificationService().saveOrUpdate(typedCollection);
267
		}else if (example.isInstanceOf(PolytomousKey.class)){
268
			Collection<PolytomousKey> typedCollection = (Collection<PolytomousKey>)collection;
269
			docImport.getPolytomousKeyService().saveOrUpdate(typedCollection);
270
		}else if (example.isInstanceOf(DefinedTermBase.class)){
271
			Collection<DefinedTermBase> typedCollection = (Collection<DefinedTermBase>)collection;
272
			getTermService().saveOrUpdate(typedCollection);
273
		}
274

    
275
	}
276

    
277

    
278
	//TODO move to service layer for all IdentifiableEntities
279
	protected void save(CdmBase cdmBase, MarkupImportState state) {
280
		if (state.isCheck()){
281
			return;
282
		}
283
		cdmBase = CdmBase.deproxy(cdmBase, CdmBase.class);
284
		if (cdmBase == null){
285
			String message = "Tried to save a null object.";
286
			fireWarningEvent(message, "--location ?? --", 6,1);
287
		} else if (cdmBase.isInstanceOf(TaxonBase.class)){
288
			docImport.getTaxonService().saveOrUpdate((TaxonBase<?>)cdmBase);
289
		}else if (cdmBase.isInstanceOf(Classification.class)){
290
			docImport.getClassificationService().saveOrUpdate((Classification)cdmBase);
291
		}else if (cdmBase.isInstanceOf(PolytomousKey.class)){
292
			docImport.getPolytomousKeyService().saveOrUpdate((PolytomousKey)cdmBase);
293
		}else if (cdmBase.isInstanceOf(DefinedTermBase.class)){
294
			docImport.getTermService().saveOrUpdate((DefinedTermBase<?>)cdmBase);
295
		}else if (cdmBase.isInstanceOf(Media.class)){
296
			docImport.getMediaService().saveOrUpdate((Media)cdmBase);
297
		}else if (cdmBase.isInstanceOf(SpecimenOrObservationBase.class)){
298
			docImport.getOccurrenceService().saveOrUpdate((SpecimenOrObservationBase<?>)cdmBase);
299
		}else if (cdmBase.isInstanceOf(DescriptionElementBase.class)){
300
			docImport.getDescriptionElementService().save((DescriptionElementBase)cdmBase);
301
		}else if (cdmBase.isInstanceOf(Reference.class)){
302
			docImport.getReferenceService().saveOrUpdate((Reference)cdmBase);
303
		}else{
304
			String message = "Unknown cdmBase type to save: " + cdmBase.getClass();
305
			fireWarningEvent(message, "Unknown location", 8);
306
		}
307
		//logger.warn("Saved " +  cdmBase);
308
	}
309

    
310

    
311
	protected ITermService getTermService() {
312
		return docImport.getTermService();
313
	}
314

    
315
	protected IClassificationService getClassificationService() {
316
		return docImport.getClassificationService();
317
	}
318

    
319
//*********************** Attribute methods *************************************/
320

    
321
	/**
322
	 * Returns a map for all attributes of an start element
323
	 * @param event
324
	 * @return
325
	 */
326
	protected Map<String, Attribute> getAttributes(XMLEvent event) {
327
		Map<String, Attribute> result = new HashMap<>();
328
		if (!event.isStartElement()){
329
			fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event.getLocation()), 1, 1);
330
			return result;
331
		}
332
		StartElement element = event.asStartElement();
333
		@SuppressWarnings("unchecked")
334
        Iterator<Attribute> attributes = element.getAttributes();
335
		while (attributes.hasNext()){
336
			Attribute attribute = attributes.next();
337
			//TODO namespaces
338
			result.put(attribute.getName().getLocalPart(), attribute);
339
		}
340
		return result;
341
	}
342

    
343
	/**
344
	 * Throws an unexpected attributes event if the event has any attributes.
345
	 * @param event
346
	 */
347
	protected void checkNoAttributes(Map<String, Attribute> attributes, XMLEvent event) {
348
		String[] exceptions = new String[]{};
349
		handleUnexpectedAttributes(event.getLocation(), attributes, 1, exceptions);
350
	}
351

    
352

    
353

    
354
	/**
355
	 * Throws an unexpected attributes event if the event has any attributes.
356
	 * @param event
357
	 */
358
	protected void checkNoAttributes(XMLEvent event) {
359
		String[] exceptions = new String[]{};
360
		checkNoAttributes(event, 1, exceptions);
361
	}
362

    
363
	/**
364
	 * Throws an unexpected attributes event if the event has any attributes except those mentioned in "exceptions".
365
	 * @param event
366
	 * @param exceptions
367
	 */
368
	protected void checkNoAttributes(XMLEvent event, int stackDepth, String... exceptions) {
369
		if (! event.isStartElement()){
370
			fireWarningEvent("Event is not an startElement. Can't check attributes", makeLocationStr(event.getLocation()), 1, 1);
371
			return;
372
		}
373
		StartElement startElement = event.asStartElement();
374
		Map<String, Attribute> attributes = getAttributes(startElement);
375
		handleUnexpectedAttributes(startElement.getLocation(), attributes, stackDepth+1, exceptions);
376
	}
377

    
378

    
379
	/**
380
	 * Checks if the given attribute exists and has the given value.
381
	 * If yes, true is returned and the attribute is removed from the attributes map.
382
	 * Otherwise false is returned.
383
	 * @param attributes
384
	 * @param attrName
385
	 * @param value
386
	 * @return <code>true</code> if attribute has given value, <code>false</code> otherwise
387
	 */
388
	protected boolean checkAndRemoveAttributeValue( Map<String, Attribute> attributes, String attrName, String value) {
389
		Attribute attr = attributes.get(attrName);
390
		if (attr == null ||value == null ){
391
			return false;
392
		}else{
393
			if (value.equals(attr.getValue())){
394
				attributes.remove(attrName);
395
				return true;
396
			}else{
397
				return false;
398
			}
399
		}
400
	}
401

    
402

    
403
	/**
404
	 * Returns the value of a given attribute name and removes the attribute from the attributes map.
405
	 * Returns <code>null</code> if attribute does not exist.
406
	 * @param attributes the list of all attributes
407
	 * @param attrName the requested attribute name
408
	 * @return the value for the attribute
409
	 */
410
	protected String getAndRemoveAttributeValue(Map<String, Attribute> attributes, String attrName) {
411
		return getAndRemoveAttributeValue(null, attributes, attrName, false, 1);
412
	}
413

    
414
	/**
415
	 * Returns the value of a boolean attribute with the given name and removes the attribute from the attributes map.
416
	 * Returns <code>defaultValue</code> if the attribute does not exist. ALso returns <code>defaultValue</code> and throws a warning if the
417
	 * attribute has no boolean value (true, false).
418
	 * @param
419
	 * @param attributes the
420
	 * @param attrName the name of the attribute
421
	 * @param defaultValue the default value to return if attribute does not exist or can not be defined
422
	 * @return
423
	 */
424
	protected Boolean getAndRemoveBooleanAttributeValue(XMLEvent event, Map<String, Attribute> attributes, String attrName, Boolean defaultValue) {
425
		String value = getAndRemoveAttributeValue(null, attributes, attrName, false, 1);
426
		Boolean result = defaultValue;
427
		if (value != null){
428
			if (value.equalsIgnoreCase("true")){
429
				result = true;
430
			}else if (value.equalsIgnoreCase("false")){
431
				result = false;
432
			}else{
433
				String message = "Boolean attribute has no boolean value ('true', 'false') but '%s'";
434
				fireWarningEvent(String.format(message, value), makeLocationStr(event.getLocation()), 6, 1);
435
			}
436
		}
437
		return result;
438
	}
439

    
440

    
441
	/**
442
	 * Returns the value of a given attribute name and returns the attribute from the attributes map.
443
	 * Fires a mandatory field is missing event if the attribute does not exist.
444
	 * @param xmlEvent
445
	 * @param attributes
446
	 * @param attrName
447
	 * @return
448
	 */
449
	protected String getAndRemoveRequiredAttributeValue(XMLEvent xmlEvent, Map<String, Attribute> attributes, String attrName) {
450
		return getAndRemoveAttributeValue(xmlEvent, attributes, attrName, true, 1);
451
	}
452

    
453
	/**
454
	 * Returns the value of a given attribute name and returns the attribute from the attributes map.
455
	 * If required is <code>true</code> and the attribute does not exist a mandatory field is missing event is fired.
456
	 * @param xmlEvent
457
	 * @param attributes
458
	 * @param attrName
459
	 * @param isRequired
460
	 * @return
461
	 */
462
	private String getAndRemoveAttributeValue(XMLEvent xmlEvent, Map<String, Attribute> attributes, String attrName, boolean isRequired, int stackDepth) {
463
		Attribute attr = attributes.get(attrName);
464
		if (attr == null ){
465
			if (isRequired){
466
				fireMandatoryElementIsMissing(xmlEvent, attrName, 8, stackDepth+1);
467
			}
468
			return null;
469
		}else{
470
			attributes.remove(attrName);
471
			return attr.getValue();
472
		}
473
	}
474

    
475
	/**
476
	 * Fires an not yet implemented event if the given attribute exists in attributes.
477
	 * @param attributes
478
	 * @param attrName
479
	 * @param event
480
	 */
481
	protected void handleNotYetImplementedAttribute(Map<String, Attribute>  attributes,
482
	        String attrName, XMLEvent event) {
483
		Attribute attr = attributes.get(attrName);
484
		if (attr != null){
485
			attributes.remove(attrName);
486
			QName qName = attr.getName();
487
			fireNotYetImplementedAttribute(event.getLocation(), qName, attr.getValue(), 1);
488
		}
489
	}
490

    
491
	/**
492
	 * Fires an unhandled attributes event, if attributes exist in attributes map not covered by the exceptions.
493
	 * No event is fired if the unhandled elements stack is not empty.
494
	 * @param location
495
	 * @param attributes
496
	 * @param exceptions
497
	 */
498
	protected void handleUnexpectedAttributes(Location location,Map<String, Attribute> attributes, String... exceptions) {
499
		handleUnexpectedAttributes(location, attributes, 1, exceptions);
500
	}
501

    
502
	/**
503
	 * see {@link #handleUnexpectedAttributes(Location, Map, String...)}
504
     *
505
	 * @param location
506
	 * @param attributes
507
	 * @param stackDepth the stack trace depth
508
	 * @param exceptions
509
	 */
510
	private void handleUnexpectedAttributes(Location location,Map<String, Attribute> attributes, int stackDepth, String... exceptions) {
511
		if (attributes.size() > 0){
512
			if (this.unhandledElements.size() == 0 ){
513
				boolean hasUnhandledAttributes = false;
514
				for (String key : attributes.keySet()){
515
					boolean isException = false;
516
					for (String exception : exceptions){
517
						if(key.equals(exception)){
518
							isException = true;
519
						}
520
					}
521
					if (!isException){
522
						hasUnhandledAttributes = true;
523
					}
524
				}
525
				if (hasUnhandledAttributes){
526
					fireUnexpectedAttributes(location, attributes, stackDepth+1);
527
				}
528
			}
529
		}
530
	}
531

    
532

    
533
	private void fireUnexpectedAttributes(Location location, Map<String, Attribute> attributes, int stackDepth) {
534
		String attributesString = "";
535
		for (String key : attributes.keySet()){
536
			Attribute attribute = attributes.get(key);
537
			attributesString = CdmUtils.concat(",", attributesString, attribute.getName().getLocalPart() + ":" + attribute.getValue());
538
		}
539
		String message = "Unexpected attributes: %s";
540
		IoProblemEvent event = makeProblemEvent(location, String.format(message, attributesString), 1 , stackDepth +1 );
541
		fire(event);
542
	}
543

    
544

    
545
	protected void fireUnexpectedAttributeValue(XMLEvent parentEvent, String attrName, String attrValue) {
546
		String message = "Unexpected attribute value %s='%s'";
547
		message = String.format(message, attrName, attrValue);
548
		IoProblemEvent event = makeProblemEvent(parentEvent.getLocation(), message, 1 , 1 );
549
		fire(event);
550
	}
551

    
552
	protected void handleNotYetImplementedAttributeValue(XMLEvent xmlEvent, String attrName, String attrValue) {
553
		String message = "Attribute %s not yet implemented for value '%s'";
554
		message = String.format(message, attrName, attrValue);
555
		IIoEvent event = makeProblemEvent(xmlEvent.getLocation(), message, 1, 1 );
556
		fire(event);
557
	}
558

    
559
	protected void fireNotYetImplementedAttribute(Location location, QName qName,
560
	        String value, int stackDepth) {
561
		String message = "Attribute not yet implemented: %s (%s)";
562
		IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart(), value), 1, stackDepth+1 );
563
		fire(event);
564
	}
565

    
566

    
567
	protected void fireUnexpectedEvent(XMLEvent xmlEvent, int stackDepth) {
568
		Location location = xmlEvent.getLocation();
569
		String message = "Unexpected event: %s";
570
		IIoEvent event = makeProblemEvent(location, String.format(message, xmlEvent.toString()), 2, stackDepth +1);
571
		fire(event);
572
	}
573

    
574
	protected void fireUnexpectedStartElement(Location location, StartElement startElement, int stackDepth) {
575
		QName qName = startElement.getName();
576
		String message = "Unexpected start element: %s";
577
		IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 2, stackDepth +1);
578
		fire(event);
579
	}
580

    
581

    
582
	protected void fireUnexpectedEndElement(Location location, EndElement endElement, int stackDepth) {
583
		QName qName = endElement.getName();
584
		String message = "Unexpected end element: %s";
585
		IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 16, stackDepth+1);
586
		fire(event);
587
	}
588

    
589
	protected void fireNotYetImplementedElement(Location location, QName qName, int stackDepth) {
590
		String message = "Element not yet implemented: %s";
591
		IIoEvent event = makeProblemEvent(location, String.format(message, qName.getLocalPart()), 1, stackDepth+1 );
592
		fire(event);
593
	}
594

    
595
	protected void fireNotYetImplementedCharacters(Location location, Characters chars, int stackDepth) {
596
		String message = "Characters not yet handled: %s";
597
		IIoEvent event = makeProblemEvent(location, String.format(message, chars.getData()), 1, stackDepth+1 );
598
		fire(event);
599
	}
600

    
601
	/**
602
	 * Creates a problem event.
603
	 * Be aware of the right depths of the stack trace !
604
	 * @param location
605
	 * @param message
606
	 * @param severity
607
	 * @return
608
	 */
609
	private IoProblemEvent makeProblemEvent(Location location, String message, int severity, int stackDepth) {
610
		stackDepth++;
611
		StackTraceElement[] stackTrace = new Exception().getStackTrace();
612
		int lineNumber = stackTrace[stackDepth].getLineNumber();
613
		String methodName = stackTrace[stackDepth].getMethodName();
614
		String locationStr = makeLocationStr(location);
615
		String className = stackTrace[stackDepth].getClassName();
616
		Class<?> declaringClass;
617
		try {
618
			declaringClass = Class.forName(className);
619
		} catch (ClassNotFoundException e) {
620
			declaringClass = this.getClass();
621
		}
622
		IoProblemEvent event = IoProblemEvent.NewInstance(declaringClass, message,
623
				locationStr, lineNumber, severity, methodName);
624
		return event;
625
	}
626

    
627
	/**
628
	 * Creates a string from a location
629
	 * @param location
630
	 * @return
631
	 */
632
	protected String makeLocationStr(Location location) {
633
		String locationStr = location == null ? " - no location - " : "l." + location.getLineNumber() + "/c."+ location.getColumnNumber();
634
		return locationStr;
635
	}
636

    
637

    
638
	/**
639
	 * Fires an unexpected element event if the unhandled elements stack is empty.
640
	 * Otherwise adds the element to the stack.
641
	 * @param event
642
	 */
643
	protected void handleUnexpectedStartElement(XMLEvent event) {
644
		handleUnexpectedStartElement(event, 1);
645
	}
646

    
647
	/**
648
	 * Fires an unexpected element event if the unhandled elements stack is empty.
649
	 * Otherwise adds the element to the stack.
650
	 * @param event
651
	 */
652
	protected void handleUnexpectedStartElement(XMLEvent event, int stackDepth) {
653
		QName qName = event.asStartElement().getName();
654
		if (! unhandledElements.empty()){
655
			unhandledElements.push(qName);
656
		}else{
657
			fireUnexpectedStartElement(event.getLocation(), event.asStartElement(), stackDepth + 1);
658
		}
659
	}
660

    
661

    
662
	protected void handleUnexpectedEndElement(EndElement event) {
663
		handleUnexpectedEndElement(event, 1);
664
	}
665

    
666
	/**
667
	 * Fires an unexpected element event if the event is not the last on the stack.
668
	 * Otherwise removes last stack element.
669
	 * @param event
670
	 */
671
	protected void handleUnexpectedEndElement(EndElement event, int stackDepth) {
672
		QName qName = event.asEndElement().getName();
673
		if (!unhandledElements.isEmpty() && unhandledElements.peek().equals(qName)){
674
			unhandledElements.pop();
675
		}else{
676
			fireUnexpectedEndElement(event.getLocation(), event.asEndElement(), stackDepth + 1);
677
		}
678
	}
679

    
680
	/**
681
	 *
682
	 * @param endElement
683
	 */
684
	protected void popUnimplemented(EndElement endElement) {
685
		QName qName = endElement.asEndElement().getName();
686
		if (unhandledElements.peek().equals(qName)){
687
			unhandledElements.pop();
688
		}else{
689
			String message = "End element is not last on stack: %s";
690
			message = String.format(message, qName.getLocalPart());
691
			IIoEvent event = makeProblemEvent(endElement.getLocation(), message, 16, 1);
692
			fire(event);
693
		}
694

    
695
	}
696

    
697

    
698
	/**
699
	 * Fires an unexpected element event if the unhandled element stack is empty.
700
	 * @param event
701
	 */
702
	protected void handleUnexpectedElement(XMLEvent event) {
703
		if (event.isStartElement()){
704
			handleUnexpectedStartElement(event, 2);
705
		}else if (event.isEndElement()){
706
			handleUnexpectedEndElement(event.asEndElement(), 2);
707
		}else if (event.getEventType() == XMLStreamConstants.COMMENT){
708
			//do nothing
709
		}else if (! unhandledElements.empty()){
710
			//do nothing
711
		}else{
712
			fireUnexpectedEvent(event, 1);
713
		}
714
	}
715

    
716
	/**
717
	 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
718
	 * @param event
719
	 */
720
	protected void handleNotYetImplementedCharacters(XMLEvent event) {
721
		Characters chars = event.asCharacters();
722
		fireNotYetImplementedCharacters(event.getLocation(), chars, 1);
723
	}
724

    
725
	/**
726
	 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
727
	 * @param event
728
	 */
729
	protected void handleNotYetImplementedElement(XMLEvent event) {
730
		QName qName = event.asStartElement().getName();
731
		boolean isTopLevel = unhandledElements.isEmpty();
732
		unhandledElements.push(qName);
733
		if (isTopLevel){
734
			fireNotYetImplementedElement(event.getLocation(), qName, 1);
735
		}
736
	}
737

    
738
	/**
739
	 * Fires an not yet implemented event and adds the element name to the unhandled elements stack.
740
	 * @param event
741
	 */
742
	protected void handleIgnoreElement(XMLEvent event) {
743
		QName qName = event.asStartElement().getName();
744
		unhandledElements.push(qName);
745
	}
746

    
747
	protected void handleAmbigousManually(MarkupImportState state,
748
			XMLEventReader reader, StartElement startElement) {
749
		QName qName = startElement.getName();
750
		unhandledElements.push(qName);
751
		fireWarningEvent(
752
				"Handle manually: " + qName.getLocalPart() + " is ambigous and should therefore be handled manually",
753
				makeLocationStr(startElement.getLocation()), 2, 2);
754
	}
755

    
756
	/**
757
	 * Checks if a mandatory text is not empty or null.
758
	 * Returns true if text is given.
759
	 * Fires an mandatory element is missing event otherwise and returns <code>null</code>.
760
	 * @param text
761
	 * @param parentEvent
762
	 * @return
763
	 */
764
	protected boolean checkMandatoryText(String text, XMLEvent parentEvent) {
765
		if (! StringUtils.isNotBlank(text)){
766
			fireMandatoryElementIsMissing(parentEvent, "CData", 4, 1);
767
			return false;
768
		}
769
		return true;
770
	}
771

    
772
	/**
773
	 * Fires an mandatory element is missing event if exists is <code>false</code>.
774
	 * @param hasMandatory
775
	 * @param parentEvent
776
	 * @param string
777
	 */
778
	protected void checkMandatoryElement(boolean exists, StartElement parentEvent, String attrName) {
779
		if (! exists){
780
			fireMandatoryElementIsMissing(parentEvent, attrName, 5, 1);
781
		}
782
	}
783

    
784

    
785
	/**
786
	 * Fires an element is missing event.
787
	 * @param xmlEvent
788
	 * @param string
789
	 * @param severity
790
	 * @param stackDepth
791
	 * @throws IllegalStateException if xmlEvent is not a StartElement and not an Attribute
792
	 */
793
	private void fireMandatoryElementIsMissing(XMLEvent xmlEvent, String missingEventName, int severity, int stackDepth) throws IllegalStateException{
794
		Location location = xmlEvent.getLocation();
795
		String typeName;
796
		QName qName;
797
		if (xmlEvent.isAttribute()){
798
			Attribute attribute = ((Attribute)xmlEvent);
799
			typeName = "attribute";
800
			qName = attribute.getName();
801
		}else if (xmlEvent.isStartElement()){
802
			typeName = "element";
803
			qName = xmlEvent.asStartElement().getName();
804
		}else{
805
			throw new IllegalStateException("mandatory element only allowed for attributes and start tags in " + makeLocationStr(location));
806
		}
807
		String message = "Mandatory %s '%s' is missing in %s";
808
		message = String.format(message, typeName , missingEventName, qName.getLocalPart());
809
		IIoEvent event = makeProblemEvent(location, message, severity, stackDepth +1);
810
		fire(event);
811
	}
812

    
813

    
814

    
815

    
816
	/**
817
	 * Returns <code>true</code> if the "next" event is the ending tag for the "parent" event.
818
	 * @param next end element to test, must not be null
819
	 * @param parentEvent start element to test
820
	 * @return true if the "next" event is the ending tag for the "parent" event.
821
	 * @throws XMLStreamException
822
	 */
823
	protected boolean isMyEndingElement(XMLEvent next, XMLEvent parentEvent) throws XMLStreamException {
824
		if (! parentEvent.isStartElement()){
825
			String message = "Parent event should be start tag";
826
			fireWarningEvent(message, makeLocationStr(next.getLocation()), 6);
827
			return false;
828
		}
829
		return isEndingElement(next, parentEvent.asStartElement().getName().getLocalPart());
830
	}
831

    
832
	/**
833
	 * Trims the text and removes turns all whitespaces into single empty space.
834
	 * @param text
835
	 * @return
836
	 */
837
	protected String normalize(String text) {
838
		text = StringUtils.trimToEmpty(text);
839
		text = text.replaceAll("\\s+", " ");
840
		return text;
841
	}
842

    
843

    
844

    
845
	/**
846
	 * Removes whitespaces at beginning and end and makes the first letter
847
	 * a capital letter and all other letters small letters.
848
	 * @param value
849
	 * @return
850
	 */
851
	protected String toFirstCapital(String value) {
852
		if (StringUtils.isBlank(value)){
853
			return value;
854
		}else{
855
			String result = "";
856
			value = value.trim();
857
			result += value.trim().substring(0,1).toUpperCase();
858
			if (value.length()>1){
859
				result += value.substring(1).toLowerCase();
860
			}
861
			return result;
862
		}
863
	}
864

    
865
	/**
866
	 * Currently not used.
867
	 * @param str
868
	 * @param allowedNumberOfCharacters
869
	 * @param onlyFirstCapital
870
	 * @return
871
	 */
872
	protected boolean isAbbreviation(String str, int allowedNumberOfCharacters, boolean onlyFirstCapital){
873
		if (isBlank(str)){
874
			return false;
875
		}
876
		str = str.trim();
877
		if (! str.endsWith(".")){
878
			return false;
879
		}
880
		str = str.substring(0, str.length() -1);
881
		if (str.length() > allowedNumberOfCharacters){
882
			return false;
883
		}
884
		final String re = "^\\p{javaUpperCase}\\p{javaLowerCase}*$";
885
		if (str.matches(re)){
886
			return true;
887
		}else{
888
			return false;
889
		}
890
	}
891

    
892
	/**
893
	 * Checks if <code>abbrev</code> is the short form for the genus name (strGenusName).
894
	 * Usually this is the case if <code>abbrev</code> is the first letter (optional with ".")
895
	 * of strGenusName. But in older floras it may also be the first 2 or 3 letters (optional with dot).
896
	 * However, we allow only a maximum of 2 letters to be anambigous. In cases with 3 letters better
897
	 * change the original markup data.
898
	 * @param single
899
	 * @param strGenusName
900
	 * @return
901
	 */
902
	protected boolean isGenusAbbrev(String abbrev, String strGenusName) {
903
		if (! abbrev.matches("[A-Z][a-z]?\\.?")) {
904
			return false;
905
		}else if (abbrev.length() == 0 || strGenusName == null || strGenusName.length() == 0){
906
			return false;
907
		}else{
908
			abbrev = abbrev.replace(".", "");
909
			return strGenusName.startsWith(abbrev);
910
//			boolean result = true;
911
//			for (int i = 0 ; i < abbrev.length(); i++){
912
//				result &= ( abbrev.charAt(i) == strGenusName.charAt(i));
913
//			}
914
//			return result;
915
		}
916
	}
917

    
918

    
919
	/**
920
	 * Checks if all words in the given string start with a capital letter but do not have any further capital letter.
921
	 * @param word the string to be checekd. Usually should be a single word.
922
	 * @return true if the above is the case, false otherwise
923
	 */
924
	protected boolean isFirstCapitalWord(String word) {
925
		if (WordUtils.capitalizeFully(word).equals(word)){
926
			return true;
927
		}else if (WordUtils.capitalizeFully(word,new char[]{'-'}).equals(word)){
928
			//for words like Le-Testui (which is a species epithet)
929
			return true;
930
		}else{
931
			return false;
932
		}
933
	}
934

    
935

    
936
	/**
937
	 * Read next event. Ignore whitespace events.
938
	 * @param reader
939
	 * @return
940
	 * @throws XMLStreamException
941
	 */
942
	protected XMLEvent readNoWhitespace(XMLEventReader reader) throws XMLStreamException {
943
		XMLEvent event = reader.nextEvent();
944
		while (!unhandledElements.isEmpty()){
945
			if (event.isStartElement()){
946
				handleNotYetImplementedElement(event);
947
			}else if (event.isEndElement()){
948
				popUnimplemented(event.asEndElement());
949
			}
950
			event = reader.nextEvent();
951
		}
952
		while (event.isCharacters() && event.asCharacters().isWhiteSpace()){
953
			event = reader.nextEvent();
954
		}
955
		return event;
956
	}
957

    
958
	/**
959
	 * Returns the REQUIRED "class" attribute for a given event and checks that it is the only attribute.
960
	 * @param parentEvent
961
	 * @return
962
	 */
963
	protected String getClassOnlyAttribute(XMLEvent parentEvent) {
964
		return getClassOnlyAttribute(parentEvent, true);
965
	}
966

    
967

    
968
	/**
969
	 * Returns the "class" attribute for a given event and checks that it is the only attribute.
970
	 * @param parentEvent
971
	 * @return
972
	 */
973
	protected String getClassOnlyAttribute(XMLEvent parentEvent, boolean required) {
974
		return getOnlyAttribute(parentEvent, CLASS, required);
975
	}
976

    
977
	/**
978
	 * Returns the value for the only attribute for a given event and checks that it is the only attribute.
979
	 * @param parentEvent
980
	 * @return
981
	 */
982
	protected String getOnlyAttribute(XMLEvent parentEvent, String attrName, boolean required) {
983
		Map<String, Attribute> attributes = getAttributes(parentEvent);
984
		String classValue =getAndRemoveAttributeValue(parentEvent, attributes, attrName, required, 1);
985
		checkNoAttributes(attributes, parentEvent);
986
		return classValue;
987
	}
988

    
989

    
990
	protected void fireWarningEvent(String message, String locationStr, Integer severity, Integer depth) {
991
		docImport.fireWarningEvent(message, locationStr, severity, depth);
992
	}
993

    
994
	protected void fireWarningEvent(String message, XMLEvent event, Integer severity) {
995
		docImport.fireWarningEvent(message, makeLocationStr(event.getLocation()), severity, 1);
996
	}
997

    
998
	protected void fireSchemaConflictEventExpectedStartTag(String elName, XMLEventReader reader) throws XMLStreamException {
999
		docImport.fireSchemaConflictEventExpectedStartTag(elName, reader);
1000
	}
1001

    
1002

    
1003
	protected void fireWarningEvent(String message, String locationStr, int severity) {
1004
		docImport.fireWarningEvent(message, locationStr, severity, 1);
1005
	}
1006

    
1007
	protected void fire(IIoEvent event) {
1008
		docImport.fire(event);
1009
	}
1010

    
1011
	protected boolean isNotBlank(String str){
1012
		return StringUtils.isNotBlank(str);
1013
	}
1014

    
1015
	protected boolean isBlank(String str){
1016
		return StringUtils.isBlank(str);
1017
	}
1018

    
1019
	protected TaxonDescription getTaxonDescription(Taxon taxon, Reference ref, boolean isImageGallery, boolean createNewIfNotExists) {
1020
		return docImport.getTaxonDescription(taxon, ref, isImageGallery, createNewIfNotExists);
1021
	}
1022

    
1023
    protected TaxonDescription getDefaultTaxonDescription(Taxon taxon, boolean isImageGallery, boolean createNewIfNotExists, Reference source) {
1024
        return docImport.getDefaultTaxonDescription(taxon, isImageGallery, createNewIfNotExists, source);
1025
    }
1026

    
1027
    /**
1028
     * Returns the taxon description with marked as <code>true</code> with the given marker type.
1029
     * If createNewIfNotExists a new description is created if it does not yet exist.
1030
     * For the new description the source and the title are set if not <code>null</code>.
1031
     * @param taxon
1032
     * @param markerType
1033
     * @param isImageGallery
1034
     * @param createNewIfNotExists
1035
     * @param source
1036
     * @param title
1037
     * @return the existing or new taxon description
1038
     */
1039
    protected TaxonDescription getMarkedTaxonDescription(Taxon taxon, MarkerType markerType, boolean isImageGallery, boolean createNewIfNotExists, Reference source, String title) {
1040
        return docImport.getMarkedTaxonDescription(taxon, markerType, isImageGallery, createNewIfNotExists, source, title);
1041
    }
1042

    
1043

    
1044
	/**
1045
	 * Returns the default language defined in the state. If no default language is defined in the state,
1046
	 * the CDM default language is returned.
1047
	 * @param state
1048
	 * @return
1049
	 */
1050
	protected Language getDefaultLanguage(MarkupImportState state) {
1051
		Language result = state.getDefaultLanguage();
1052
		if (result == null){
1053
			result = Language.DEFAULT();
1054
		}
1055
		return result;
1056
	}
1057

    
1058

    
1059
//*********************** FROM XML IMPORT BASE ****************************************
1060
	protected boolean isEndingElement(XMLEvent event, String elName) throws XMLStreamException {
1061
		return docImport.isEndingElement(event, elName);
1062
	}
1063

    
1064
	protected boolean isStartingElement(XMLEvent event, String elName) throws XMLStreamException {
1065
		return docImport.isStartingElement(event, elName);
1066
	}
1067

    
1068

    
1069
	protected void fillMissingEpithetsForTaxa(Taxon parentTaxon, Taxon childTaxon) {
1070
		docImport.fillMissingEpithetsForTaxa(parentTaxon, childTaxon);
1071
	}
1072

    
1073
	protected Feature getFeature(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<Feature> voc){
1074
		return docImport.getFeature(state, uuid, label, text, labelAbbrev, voc);
1075
	}
1076

    
1077
    protected PresenceAbsenceTerm getPresenceAbsenceTerm(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, boolean isAbsenceTerm, TermVocabulary<PresenceAbsenceTerm> voc){
1078
        return docImport.getPresenceTerm(state, uuid, label, text, labelAbbrev, isAbsenceTerm, voc);
1079
    }
1080

    
1081
	protected ExtensionType getExtensionType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev){
1082
		return docImport.getExtensionType(state, uuid, label, text, labelAbbrev);
1083
	}
1084

    
1085
	protected DefinedTerm getIdentifierType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<DefinedTerm> voc){
1086
		return docImport.getIdentifierType(state, uuid, label, text, labelAbbrev, voc);
1087
	}
1088

    
1089
	protected AnnotationType getAnnotationType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<AnnotationType> voc){
1090
		return docImport.getAnnotationType(state, uuid, label, text, labelAbbrev, voc);
1091
	}
1092

    
1093
	protected MarkerType getMarkerType(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<MarkerType> voc){
1094
		return docImport.getMarkerType(state, uuid, label, text, labelAbbrev, voc);
1095
	}
1096

    
1097
	protected NamedAreaLevel getNamedAreaLevel(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<NamedAreaLevel> voc){
1098
		return docImport.getNamedAreaLevel(state, uuid, label, text, labelAbbrev, voc);
1099
	}
1100

    
1101
	protected NamedArea getNamedArea(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, NamedAreaType areaType, NamedAreaLevel level, TermVocabulary voc, TermMatchMode matchMode){
1102
		return docImport.getNamedArea(state, uuid, label, text, labelAbbrev, areaType, level, voc, matchMode);
1103
	}
1104

    
1105
	protected Language getLanguage(MarkupImportState state, UUID uuid, String label, String text, String labelAbbrev, TermVocabulary<?> voc){
1106
		return docImport.getLanguage(state, uuid, label, text, labelAbbrev, voc);
1107
	}
1108

    
1109
// *************************************** Concrete methods **********************************************/
1110

    
1111

    
1112
	/**
1113
	 * @param state
1114
	 * @param classValue
1115
	 * @param byAbbrev
1116
	 * @return
1117
	 */
1118
	protected Rank makeRank(MarkupImportState state, String value, boolean byAbbrev) {
1119
		Rank rank = null;
1120
		if (StringUtils.isBlank(value)) {
1121
			return null;
1122
		}
1123
		try {
1124
			boolean useUnknown = true;
1125
			NomenclaturalCode nc = makeNomenclaturalCode(state);
1126
			if (value.equals(GENUS_ABBREVIATION)){
1127
				rank = Rank.GENUS();
1128
			}else if (byAbbrev) {
1129
				rank = Rank.getRankByIdInVoc(value.toLowerCase(), nc, useUnknown);
1130
				if (value.equalsIgnoreCase("forma")){
1131
				    return Rank.FORM();
1132
				}else if (value.toLowerCase().matches("(sub)?(section|genus|series|tribe)")){
1133
				    return Rank.getRankByEnglishName(value, nc, useUnknown);
1134
				}else if (value.equals("§")){
1135
                    return Rank.SECTION_BOTANY();  //Special case in Flora Malesiana
1136
				}
1137
			} else {
1138
				rank = Rank.getRankByEnglishName(value, nc, useUnknown);
1139
			}
1140
			if (rank.equals(Rank.UNKNOWN_RANK())) {
1141
				rank = null;
1142
			}
1143
			if (rank == null && "sous-genre".equalsIgnoreCase(value)){
1144
				rank = Rank.SUBGENUS();
1145
			}
1146
		} catch (UnknownCdmTypeException e) {
1147
			// doNothing
1148
		}
1149
		return rank;
1150
	}
1151

    
1152
	NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
1153
    protected TeamOrPersonBase<?> createAuthor(MarkupImportState state, String authorTitle) {
1154
		TeamOrPersonBase<?> result = parser.author(authorTitle);
1155
		return state.getDeduplicationHelper().getExistingAuthor(result);
1156
	}
1157

    
1158
	protected String getAndRemoveMapKey(Map<String, String> map, String key) {
1159
		String result = map.get(key);
1160
		map.remove(key);
1161
		if (result != null) {
1162
			result = normalize(result);
1163
		}
1164
		return StringUtils.stripToNull(result);
1165
	}
1166

    
1167

    
1168
	/**
1169
	 * Creates a {@link INonViralName} object depending on the defined {@link NomenclaturalCode}
1170
	 * and the given parameters.
1171
	 * @param state
1172
	 * @param rank
1173
	 * @return
1174
	 */
1175
	protected INonViralName createNameByCode(MarkupImportState state, Rank rank) {
1176
		NomenclaturalCode nc = makeNomenclaturalCode(state);
1177
		INonViralName name = nc.getNewTaxonNameInstance(rank);
1178
		return name;
1179
	}
1180

    
1181
	protected void handleFullName(MarkupImportState state, XMLEventReader reader,
1182
			INonViralName name, XMLEvent event) throws XMLStreamException {
1183
		String fullNameStr;
1184
		Map<String, Attribute> attrs = getAttributes(event);
1185
		String rankStr = getAndRemoveRequiredAttributeValue(event, attrs, "rank");
1186
		String hybridClass = getAndRemoveAttributeValue(attrs, "hybridClass");
1187

    
1188
		Rank rank = makeRank(state, rankStr, false);
1189
		name.setRank(rank);
1190
		if (rank == null) {
1191
			String message = "Rank was computed as null. This must not be.";
1192
			fireWarningEvent(message, event, 6);
1193
			name.setRank(Rank.UNKNOWN_RANK());
1194
		}
1195
		if (!attrs.isEmpty()) {
1196
			handleUnexpectedAttributes(event.getLocation(), attrs);
1197
		}
1198
		fullNameStr = getCData(state, reader, event, false);
1199
		NonViralNameParserImpl.NewInstance().parseFullName(name, fullNameStr, rank, false);
1200
		if (hybridClass != null ){
1201
		    if ("hybrid formula".equals(hybridClass)){
1202
		        if (!name.isHybridFormula()){
1203
		            fireWarningEvent("Hybrid formula is not set though requested: " + fullNameStr, event, 4);
1204
		        }
1205
		    }else if ("hybrid".equals(hybridClass)){
1206
                if (!name.isHybridName()){
1207
                    fireWarningEvent("Hybrid name is recognized: " + fullNameStr, event, 4);
1208
                }
1209
            }else{
1210
                handleNotYetImplementedAttributeValue(event, "hybridClass", hybridClass);
1211
            }
1212
		}
1213
	}
1214

    
1215

    
1216
	/**
1217
	 * Returns the {@link NomenclaturalCode} for this import. Default is {@link NomenclaturalCode#ICBN} if
1218
	 * no code is defined.
1219
	 * @param state
1220
	 * @return
1221
	 */
1222
	protected NomenclaturalCode makeNomenclaturalCode(MarkupImportState state) {
1223
		NomenclaturalCode nc = state.getConfig().getNomenclaturalCode();
1224
		if (nc == null) {
1225
			nc = NomenclaturalCode.ICNAFP; // default;
1226
		}
1227
		return nc;
1228
	}
1229

    
1230

    
1231
	/**
1232
	 * @param state
1233
	 * @param levelString
1234
	 * @param next
1235
	 * @return
1236
	 */
1237
	protected NamedAreaLevel makeNamedAreaLevel(MarkupImportState state, String levelString, XMLEvent next) {
1238
		NamedAreaLevel level;
1239
		try {
1240
			level = state.getTransformer().getNamedAreaLevelByKey(levelString);
1241
			if (level == null) {
1242
				UUID levelUuid = state.getTransformer().getNamedAreaLevelUuid(levelString);
1243
				if (levelUuid == null) {
1244
					String message = "Unknown distribution locality class (named area level): %s. Create new level instead.";
1245
					message = String.format(message, levelString);
1246
					fireWarningEvent(message, next, 6);
1247
				}
1248
				level = getNamedAreaLevel(state, levelUuid, levelString, levelString, levelString, null);
1249
			}
1250
		} catch (UndefinedTransformerMethodException e) {
1251
			throw new RuntimeException(e);
1252
		}
1253
		return level;
1254
	}
1255

    
1256

    
1257
	/**
1258
	 * @param state
1259
	 * @param areaName
1260
	 * @param level
1261
	 * @return
1262
	 */
1263
	protected NamedArea makeArea(MarkupImportState state, String areaName, NamedAreaLevel level) {
1264

    
1265
		//TODO FM vocabulary
1266
		TermVocabulary<NamedArea> voc = null;
1267
		NamedAreaType areaType = null;
1268

    
1269
		NamedArea area = null;
1270
		try {
1271
			area = state.getTransformer().getNamedAreaByKey(areaName);
1272
		} catch (UndefinedTransformerMethodException e) {
1273
			throw new RuntimeException(e);
1274
		}
1275
		if (area == null){
1276
			boolean isNewInState = false;
1277
			UUID uuid = state.getAreaUuid(areaName);
1278
			if (uuid == null){
1279
				isNewInState = true;
1280
				try {
1281
					uuid = state.getTransformer().getNamedAreaUuid(areaName);
1282
					if (uuid == null){
1283
					    uuid = UUID.randomUUID();
1284
					    state.putAreaUuid(areaName, uuid);
1285
					}
1286
				} catch (UndefinedTransformerMethodException e) {
1287
					throw new RuntimeException(e);
1288
				}
1289
			}
1290

    
1291
			CdmImportBase.TermMatchMode matchMode = CdmImportBase.TermMatchMode.UUID_LABEL;
1292
			area = getNamedArea(state, uuid, areaName, areaName, areaName, areaType, level, voc, matchMode);
1293
			if (isNewInState){
1294
				state.putAreaUuid(areaName, area.getUuid());
1295

    
1296
				//TODO just for testing -> make generic and move to better place
1297
				String geoServiceLayer="vmap0_as_bnd_political_boundary_a";
1298
				String layerFieldName ="nam";
1299

    
1300
				if ("Bangka".equals(areaName)){
1301
					String areaValue = "PULAU BANGKA#SUMATERA SELATAN";
1302
					GeoServiceArea geoServiceArea = new GeoServiceArea();
1303
					geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1304
					this.editGeoService.setMapping(area, geoServiceArea);
1305
//					save(area, state);
1306
				}
1307
				if ("Luzon".equals(areaName)){
1308
					GeoServiceArea geoServiceArea = new GeoServiceArea();
1309

    
1310
					List<String> list = Arrays.asList("HERMANA MAYOR ISLAND#CENTRAL LUZON",
1311
							"HERMANA MENOR ISLAND#CENTRAL LUZON",
1312
							"CENTRAL LUZON");
1313
					for (String areaValue : list){
1314
						geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1315
					}
1316

    
1317
					this.editGeoService.setMapping(area, geoServiceArea);
1318
//					save(area, state);
1319
				}
1320
				if ("Mindanao".equals(areaName)){
1321
					GeoServiceArea geoServiceArea = new GeoServiceArea();
1322

    
1323
					List<String> list = Arrays.asList("NORTHERN MINDANAO",
1324
							"SOUTHERN MINDANAO",
1325
							"WESTERN MINDANAO");
1326
					//TODO to be continued
1327
					for (String areaValue : list){
1328
						geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1329
					}
1330

    
1331
					this.editGeoService.setMapping(area, geoServiceArea);
1332
//					save(area, state);
1333
				}
1334
				if ("Palawan".equals(areaName)){
1335
					GeoServiceArea geoServiceArea = new GeoServiceArea();
1336

    
1337
					List<String> list = Arrays.asList("PALAWAN#SOUTHERN TAGALOG");
1338
					for (String areaValue : list){
1339
						geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
1340
					}
1341

    
1342
					this.editGeoService.setMapping(area, geoServiceArea);
1343
//					save(area, state);
1344
				}
1345

    
1346
			}
1347
		}
1348
		return area;
1349
	}
1350

    
1351

    
1352

    
1353
	/**
1354
	 * Reads character data. Any element other than character data or the ending
1355
	 * tag will fire an unexpected element event.
1356
     *
1357
	 * @see #getCData(MarkupImportState, XMLEventReader, XMLEvent, boolean)
1358
	 * @param state
1359
	 * @param reader
1360
	 * @param next
1361
	 * @return
1362
	 * @throws XMLStreamException
1363
	 */
1364
	protected String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next) throws XMLStreamException {
1365
		return getCData(state, reader, next, true);
1366
	}
1367

    
1368
	/**
1369
	 * Reads character data. Any element other than character data or the ending
1370
	 * tag will fire an unexpected element event.
1371
	 *
1372
	 * @param state
1373
	 * @param reader
1374
	 * @param next
1375
	 * @param inlineMarkup map for inline markup, this is used for e.g. the locality markup within a subheading
1376
	 * The map will be filled by the markup element name as key. The value may be a String, a CdmBase or any other object.
1377
	 * If null any markup text will be neglected but a warning will be fired if they exist.
1378
	 * @param removeInlineMarkupText if true the markedup text will be removed from the returned String
1379
	 * @param checkAttributes
1380
	 * @return
1381
	 * @throws XMLStreamException
1382
	 */
1383
	protected String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent parent, /*Map<String, Object> inlineMarkup, *boolean removeInlineMarkupText,*/ boolean checkAttributes) throws XMLStreamException {
1384
		if (checkAttributes){
1385
			checkNoAttributes(parent);
1386
		}
1387

    
1388
		String text = "";
1389
		while (reader.hasNext()) {
1390
			XMLEvent next = readNoWhitespace(reader);
1391
			if (isMyEndingElement(next, parent)) {
1392
				return text;
1393
			} else if (next.isCharacters()) {
1394
				text += next.asCharacters().getData();
1395
			} else if (isStartingElement(next, FOOTNOTE_REF)){
1396
				handleNotYetImplementedElement(next);
1397
//			} else if (isStartingElement(next, LOCALITY)){
1398
//				handleCDataLocality(state, reader, parent);
1399
			} else {
1400
				handleUnexpectedElement(next);
1401
			}
1402
		}
1403
		throw new IllegalStateException("Event has no closing tag");
1404

    
1405
	}
1406

    
1407
//	private void handleCDataLocality(MarkupImportState state, XMLEventReader reader, XMLEvent parent) {
1408
//		checkAndRemoveAttributeValue(attributes, attrName, value)
1409
//
1410
//	}
1411

    
1412

    
1413

    
1414
	/**
1415
	 * For it returns a pure CData annotation string. This behaviour may change in future. More complex annotations
1416
	 * should be handled differently.
1417
	 * @param state
1418
	 * @param reader
1419
	 * @param parentEvent
1420
	 * @return
1421
	 * @throws XMLStreamException
1422
	 */
1423
	protected String handleSimpleAnnotation(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1424
		String annotation = getCData(state, reader, parentEvent);
1425
		return annotation;
1426
	}
1427

    
1428
	/**
1429
	 * True if text is single "." oder "," or ";" or ":"
1430
	 * @param text
1431
	 * @return
1432
	 */
1433
	protected boolean isPunctuation(String text) {
1434
		return text == null ? false : text.trim().matches("^[\\.,;:]$");
1435
	}
1436

    
1437

    
1438
	/**
1439
	 * Text indicating that type information is following but no information about the type of the type
1440
	 * @param text
1441
	 * @return
1442
	 */
1443
	protected boolean charIsSimpleType(String text) {
1444
		return text.matches("(?i)Type:");
1445
	}
1446

    
1447
	protected String getXmlTag(XMLEvent event) {
1448
		String result;
1449
		if (event.isStartElement()) {
1450
			result = "<" + event.asStartElement().getName().getLocalPart()
1451
					+ ">";
1452
		} else if (event.isEndElement()) {
1453
			result = "</" + event.asEndElement().getName().getLocalPart() + ">";
1454
		} else {
1455
			String message = "Only start or end elements are allowed as Html tags";
1456
			throw new IllegalStateException(message);
1457
		}
1458
		return result;
1459
	}
1460

    
1461
	protected WriterDataHolder handleWriter(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1462
		String text = "";
1463
		checkNoAttributes(parentEvent);
1464
		WriterDataHolder dataHolder = new WriterDataHolder();
1465
		List<FootnoteDataHolder> footnotes = new ArrayList<>();
1466

    
1467
		// TODO handle attributes
1468
		while (reader.hasNext()) {
1469
			XMLEvent next = readNoWhitespace(reader);
1470
			if (isMyEndingElement(next, parentEvent)) {
1471
				text = CdmUtils.removeBrackets(text);
1472
				if (checkMandatoryText(text, parentEvent)) {
1473
					text = normalize(text);
1474
					dataHolder.writer = text;
1475
					dataHolder.footnotes = footnotes;
1476

    
1477
					// Extension
1478
					UUID uuidWriterExtension = MarkupTransformer.uuidWriterExtension;
1479
					ExtensionType writerExtensionType =
1480
							this.getExtensionType(state, uuidWriterExtension,"Writer", "writer", "writer");
1481
					Extension extension = Extension.NewInstance();
1482
					extension.setType(writerExtensionType);
1483
					extension.setValue(text);
1484
					dataHolder.extension = extension;
1485

    
1486
					// Annotation
1487
					UUID uuidWriterAnnotation = MarkupTransformer.uuidWriterAnnotation;
1488
					AnnotationType writerAnnotationType = this.getAnnotationType(state, uuidWriterAnnotation, "Writer", "writer", "writer", null);
1489
					Annotation annotation = Annotation.NewInstance(text, writerAnnotationType, getDefaultLanguage(state));
1490
					dataHolder.annotation = annotation;
1491

    
1492
					return dataHolder;
1493
				} else {
1494
					return null;
1495
				}
1496
			} else if (isStartingElement(next, FOOTNOTE_REF)) {
1497
				FootnoteDataHolder footNote = handleFootnoteRef(state, reader, next);
1498
				if (footNote.isRef()) {
1499
					footnotes.add(footNote);
1500
				} else {
1501
					logger.warn("Non ref footnotes not yet impelemnted");
1502
				}
1503
			} else if (next.isCharacters()) {
1504
				text += next.asCharacters().getData();
1505

    
1506
			} else {
1507
				handleUnexpectedElement(next);
1508
				state.setUnsuccessfull();
1509
			}
1510
		}
1511
		throw new IllegalStateException("<writer> has no end tag");
1512
	}
1513

    
1514

    
1515
	protected void registerFootnotes(MarkupImportState state, AnnotatableEntity entity, List<FootnoteDataHolder> footnotes) {
1516
		for (FootnoteDataHolder footNote : footnotes) {
1517
			registerFootnoteDemand(state, entity, footNote);
1518
		}
1519
	}
1520

    
1521

    
1522
	private void registerFootnoteDemand(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
1523
		FootnoteDataHolder existingFootnote = state.getFootnote(footnote.ref);
1524
		if (existingFootnote != null) {
1525
			attachFootnote(state, entity, existingFootnote);
1526
		} else {
1527
			Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.ref);
1528
			if (demands == null) {
1529
				demands = new HashSet<>();
1530
				state.putFootnoteDemands(footnote.ref, demands);
1531
			}
1532
			demands.add(entity);
1533
		}
1534
	}
1535

    
1536

    
1537
	protected void attachFootnote(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
1538
		AnnotationType annotationType = this.getAnnotationType(state, MarkupTransformer.uuidFootnote, "Footnote", "An e-flora footnote", "fn", null);
1539
		Annotation annotation = Annotation.NewInstance(footnote.string, annotationType, getDefaultLanguage(state));
1540
		// TODO transient objects
1541
		entity.addAnnotation(annotation);
1542
		save(entity, state);
1543
	}
1544

    
1545

    
1546
	protected void attachFigure(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, Media figure) {
1547
		// IdentifiableEntity<?> toSave;
1548
		if (entity.isInstanceOf(TextData.class)) {
1549
			TextData deb = CdmBase.deproxy(entity, TextData.class);
1550
			deb.addMedia(figure);
1551
			// toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
1552
		} else if (entity.isInstanceOf(SpecimenOrObservationBase.class)) {
1553
			String message = "figures for specimen should be handled as Textdata";
1554
			fireWarningEvent(message, next, 4);
1555
			// toSave = ime;
1556
		} else if (entity.isInstanceOf(IdentifiableMediaEntity.class)) {
1557
			IdentifiableMediaEntity<?> ime = CdmBase.deproxy(entity, IdentifiableMediaEntity.class);
1558
			ime.addMedia(figure);
1559
			// toSave = ime;
1560
		} else {
1561
			String message = "Unsupported entity to attach media: %s";
1562
			message = String.format(message, entity.getClass().getName());
1563
			// toSave = null;
1564
		}
1565
		save(entity, state);
1566
	}
1567

    
1568

    
1569
	protected void registerGivenFootnote(MarkupImportState state, FootnoteDataHolder footnote) {
1570
		state.registerFootnote(footnote);
1571
		Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.id);
1572
		if (demands != null) {
1573
			for (AnnotatableEntity entity : demands) {
1574
				attachFootnote(state, entity, footnote);
1575
			}
1576
		}
1577
	}
1578

    
1579

    
1580
	protected FootnoteDataHolder handleFootnote(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
1581
			MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1582
		FootnoteDataHolder result = new FootnoteDataHolder();
1583
		Map<String, Attribute> attributes = getAttributes(parentEvent);
1584
		result.id = getAndRemoveAttributeValue(attributes, ID);
1585
		// result.ref = getAndRemoveAttributeValue(attributes, REF);
1586
		checkNoAttributes(attributes, parentEvent);
1587

    
1588
		while (reader.hasNext()) {
1589
			XMLEvent next = readNoWhitespace(reader);
1590
			if (isStartingElement(next, FOOTNOTE_STRING)) {
1591
				String string = handleFootnoteString(state, reader, next, specimenImport, nomenclatureImport);
1592
				result.string = string;
1593
			} else if (isMyEndingElement(next, parentEvent)) {
1594
				return result;
1595
			} else {
1596
				fireUnexpectedEvent(next, 0);
1597
			}
1598
		}
1599
		return result;
1600
	}
1601

    
1602

    
1603
	protected Media handleFigure(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
1604
			MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1605
		// FigureDataHolder result = new FigureDataHolder();
1606

    
1607
		Map<String, Attribute> attributes = getAttributes(parentEvent);
1608
		String id = getAndRemoveAttributeValue(attributes, ID);
1609
		String type = getAndRemoveAttributeValue(attributes, TYPE);
1610
		String urlAttr = getAndRemoveAttributeValue(attributes, URL);
1611
		checkNoAttributes(attributes, parentEvent);
1612

    
1613
		String urlString = null;
1614
		String legendString = null;
1615
		String titleString = null;
1616
		String numString = null;
1617
		String text = null;
1618
		if (isNotBlank(urlAttr)){
1619
			urlString = CdmUtils.Nz(state.getBaseMediaUrl()) + urlAttr;
1620
		}
1621
		while (reader.hasNext()) {
1622
			XMLEvent next = readNoWhitespace(reader);
1623
			if (isMyEndingElement(next, parentEvent)) {
1624
				if (isNotBlank(text)){
1625
				    if (isNeglectableFigureText(text)){
1626
				        fireWarningEvent("Text not yet handled for figures: " + text, next, 4);
1627
				    }
1628
				}
1629
				Media media = makeFigure(state, id, type, urlString, legendString, titleString, numString, next);
1630
				return media;
1631
			} else if (isStartingElement(next, FIGURE_LEGEND)) {
1632
				// TODO same as figure string ?
1633
				legendString = handleFootnoteString(state, reader, next, specimenImport, nomenclatureImport);
1634
			} else if (isStartingElement(next, FIGURE_TITLE)) {
1635
				titleString = getCData(state, reader, next);
1636
			} else if (isStartingElement(next, URL)) {
1637
				String localUrl = getCData(state, reader, next);
1638
				String url = CdmUtils.Nz(state.getBaseMediaUrl()) + localUrl;
1639
				if (isBlank(urlString)){
1640
					urlString = url;
1641
				}
1642
				if (! url.equals(urlString)){
1643
					String message = "URL attribute and URL element differ. Attribute: %s, Element: %s";
1644
					fireWarningEvent(String.format(message, urlString, url), next, 2);
1645
				}
1646
			} else if (isStartingElement(next, NUM)) {
1647
				numString = getCData(state, reader, next);
1648
			} else if (next.isCharacters()) {
1649
				text = CdmUtils.concat("", text, next.asCharacters().getData());
1650
			} else {
1651
				fireUnexpectedEvent(next, 0);
1652
			}
1653
		}
1654
		throw new IllegalStateException("<figure> has no end tag");
1655
	}
1656

    
1657

    
1658
	/**
1659
     * @param text2
1660
     * @return
1661
     */
1662
    private boolean isNeglectableFigureText(String text) {
1663
        if (text.matches("Fig\\.*")){
1664
            return true;
1665
        }else{
1666
            return false;
1667
        }
1668
    }
1669

    
1670

    
1671
    /**
1672
	 * @param state
1673
	 * @param id
1674
	 * @param type
1675
	 * @param urlString
1676
	 * @param legendString
1677
	 * @param titleString
1678
	 * @param numString
1679
	 * @param next
1680
	 */
1681
	private Media makeFigure(MarkupImportState state, String id, String type, String urlString,
1682
			String legendString, String titleString, String numString, XMLEvent next) {
1683
		Media media = null;
1684
//		boolean isFigure = false;  //no difference between figure and media since v3.3
1685
		try {
1686
			//TODO maybe everything is a figure as it is all taken from a book
1687
			if ("lineart".equals(type)) {
1688
//				isFigure = true;
1689
//				media = Figure.NewInstance(url.toURI(), null, null,	null);
1690
			} else if (type == null || "photo".equals(type)
1691
					|| "signature".equals(type)
1692
					|| "others".equals(type)) {
1693
				//TODO
1694
			} else {
1695
				String message = "Unknown figure type '%s'";
1696
				message = String.format(message, type);
1697
				fireWarningEvent(message, next, 2);
1698
			}
1699
			media = docImport.getImageMedia(urlString, docImport.getReadMediaData());
1700

    
1701
			if (media != null){
1702
				// title
1703
				if (StringUtils.isNotBlank(titleString)) {
1704
					media.putTitle(getDefaultLanguage(state), titleString);
1705
				}
1706
				// legend
1707
				if (StringUtils.isNotBlank(legendString)) {
1708
					media.putDescription(getDefaultLanguage(state), legendString);
1709
				}
1710
				if (StringUtils.isNotBlank(numString)) {
1711
					// TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
1712
					// vol.13)
1713
					Reference citation = state.getConfig().getSourceReference();
1714
					media.addSource(OriginalSourceType.Import, numString, "num", citation, null);
1715
					// TODO name used in source if available
1716
				}
1717
				// TODO which citation
1718
				if (StringUtils.isNotBlank(id)) {
1719
					media.addSource(OriginalSourceType.Import, id, null, state.getConfig().getSourceReference(), null);
1720
				} else {
1721
					String message = "Figure id should never be empty or null";
1722
					fireWarningEvent(message, next, 6);
1723
				}
1724

    
1725
				// text
1726
				// do nothing
1727
				registerGivenFigure(state, next, id, media);
1728

    
1729
			}else{
1730
				String message = "No media found: ";
1731
				fireWarningEvent(message, next, 4);
1732
			}
1733
		} catch (MalformedURLException e) {
1734
			String message = "Media uri has incorrect syntax: %s";
1735
			message = String.format(message, urlString);
1736
			fireWarningEvent(message, next, 4);
1737
//		} catch (URISyntaxException e) {
1738
//			String message = "Media uri has incorrect syntax: %s";
1739
//			message = String.format(message, urlString);
1740
//			fireWarningEvent(message, next, 4);
1741
		}
1742

    
1743
		return media;
1744
	}
1745

    
1746

    
1747
	private void registerGivenFigure(MarkupImportState state, XMLEvent next, String id, Media figure) {
1748
		state.registerFigure(id, figure);
1749
		Set<AnnotatableEntity> demands = state.getFigureDemands(id);
1750
		if (demands != null) {
1751
			for (AnnotatableEntity entity : demands) {
1752
				attachFigure(state, next, entity, figure);
1753
			}
1754
		}
1755
		save(figure, state);
1756
	}
1757

    
1758

    
1759
	private FootnoteDataHolder handleFootnoteRef(MarkupImportState state,
1760
			XMLEventReader reader, XMLEvent parentEvent)
1761
			throws XMLStreamException {
1762
		FootnoteDataHolder result = new FootnoteDataHolder();
1763
		Map<String, Attribute> attributes = getAttributes(parentEvent);
1764
		result.ref = getAndRemoveAttributeValue(attributes, REF);
1765
		checkNoAttributes(attributes, parentEvent);
1766

    
1767
		// text is not handled, needed only for debugging purposes
1768
		String text = "";
1769
		while (reader.hasNext()) {
1770
			XMLEvent next = readNoWhitespace(reader);
1771
			// if (isStartingElement(next, FOOTNOTE_STRING)){
1772
			// String string = handleFootnoteString(state, reader, next);
1773
			// result.string = string;
1774
			// }else
1775
			if (isMyEndingElement(next, parentEvent)) {
1776
				if (StringUtils.isNotBlank(text)){
1777
					fireWarningEvent("text is not empty but not handled during import", parentEvent, 4);
1778
				}
1779
				return result;
1780
			} else if (next.isCharacters() && unhandledElements.isEmpty()) {
1781
				text += next.asCharacters().getData();
1782
			} else if (isStartingElement(next, NUM)) {
1783
				//ignore numbering of footnotes as they are numbered differently in the CDM
1784
				handleIgnoreElement(next);
1785
			} else {
1786
				handleUnexpectedElement(next);
1787
			}
1788
		}
1789
		return result;
1790
	}
1791

    
1792

    
1793

    
1794
	private String handleFootnoteString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1795
		boolean isTextMode = true;
1796
		String text = "";
1797
		while (reader.hasNext()) {
1798
			XMLEvent next = readNoWhitespace(reader);
1799
			if (isMyEndingElement(next, parentEvent)) {
1800
				return text;
1801
			} else if (next.isEndElement()) {
1802
				if (isEndingElement(next, FULL_NAME)) {
1803
					popUnimplemented(next.asEndElement());
1804
				} else if (isEndingElement(next, BR)) {
1805
					isTextMode = true;
1806
				} else if (isHtml(next)) {
1807
					text += getXmlTag(next);
1808
				} else {
1809
					handleUnexpectedEndElement(next.asEndElement());
1810
				}
1811
			} else if (next.isStartElement()) {
1812
				if (isStartingElement(next, FULL_NAME)) {
1813
					handleNotYetImplementedElement(next);
1814
				} else if (isStartingElement(next, GATHERING)) {
1815
					text += specimenImport.handleInLineGathering(state, reader, next);
1816
				} else if (isStartingElement(next, REFERENCES)) {
1817
					text += " " + handleInLineReferences(state, reader, next, nomenclatureImport) + " ";
1818
				} else if (isStartingElement(next, BR)) {
1819
					text += "<br/>";
1820
					isTextMode = false;
1821
				} else if (isStartingElement(next, NOMENCLATURE)) {
1822
					handleNotYetImplementedElement(next);
1823
				} else if (isHtml(next)) {
1824
					text += getXmlTag(next);
1825
				} else {
1826
					handleUnexpectedStartElement(next.asStartElement());
1827
				}
1828
			} else if (next.isCharacters()) {
1829
				if (!isTextMode) {
1830
					String message = "footnoteString is not in text mode";
1831
					fireWarningEvent(message, next, 6);
1832
				} else {
1833
					text += next.asCharacters().getData().trim();
1834
					// getCData(state, reader, next); does not work as we have inner tags like <references>
1835
				}
1836
			} else {
1837
				handleUnexpectedEndElement(next.asEndElement());
1838
			}
1839
		}
1840
		throw new IllegalStateException("<footnoteString> has no closing tag");
1841

    
1842
	}
1843

    
1844
	private static final List<String> htmlList = Arrays.asList("sub", "sup",
1845
			"ol", "ul", "li", "i", "b", "table", "br","tr","td","th");
1846

    
1847
	protected boolean isHtml(XMLEvent event) {
1848
		if (event.isStartElement()) {
1849
			String tag = event.asStartElement().getName().getLocalPart();
1850
			return htmlList.contains(tag);
1851
		} else if (event.isEndElement()) {
1852
			String tag = event.asEndElement().getName().getLocalPart();
1853
			return htmlList.contains(tag);
1854
		} else {
1855
			return false;
1856
		}
1857

    
1858
	}
1859

    
1860

    
1861
	private String handleInLineReferences(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent,
1862
	        MarkupNomenclatureImport nomenclatureImport) throws XMLStreamException {
1863
		checkNoAttributes(parentEvent);
1864

    
1865
		boolean hasReference = false;
1866
		String text = "";
1867
		while (reader.hasNext()) {
1868
			XMLEvent next = readNoWhitespace(reader);
1869
			if (isMyEndingElement(next, parentEvent)) {
1870
				checkMandatoryElement(hasReference, parentEvent.asStartElement(), REFERENCE);
1871
				return text;
1872
			} else if (isStartingElement(next, REFERENCE)) {
1873
				text += handleInLineReference(state, reader, next, nomenclatureImport);
1874
				hasReference = true;
1875
			} else {
1876
				handleUnexpectedElement(next);
1877
			}
1878
		}
1879
		throw new IllegalStateException("<References> has no closing tag");
1880
	}
1881

    
1882
	private String handleInLineReference(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent, MarkupNomenclatureImport nomenclatureImport)throws XMLStreamException {
1883
		Reference reference = nomenclatureImport.handleReference(state, reader, parentEvent);
1884
		fireWarningEvent("Check correct usage of inline reference", parentEvent, 3);
1885
		IntextReference intext = IntextReference.NewInstance(reference, null, 0, 0);
1886
		save(reference, state);
1887
		return intext.toInlineString(reference.getTitleCache());
1888
	}
1889

    
1890
	protected class SubheadingResult{
1891
	    String text;
1892
	    StringReferences references;
1893
        List<IntextReference> inlineReferences;
1894
	}
1895

    
1896
	/**
1897
	 * Handle < string > .
1898
	 * @param state
1899
	 * @param reader
1900
	 * @param parentEvent
1901
	 * @param feature only needed for distributionLocalities
1902
	 * @return
1903
	 * @throws XMLStreamException
1904
	 */
1905
	protected Map<String, SubheadingResult> handleString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature feature)throws XMLStreamException {
1906
		// attributes
1907
		String classValue = getClassOnlyAttribute(parentEvent, false);
1908
		if (StringUtils.isNotBlank(classValue)) {
1909
			String message = "class attribute for <string> not yet implemented";
1910
			fireWarningEvent(message, parentEvent, 2);
1911
		}
1912
		boolean isHabitat = false;
1913

    
1914
		// subheadings
1915
		Map<String, SubheadingResult> subHeadingMap = new HashMap<>();
1916
		String currentSubheading = null;
1917

    
1918
		boolean isTextMode = true;
1919
		String text = "";
1920
		StringReferences currentReferences = null;
1921
		List<IntextReference> inlineReferences = new ArrayList<>();
1922
		boolean lastWasReference = false;
1923
		while (reader.hasNext()) {
1924
			XMLEvent next = readNoWhitespace(reader);
1925
			if (isMyEndingElement(next, parentEvent)) {
1926
				putCurrentSubheading(subHeadingMap, currentSubheading, text, currentReferences, inlineReferences);
1927
				return subHeadingMap;
1928
			}
1929
			//check if last event was reference
1930
			if (lastWasReference && !isStartingElement(next, BR) && !isEndingElement(next, BR)
1931
			        && !isStartingElement(next, SUB_HEADING)){
1932
			    for (LabeledReference labeledRef : currentReferences.content){
1933
			        if (labeledRef.ref != null){
1934
			            IntextReference intext = IntextReference.NewInstance(labeledRef.ref, null, 0, 0);
1935
			            inlineReferences.add(intext);
1936
			            text += intext.toInlineString(labeledRef.label);
1937
			        }else{
1938
			            text += labeledRef.label;
1939
			        }
1940
			    }
1941
			    lastWasReference = false;
1942
			}
1943
			if (isStartingElement(next, BR)) {
1944
				text += "<br/>";
1945
				isTextMode = false;
1946
			} else if (isEndingElement(next, BR)) {
1947
				isTextMode = true;
1948
			} else if (isHtml(next)) {
1949
				text += getXmlTag(next);
1950
			} else if (isStartingElement(next, SUB_HEADING)) {
1951
				text = putCurrentSubheading(subHeadingMap, currentSubheading, text, currentReferences, inlineReferences);
1952
				currentReferences = null;
1953
				inlineReferences = new ArrayList<>();
1954
				lastWasReference = false;
1955
				// TODO footnotes
1956
				currentSubheading = getCData(state, reader, next).trim();
1957
			} else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
1958
				if (feature != null && !feature.equals(Feature.DISTRIBUTION())) {
1959
					String message = "Distribution locality only allowed for feature of type 'distribution'";
1960
					fireWarningEvent(message, next, 4);
1961
				}
1962
				text += handleDistributionLocality(state, reader, next);
1963
			} else if (next.isCharacters()) {
1964
				if (! isTextMode) {
1965
					String message = "String is not in text mode";
1966
					fireWarningEvent(message, next, 6);
1967
				} else {
1968
					text += next.asCharacters().getData();
1969
				}
1970
			} else if (isStartingElement(next, HEADING)) {
1971
				//TODO
1972
				handleNotYetImplementedElement(next);
1973
			} else if (isStartingElement(next, VERNACULAR_NAMES)) {
1974
				//TODO
1975
				handleNotYetImplementedElement(next);
1976
			} else if (isStartingElement(next, QUOTE)) {
1977
				//TODO
1978
				handleNotYetImplementedElement(next);
1979
			} else if (isStartingElement(next, DEDICATION)) {
1980
				//TODO
1981
				handleNotYetImplementedElement(next);
1982
			} else if (isStartingElement(next, TAXONTYPE)) {
1983
				//TODO
1984
				handleNotYetImplementedElement(next);
1985
			} else if (isStartingElement(next, FULL_NAME)) {
1986
				//TODO
1987
				handleNotYetImplementedElement(next);
1988
			}else if (isStartingElement(next, REFERENCES)) {
1989
				if (currentReferences != null){
1990
				    fireWarningEvent("References do already exist", next, 2);
1991
				}
1992
			    currentReferences = handleStringReferences(state, reader, next);
1993
			    lastWasReference = true;
1994
			}else if (isStartingElement(next, REFERENCE)) {
1995
                //TODO
1996
                handleNotYetImplementedElement(next);
1997
            } else if (isStartingElement(next, GATHERING)) {
1998
				//TODO
1999
				handleNotYetImplementedElement(next);
2000
			} else if (isStartingElement(next, ANNOTATION)) {
2001
				//TODO  //TODO test handleSimpleAnnotation
2002
				handleNotYetImplementedElement(next);
2003
			} else if (isStartingElement(next, HABITAT)) {
2004
			    text += featureImport.handleHabitat(state, reader, next);
2005
			    isHabitat = true;
2006
			} else if (isStartingElement(next, FIGURE_REF)) {
2007
				//TODO
2008
				handleNotYetImplementedElement(next);
2009
			} else if (isStartingElement(next, FIGURE)) {
2010
				//TODO
2011
				handleNotYetImplementedElement(next);
2012
			} else if (isStartingElement(next, FOOTNOTE_REF)) {
2013
				//TODO
2014
				handleNotYetImplementedElement(next);
2015
			} else if (isStartingElement(next, FOOTNOTE)) {
2016
				//TODO
2017
				handleNotYetImplementedElement(next);
2018
			} else if (isStartingElement(next, WRITER)) {
2019
				//TODO
2020
				handleNotYetImplementedElement(next);
2021
			} else if (isStartingElement(next, DATES)) {
2022
				//TODO
2023
				handleNotYetImplementedElement(next);
2024
			} else if (isStartingElement(next, TO_KEY)) {
2025
			    handleNotYetImplementedElement(next);
2026
			} else {
2027
				handleUnexpectedElement(next);
2028
			}
2029
		}
2030
		throw new IllegalStateException("<String> has no closing tag");
2031
	}
2032

    
2033

    
2034
	/**
2035
	 * container class more or less representing a list of labeled references
2036
	 */
2037
	protected class StringReferences{
2038
	    String subheading;
2039
	    List<LabeledReference> content = new ArrayList<>() ; //either String or LabeledReference
2040
	    @Override
2041
        public String toString(){
2042
	        String result = null;
2043
	        for (LabeledReference labRef : content){
2044
	            result = CdmUtils.concat("", labRef.label);
2045
	        }
2046
	        return result;
2047
	    }
2048
        public List<LabeledReference> getReferences() {
2049
            List<LabeledReference> result = new ArrayList<>();
2050
            for (LabeledReference labRef : content){
2051
                if (labRef.ref != null){
2052
                    result.add(labRef);
2053
                }
2054
            }
2055
            return result;
2056
        }
2057
	}
2058

    
2059
	protected class LabeledReference{
2060
	    public LabeledReference(Reference ref, String detail, String label) {
2061
            this.ref = ref; this.detail = detail; this.label = label;
2062
        }
2063
	    protected Reference ref;  //if null, this LabeledReference represents only a string in between references
2064
	    protected String detail; //micro reference
2065
	    protected String label;
2066
	}
2067

    
2068
    private StringReferences handleStringReferences(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
2069
        checkNoAttributes(parentEvent);
2070
        StringReferences result = new StringReferences();
2071
        while (reader.hasNext()) {
2072
            XMLEvent next = readNoWhitespace(reader);
2073
            if (isMyEndingElement(next, parentEvent)) {
2074
                return result;
2075
            } else if (isStartingElement(next, SUB_HEADING)) {
2076
                String subheading = getCData(state, reader, next);
2077
                if (!subheading.matches("(References?|Literature):?")){
2078
                    fireWarningEvent("Subheading for references not recognized: " + subheading, next, 4);
2079
                }
2080
                result.subheading = subheading;
2081
            } else if (isStartingElement(next, REFERENCE)) {
2082
                handleInlineReference(state, reader, next, result);
2083
            } else {
2084
                handleUnexpectedElement(next);
2085
            }
2086
        }
2087
        throw new IllegalStateException("<References> has no closing tag");
2088
    }
2089

    
2090
    private void handleInlineReference(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
2091
            StringReferences result) throws XMLStreamException {
2092
        checkNoAttributes(parentEvent);
2093
        boolean hasRefPart = false;
2094
        Map<String, String> refMap = new HashMap<>();
2095
        String label = "";
2096
        while (reader.hasNext()) {
2097
            XMLEvent next = readNoWhitespace(reader);
2098
            if (isMyEndingElement(next, parentEvent)) {
2099
                checkMandatoryElement(hasRefPart, parentEvent.asStartElement(), REF_PART);
2100
                String details = refMap.get(DETAILS);
2101
//              String label = makeLabel(state, refMap, next);
2102
                Reference ref = createReference(state, refMap, next);
2103
                ref = state.getDeduplicationHelper().getExistingReference(ref);
2104

    
2105
                String label2 = ref.getTitleCache(); //TODO preliminary for debugging and testing
2106
                result.content.add(new LabeledReference(ref, details, label));
2107
                return;
2108
            } else if (isStartingElement(next, REF_PART)) {
2109
                String classValue = handleRefPart(state, reader, next, refMap);
2110
                String text = refMap.get(classValue);
2111
                if (classValue.equals(YEAR)){
2112
                    text = "("+text+")";
2113
                }
2114
                hasRefPart = true;
2115
                label = CdmUtils.concat(" ", label, text);
2116
            } else {
2117
                handleUnexpectedElement(next);
2118
            }
2119
        }
2120
        throw new IllegalStateException("<References> has no closing tag");
2121

    
2122
    }
2123

    
2124

    
2125
    // This is more or less a duplicate of the nomenclature import; maybe merge later.
2126
    private Reference createReference(MarkupImportState state,
2127
            Map<String, String> refMap, XMLEvent parentEvent) {
2128

    
2129
        Reference reference;
2130

    
2131
        String type = getAndRemoveMapKey(refMap, PUBTYPE);
2132
        String authorStr = getAndRemoveMapKey(refMap, AUTHOR);
2133
        String titleStr = getAndRemoveMapKey(refMap, PUBTITLE);
2134
        String titleCache = getAndRemoveMapKey(refMap, PUBFULLNAME);
2135
        String volume = getAndRemoveMapKey(refMap, VOLUME);
2136
        String edition = getAndRemoveMapKey(refMap, EDITION);
2137
        String editors = getAndRemoveMapKey(refMap, EDITORS);
2138
        String year = getAndRemoveMapKey(refMap, YEAR);
2139
        String pubName = getAndRemoveMapKey(refMap, PUBNAME);
2140
        String pages = getAndRemoveMapKey(refMap, PAGES);
2141
        String publication = getAndRemoveMapKey(refMap, PUBLOCATION);
2142
        String publisher = getAndRemoveMapKey(refMap, PUBLISHER);
2143
        String appendix = getAndRemoveMapKey(refMap, APPENDIX);
2144
        String issue = getAndRemoveMapKey(refMap, ISSUE);
2145

    
2146
        reference = handleNonCitationSpecific(state, type, authorStr, titleStr,
2147
                    titleCache, volume, issue, edition, editors, pubName, appendix, pages, parentEvent);
2148

    
2149
        //year
2150
        VerbatimTimePeriod timeperiod = TimePeriodParser.parseStringVerbatim(year);
2151
        if (reference.getType().equals(ReferenceType.BookSection)){
2152
            reference.getInBook().setDatePublished(timeperiod);
2153
        }
2154
        reference.setDatePublished(timeperiod);
2155

    
2156
        //Quickfix for these 2 attributes (publication, publisher) used in feature.references
2157
        Reference inRef = reference.getInReference() == null ? reference : reference.getInReference();
2158
        //publication
2159
        if (isNotBlank(publisher)){
2160
            inRef.setPublisher(publisher);
2161
        }
2162

    
2163
        //publisher
2164
        if (isNotBlank(publication)){
2165
            inRef.setPlacePublished(publication);
2166
        }
2167

    
2168
        // TODO
2169
        String[] unhandledList = new String[] { ALTERNATEPUBTITLE, NOTES, STATUS };
2170
        for (String unhandled : unhandledList) {
2171
            String value = getAndRemoveMapKey(refMap, unhandled);
2172
            if (isNotBlank(value)) {
2173
                this.handleNotYetImplementedAttributeValue(parentEvent, CLASS, unhandled);
2174
            }
2175
        }
2176

    
2177
        for (String key : refMap.keySet()) {
2178
            if (!DETAILS.equalsIgnoreCase(key)) {
2179
                this.fireUnexpectedAttributeValue(parentEvent, CLASS, key);
2180
            }
2181
        }
2182

    
2183
        return reference;
2184
    }
2185

    
2186

    
2187
    /**
2188
     * Create reference for non nomenclatural references
2189
     * @return
2190
     */
2191
    protected Reference handleNonCitationSpecific(MarkupImportState state, String type, String authorStr,
2192
            String titleStr, String titleCache, String volume, String issue, String edition,
2193
            String editors, String pubName, String appendix, String pages, XMLEvent parentEvent) {
2194

    
2195
        Reference reference;
2196

    
2197
        //volume / issue
2198
        if (isBlank(volume) && isNotBlank(issue)){
2199
            String message = "Issue ('"+issue+"') exists but no volume";
2200
            fireWarningEvent(message, parentEvent, 4);
2201
            volume = issue;
2202
        }else if (isNotBlank(issue)){
2203
            volume = volume + "("+ issue + ")";
2204
        }
2205

    
2206
        //pubName / appendix
2207
        if (isNotBlank(appendix)){
2208
            pubName = pubName == null ?  appendix : (pubName + " " + appendix).replaceAll("  ", " ");
2209
        }
2210

    
2211
        if (isArticleNonCitation(type, pubName, volume, editors)) {
2212
            IArticle article = ReferenceFactory.newArticle();
2213
            if (pubName != null) {
2214
                IJournal journal = ReferenceFactory.newJournal();
2215
                journal.setTitle(pubName);
2216
                article.setInJournal(journal);
2217
            }else{
2218
                fireWarningEvent("Article has no journal", parentEvent, 4);
2219
            }
2220
            reference = (Reference) article;
2221
        } else {
2222
            if (isBookSection(type, authorStr, titleStr, editors, pubName, volume)){
2223
                IBookSection bookSection = ReferenceFactory.newBookSection();
2224
                if (pubName != null) {
2225
                    IBook book = ReferenceFactory.newBook();
2226
                    book.setTitle(pubName);
2227
                    bookSection.setInBook(book);
2228
                }
2229
                reference = (Reference)bookSection;
2230
            }else{
2231
                //??
2232
                Reference bookOrPartOf = ReferenceFactory.newGeneric();
2233
                if (pubName != null && titleStr != null) {
2234
                    Reference inReference = ReferenceFactory.newGeneric();
2235
                    inReference.setTitle(pubName);
2236
                    bookOrPartOf.setInReference(inReference);
2237
                }
2238
                reference = bookOrPartOf;
2239
            }
2240
        }
2241

    
2242
        //author
2243
        TeamOrPersonBase<?> author = createAuthor(state, authorStr);
2244
        reference.setAuthorship(author);
2245

    
2246
        //title
2247
        reference.setTitle(titleStr);
2248
        if (StringUtils.isNotBlank(titleCache)) {
2249
            reference.setTitleCache(titleCache, true);
2250
        }
2251

    
2252
        //edition
2253
        if(reference.getInReference() != null){
2254
            reference.getInReference().setEdition(edition);
2255
            reference.getInReference().setEditor(editors);
2256
        }else{
2257
            //edition
2258
            reference.setEdition(edition);
2259
            reference.setEditor(editors);
2260
        }
2261

    
2262
        //volume
2263
        reference.setVolume(volume);
2264

    
2265
        //pages
2266
        reference.setPages(pages);
2267

    
2268
        return reference;
2269
    }
2270

    
2271
    private boolean isBookSection(String type, String authorStr, String pubTitle,
2272
            String editors, String pubName, String volume) {
2273
        //type not yet handled
2274
        if (authorStr != null && editors != null
2275
                && pubTitle != null && pubName != null){
2276
            return true;
2277
        }else if (pubTitle != null && pubName != null && volume == null){
2278
            return true;
2279
        }else{
2280
            return false;
2281
        }
2282
    }
2283

    
2284

    
2285
    private boolean isArticleNonCitation(String type, String pubName, String volume, String editors) {
2286
        if ("journal".equalsIgnoreCase(type)){
2287
            return true;
2288
        }else if (volume != null && editors == null){
2289
            if (pubName != null && IJournal.guessIsJournalName(pubName)){
2290
                return true;
2291
            }else{
2292
                return false;  //unclear
2293
            }
2294
        }else{
2295
            return false;
2296
        }
2297
    }
2298

    
2299
    protected String handleRefPart(MarkupImportState state, XMLEventReader reader,
2300
            XMLEvent parentEvent, Map<String, String> refMap)
2301
            throws XMLStreamException {
2302
        String classValue = getClassOnlyAttribute(parentEvent);
2303

    
2304
        String text = "";
2305
        while (reader.hasNext()) {
2306
            XMLEvent next = readNoWhitespace(reader);
2307
            if (isMyEndingElement(next, parentEvent)) {
2308
                refMap.put(classValue, text);
2309
                return classValue;
2310
            } else if (next.isStartElement()) {
2311
                if (isStartingElement(next, ANNOTATION)) {
2312
                    handleNotYetImplementedElement(next); // TODO test handleSimpleAnnotation
2313
                } else if (isStartingElement(next, ITALICS)) {
2314
                    handleNotYetImplementedElement(next);
2315
                } else if (isStartingElement(next, BOLD)) {
2316
                    handleNotYetImplementedElement(next);
2317
                } else {
2318
                    handleUnexpectedStartElement(next.asStartElement());
2319
                }
2320
            } else if (next.isCharacters()) {
2321
                text += next.asCharacters().getData();
2322
            } else {
2323
                handleUnexpectedEndElement(next.asEndElement());
2324
            }
2325
        }
2326
        throw new IllegalStateException("RefPart has no closing tag");
2327
    }
2328

    
2329

    
2330
    private boolean isBlankOrPunctuation(String text) {
2331
        if (text == null){
2332
            return true;
2333
        } else {
2334
            return text.matches("^[\\s\\.,;:]*$");
2335
        }
2336
    }
2337

    
2338

    
2339
    /**
2340
     *Is heading an "habitat" type heading
2341
     * @param heading
2342
     * @return true if heading matches something like Eco(logy), Habitat(s) or Habitat & Ecology
2343
     */
2344
    private boolean isHabitatHeading(String heading) {
2345
        return heading.trim().matches("(Ecol(ogy)?|Habitat|Habitat\\s&\\sEcology)\\.?");
2346
    }
2347

    
2348

    
2349
	private String putCurrentSubheading(Map<String, SubheadingResult> subHeadingMap, String currentSubheading,
2350
	        String text, StringReferences fullReferences, List<IntextReference> inlineReferences) {
2351
		if (isNotBlank(text) || (fullReferences != null && isNotEmptyCollection(fullReferences.content))
2352
		        ||isNotEmptyCollection(inlineReferences)) {
2353
			SubheadingResult result = new SubheadingResult();
2354
			text = removeStartingMinus(text);
2355
			result.text = text.trim();
2356
			result.references = fullReferences == null ? new StringReferences() : fullReferences;
2357
			result.inlineReferences = inlineReferences;
2358
            subHeadingMap.put(currentSubheading, result);
2359
		}
2360
		return "";
2361
	}
2362

    
2363
	/**
2364
     * @param references2
2365
     * @return
2366
     */
2367
    protected boolean isNotEmptyCollection(Collection<?> list) {
2368
        return list != null && !list.isEmpty();
2369
    }
2370

    
2371

    
2372
    private String removeStartingMinus(String string) {
2373
		string = replaceStart(string, "-");
2374
		string = replaceStart(string, "\u002d");
2375
		string = replaceStart(string, "\u2013");
2376
		string = replaceStart(string, "\u2014");
2377
		string = replaceStart(string, "--");
2378
		return string;
2379
	}
2380

    
2381

    
2382
	/**
2383
	 * @param value
2384
	 * @param replacementString
2385
	 */
2386
	private String replaceStart(String value, String replacementString) {
2387
		if (value.startsWith(replacementString) ){
2388
			value = value.substring(replacementString.length()).trim();
2389
		}
2390
		while (value.startsWith("-") || value.startsWith("\u2014") ){
2391
			value = value.substring("-".length()).trim();
2392
		}
2393
		return value;
2394
	}
2395

    
2396

    
2397
	private String handleDistributionLocality(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
2398
		Map<String, Attribute> attributes = getAttributes(parentEvent);
2399
		String classValue = getAndRemoveRequiredAttributeValue(parentEvent, attributes, CLASS);
2400
		String statusValue =getAndRemoveAttributeValue(attributes, STATUS);
2401
		String frequencyValue =getAndRemoveAttributeValue(attributes, FREQUENCY);
2402

    
2403
		Taxon taxon = state.getCurrentTaxon();
2404
		// TODO which ref to take?
2405
		Reference sourceReference = state.getConfig().getSourceReference();
2406

    
2407
		String text = "";
2408
		while (reader.hasNext()) {
2409
			XMLEvent next = readNoWhitespace(reader);
2410
			if (isMyEndingElement(next, parentEvent)) {
2411
				if (StringUtils.isNotBlank(text)) {
2412
					String label = CdmUtils.removeTrailingDots(normalize(text));
2413
					TaxonDescription description = getExtractedMarkupMarkedDescription(state, taxon, sourceReference);
2414
					NamedAreaLevel level = makeNamedAreaLevel(state,classValue, next);
2415

    
2416
					//status
2417
					PresenceAbsenceTerm status = null;
2418
					if (isNotBlank(statusValue)){
2419
						try {
2420
							status = state.getTransformer().getPresenceTermByKey(statusValue);
2421
							if (status == null){
2422
							    UUID uuid = state.getTransformer().getPresenceTermUuid(statusValue);
2423
							    if (uuid != null){
2424
							        status = this.getPresenceAbsenceTerm(state, uuid, statusValue, statusValue, statusValue, false, null);
2425
							    }
2426
							}
2427
							if (status == null){
2428
								//TODO
2429
								String message = "The presence/absence status '%s' could not be transformed to an CDM status";
2430
								fireWarningEvent(String.format(message, statusValue), next, 4);
2431
							}
2432
						} catch (UndefinedTransformerMethodException e) {
2433
							throw new RuntimeException(e);
2434
						}
2435
					}else{
2436
						status = PresenceAbsenceTerm.PRESENT();
2437
					}
2438
					//frequency
2439
					if (isNotBlank(frequencyValue)){
2440
						if (frequencyValue.equalsIgnoreCase("absent") && PresenceAbsenceTerm.PRESENT().equals(status)){ //to be on the safe side that not real status has been defined yet.
2441
						    status = PresenceAbsenceTerm.ABSENT();
2442
						}else{
2443
						    String message = "The frequency attribute is currently not yet available in CDM";
2444
						    fireWarningEvent(message, parentEvent, 6);
2445
						}
2446
					}
2447

    
2448
					NamedArea higherArea = null;
2449
					List<NamedArea> areas = new ArrayList<>();
2450

    
2451
					String patSingleArea = "([^,\\(]{3,})";
2452
					String patSeparator = "(,|\\sand\\s)";
2453
					String hierarchiePattern = String.format("%s\\((%s(%s%s)*)\\)", patSingleArea, patSingleArea, patSeparator, patSingleArea);
2454
					Pattern patHierarchie = Pattern.compile(hierarchiePattern, Pattern.CASE_INSENSITIVE);
2455
					Matcher matcher = patHierarchie.matcher(label);
2456
					if (matcher.matches()){
2457
						String higherAreaStr = matcher.group(1).trim();
2458
						higherArea =  makeArea(state, higherAreaStr, level);
2459
						String[] innerAreas = matcher.group(2).split(patSeparator);
2460
						for (String innerArea : innerAreas){
2461
							if (isNotBlank(innerArea)){
2462
								NamedArea singleArea = makeArea(state, innerArea.trim(), level);
2463
								areas.add(singleArea);
2464
								NamedArea partOf = singleArea.getPartOf();
2465
//								if (partOf == null){
2466
//									singleArea.setPartOf(higherArea);
2467
//								}
2468
							}
2469
						}
2470
					}else{
2471
						NamedArea singleArea = makeArea(state, label, level);
2472
						areas.add(singleArea);
2473
					}
2474

    
2475
					for (NamedArea area : areas){
2476
						//create distribution
2477
						Distribution distribution = Distribution.NewInstance(area,status);
2478
						distribution.addPrimaryTaxonomicSource(sourceReference);
2479
						description.addElement(distribution);
2480
					}
2481
				} else {
2482
					String message = "Empty distribution locality";
2483
					fireWarningEvent(message, next, 4);
2484
				}
2485
				return text;
2486
			} else if (isStartingElement(next, COORDINATES)) {
2487
				//TODO
2488
				handleNotYetImplementedElement(next);
2489
			} else if (isEndingElement(next, COORDINATES)) {
2490
				//TODO
2491
				popUnimplemented(next.asEndElement());
2492
			} else if (next.isCharacters()) {
2493
				text += next.asCharacters().getData();
2494
			} else {
2495
				handleUnexpectedElement(next);
2496
			}
2497
		}
2498
		throw new IllegalStateException("<DistributionLocality> has no closing tag");
2499
	}
2500

    
2501
	   /**
2502
     * @param state
2503
     * @param taxon
2504
     * @param ref
2505
     * @return
2506
     */
2507
    protected TaxonDescription getExtractedMarkupMarkedDescription(MarkupImportState state, Taxon taxon, Reference sourceReference) {
2508
        MarkerType markerType = getMarkerType(
2509
                state,
2510
                MarkupTransformer.uuidMarkerExtractedMarkupData,
2511
                "Extracted factual data", "Marker type for factual data imported from markup where the markup for this data was included in parent markup that was also imported including the text from this markup.",
2512
                "Extr. data",
2513
                null);
2514
        String title = "Extracted markup data for " + taxon.getName().getTitleCache();
2515
        TaxonDescription description = getMarkedTaxonDescription(taxon, markerType, false, true, sourceReference, title);
2516
        return description;
2517
    }
2518

    
2519
}
(9-9/19)