00af3a51f46b9a813f060341d349bca1a415ac52
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupDocumentImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.net.MalformedURLException;
13 import java.net.URL;
14 import java.util.ArrayList;
15 import java.util.Arrays;
16 import java.util.HashMap;
17 import java.util.HashSet;
18 import java.util.LinkedList;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Queue;
22 import java.util.Set;
23 import java.util.UUID;
24 import java.util.regex.Matcher;
25 import java.util.regex.Pattern;
26
27 import javax.xml.stream.FactoryConfigurationError;
28 import javax.xml.stream.Location;
29 import javax.xml.stream.XMLEventReader;
30 import javax.xml.stream.XMLStreamException;
31 import javax.xml.stream.events.Attribute;
32 import javax.xml.stream.events.StartElement;
33 import javax.xml.stream.events.XMLEvent;
34
35 import org.apache.commons.lang.StringUtils;
36 import org.apache.log4j.Logger;
37 import org.springframework.beans.factory.annotation.Autowired;
38 import org.springframework.security.access.PermissionEvaluator;
39 import org.springframework.security.authentication.AuthenticationManager;
40 import org.springframework.security.core.Authentication;
41 import org.springframework.stereotype.Component;
42 import org.springframework.transaction.TransactionStatus;
43
44 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
45 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
46 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacadeCacheStrategy;
47 import eu.etaxonomy.cdm.common.CdmUtils;
48 import eu.etaxonomy.cdm.ext.geo.GeoServiceArea;
49 import eu.etaxonomy.cdm.io.common.ICdmIO;
50 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
51 import eu.etaxonomy.cdm.io.markup.UnmatchedLeads.UnmatchedLeadsKey;
52 import eu.etaxonomy.cdm.model.agent.AgentBase;
53 import eu.etaxonomy.cdm.model.agent.INomenclaturalAuthor;
54 import eu.etaxonomy.cdm.model.agent.Team;
55 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
56 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
57 import eu.etaxonomy.cdm.model.common.Annotation;
58 import eu.etaxonomy.cdm.model.common.AnnotationType;
59 import eu.etaxonomy.cdm.model.common.CdmBase;
60 import eu.etaxonomy.cdm.model.common.Extension;
61 import eu.etaxonomy.cdm.model.common.ExtensionType;
62 import eu.etaxonomy.cdm.model.common.Language;
63 import eu.etaxonomy.cdm.model.common.TermVocabulary;
64 import eu.etaxonomy.cdm.model.common.TimePeriod;
65 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
66 import eu.etaxonomy.cdm.model.description.Distribution;
67 import eu.etaxonomy.cdm.model.description.Feature;
68 import eu.etaxonomy.cdm.model.description.KeyStatement;
69 import eu.etaxonomy.cdm.model.description.PolytomousKey;
70 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
71 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
72 import eu.etaxonomy.cdm.model.description.PresenceTerm;
73 import eu.etaxonomy.cdm.model.description.TaxonDescription;
74 import eu.etaxonomy.cdm.model.description.TextData;
75 import eu.etaxonomy.cdm.model.location.NamedArea;
76 import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
77 import eu.etaxonomy.cdm.model.location.NamedAreaType;
78 import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;
79 import eu.etaxonomy.cdm.model.media.Media;
80 import eu.etaxonomy.cdm.model.name.CultivarPlantName;
81 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
82 import eu.etaxonomy.cdm.model.name.NameTypeDesignationStatus;
83 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
84 import eu.etaxonomy.cdm.model.name.NomenclaturalStatus;
85 import eu.etaxonomy.cdm.model.name.NomenclaturalStatusType;
86 import eu.etaxonomy.cdm.model.name.NonViralName;
87 import eu.etaxonomy.cdm.model.name.Rank;
88 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
89 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
90 import eu.etaxonomy.cdm.model.occurrence.Collection;
91 import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
92 import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
93 import eu.etaxonomy.cdm.model.occurrence.Specimen;
94 import eu.etaxonomy.cdm.model.reference.IArticle;
95 import eu.etaxonomy.cdm.model.reference.IJournal;
96 import eu.etaxonomy.cdm.model.reference.Reference;
97 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
98 import eu.etaxonomy.cdm.model.reference.ReferenceType;
99 import eu.etaxonomy.cdm.model.taxon.Classification;
100 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
101 import eu.etaxonomy.cdm.model.taxon.Taxon;
102 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
103 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
104 import eu.etaxonomy.cdm.strategy.parser.NameTypeParser;
105 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
106 import eu.etaxonomy.cdm.strategy.parser.SpecimenTypeParser;
107 import eu.etaxonomy.cdm.strategy.parser.SpecimenTypeParser.TypeInfo;
108
109 /**
110 * @author a.mueller
111 *
112 */
113 @Component
114 public class MarkupDocumentImport extends MarkupImportBase implements ICdmIO<MarkupImportState> {
// Element / attribute names of the markup XML format. Several of them are
// used in the handler methods below (isStartingElement / isEndingElement /
// attribute look-ups); the remaining ones are presumably used further down
// in this file.
115 private static final String FREQUENCY = "frequency";
116
117 private static final String TAXONTYPE = "taxontype";
118
119 private static final String DEDICATION = "dedication";
120
121 private static final String QUOTE = "quote";
122
// Class-wide log4j logger.
123 private static final Logger logger = Logger.getLogger(MarkupDocumentImport.class);
124
// Named boolean flags. NOTE(review): their call sites are not visible in this
// part of the file - presumably arguments for description look-up helpers.
125 private static final boolean CREATE_NEW = true;
126 private static final boolean IS_IMAGE_GALLERY = true;
127 private static final boolean NO_IMAGE_GALLERY = false;
128
// Further element / attribute names, kept in (roughly) alphabetical order.
129 private static final String ACCEPTED = "accepted";
130 private static final String ACCEPTED_NAME = "acceptedName";
131 private static final String ADDENDA = "addenda";
132 private static final String ALTERNATEPUBTITLE = "alternatepubtitle";
133 private static final String ALTERNATIVE_COLLECTION_TYPE_STATUS = "alternativeCollectionTypeStatus";
134 private static final String ALTERNATIVE_COLLECTOR = "alternativeCollector";
135 private static final String ALTERNATIVE_FIELD_NUM = "alternativeFieldNum";
136 private static final String ALTITUDE = "altitude";
137 private static final String ANNOTATION = "annotation";
138 private static final String AUTHOR = "author";
139 private static final String BIBLIOGRAPHY = "bibliography";
140 private static final String BIOGRAPHIES = "biographies";
141 private static final String BOLD = "bold";
142 private static final String BR = "br";
143 private static final String CHAR = "char";
144 private static final String CITATION = "citation";
145 private static final String COLLECTION_TYPE_STATUS = "collectionTypeStatus";
146 private static final String COLLECTOR = "collector";
147 private static final String COORDINATES = "coordinates";
148 private static final String COUPLET = "couplet";
149 private static final String DATES = "dates";
150 private static final String DEFAULT_MEDIA_URL = "defaultMediaUrl";
151 private static final String DESTROYED = "destroyed";
152 private static final String DETAILS = "details";
153 private static final String DISTRIBUTION_LIST = "distributionList";
154 private static final String DISTRIBUTION_LOCALITY = "distributionLocality";
155 private static final String EDITION = "edition";
156 private static final String EDITORS = "editors";
157 private static final String FEATURE = "feature";
158 private static final String FIGURE = "figure";
159 private static final String FIGURE_LEGEND = "figureLegend";
160 private static final String FIGURE_PART = "figurePart";
161 private static final String FIGURE_REF = "figureRef";
162 private static final String FIGURE_TITLE = "figureTitle";
163 private static final String FOOTNOTE = "footnote";
164 private static final String FOOTNOTE_REF = "footnoteRef";
165 private static final String FOOTNOTE_STRING = "footnoteString";
166 private static final String FIELD_NUM = "fieldNum";
167 private static final String FULL_NAME = "fullName";
168 private static final String FULL_TYPE = "fullType";
169 private static final String GATHERING = "gathering";
170 private static final String HEADING = "heading";
171 private static final String HABITAT = "habitat";
172 private static final String HABITAT_LIST = "habitatList";
173 private static final String HOMONYM = "homonym";
174 private static final String HOMOTYPES = "homotypes";
175 private static final String ID = "id";
176 private static final String INFRANK = "infrank";
177 private static final String INFRAUT = "infraut";
178 private static final String INFRPARAUT = "infrparaut";
179 private static final String IS_SPOTCHARACTERS = "isSpotcharacters";
180 private static final String ISSUE = "issue";
181 private static final String ITALICS = "italics";
182 private static final String KEY = "key";
183 private static final String KEY_TITLE = "keyTitle";
184 private static final String KEYNOTES = "keynotes";
185 private static final String LIFE_CYCLE_PERIODS = "lifeCyclePeriods";
186 private static final String LOCALITY = "locality";
187 private static final String LOST = "lost";
188 private static final String META_DATA = "metaData";
189 private static final String NAME = "name";
190 private static final String NAME_TYPE = "nameType";
191 private static final String NOM = "nom";
192 private static final String NOMENCLATURE = "nomenclature";
193 private static final String NOT_FOUND = "notFound";
194 private static final String NOT_SEEN = "notSeen";
195 private static final String NOTES = "notes";
196 private static final String NUM = "num";
197 private static final String ORIGINAL_DETERMINATION = "originalDetermination";
198 private static final String PARAUT = "paraut";
199 private static final String PUBFULLNAME = "pubfullname";
200 private static final String PUBLICATION = "publication";
201 private static final String PUBNAME = "pubname";
202 private static final String PUBTITLE = "pubtitle";
203 private static final String PUBTYPE = "pubtype";
204 private static final String QUESTION = "question";
205 private static final String RANK = "rank";
206 private static final String REF = "ref";
207 private static final String REF_NUM = "refNum";
208 private static final String REF_PART = "refPart";
209 private static final String REFERENCE = "reference";
210 private static final String REFERENCES = "references";
211 private static final String TAXON = "taxon";
212 private static final String TAXONTITLE = "taxontitle";
213 private static final String TEXT = "text";
214 private static final String TEXT_SECTION = "textSection";
215 private static final String TO_COUPLET = "toCouplet";
216 private static final String TO_KEY = "toKey";
217 private static final String TO_TAXON = "toTaxon";
218 private static final String TYPE = "type";
219 private static final String TYPE_STATUS = "typeStatus";
220 private static final String TREATMENT = "treatment";
221 private static final String SERIALS_ABBREVIATIONS = "serialsAbbreviations";
222 private static final String SPECIMEN_TYPE = "specimenType";
223 private static final String STATUS = "status";
224 private static final String STRING = "string";
225 private static final String SUB_HEADING = "subHeading";
226 private static final String SUB_COLLECTION = "subCollection";
227 private static final String SYNONYM = "synonym";
228 private static final String UNKNOWN = "unknown";
229 private static final String URL = "url";
230 private static final String USAGE = "usage";
231 private static final String VOLUME = "volume";
232 private static final String WRITER = "writer";
233 private static final String YEAR = "year";
234
// Name parser instance created once per import component.
// NOTE(review): not referenced in the part of the class visible here.
235 private NonViralNameParserImpl parser = new NonViralNameParserImpl();
236
237 // TODO make part of state, but state is renewed when invoking the import a
238 // second time
// NOTE(review): the handler methods visible here read the unmatched leads
// from the import state (state.getUnmatchedLeads()), not from this field.
239 private UnmatchedLeads unmatchedLeads;
240
241 // TODO remove preliminary
// Preliminary authentication support. Only authenticationManager is injected;
// authentication and permissionEvaluator are never assigned by the active
// code visible in this part of the file.
242 @Autowired
243 private AuthenticationManager authenticationManager;
244 private Authentication authentication;
245 private PermissionEvaluator permissionEvaluator;
246
/**
 * Creates the import component.
 * <p>
 * The preliminary authentication set-up that used to be announced on
 * standard output is still disabled; the reminder is kept as a TODO comment
 * so that instantiating the component no longer writes to stdout.
 */
public MarkupDocumentImport() {
    super();
    // TODO remove preliminary authentication
    // UsernamePasswordAuthenticationToken token = new
    // UsernamePasswordAuthenticationToken("admin", "0000");
    // authentication = authenticationManager.authenticate(token);
    // SecurityContext context = SecurityContextHolder.getContext();
    // context.setAuthentication(authentication);
    // permissionEvaluator = new CdmPermissionEvaluator();
}
257
258 @Override
259 public boolean doCheck(MarkupImportState state) {
260 state.setCheck(true);
261 doInvoke(state);
262 state.setCheck(false);
263 return state.isSuccess();
264 }
265
266 @Override
267 public void doInvoke(MarkupImportState state) {
268 fireProgressEvent("Start import markup document", "Before start of document");
269
270 Queue<CdmBase> outputStream = new LinkedList<CdmBase>();
271
272 TransactionStatus tx = startTransaction();
273 // FIXME reset state
274 doAllTheOldOtherStuff(state);
275
276 // START
277 try {
278 // StAX
279 XMLEventReader reader = getStaxReader(state);
280 state.setReader(reader);
281 // start document
282 if (!validateStartOfDocument(reader)) {
283 state.setUnsuccessfull();
284 return;
285 }
286
287 // publication
288 String elName = PUBLICATION;
289 boolean hasPublication = false;
290
291 while (reader.hasNext()) {
292 XMLEvent nextEvent = reader.nextEvent();
293 if (isStartingElement(nextEvent, elName)) {
294 handlePublication(state, reader, nextEvent, elName);
295 hasPublication = true;
296 } else if (nextEvent.isEndDocument()) {
297 if (!hasPublication) {
298 String message = "No publication root element found";
299 fireWarningEvent(message, nextEvent, 8);
300 }
301 // done
302 } else {
303 fireSchemaConflictEventExpectedStartTag(elName, reader);
304 }
305 }
306 commitTransaction(tx);
307
308 // //SAX
309 // ImportHandlerBase handler= new PublicationHandler(this);
310 // parseSAX(state, handler);
311
312 } catch (FactoryConfigurationError e1) {
313 fireWarningEvent("Some error occurred while setting up xml factory. Data can't be imported", "Start", 16);
314 state.setUnsuccessfull();
315 } catch (XMLStreamException e1) {
316 fireWarningEvent("An XMLStreamException occurred while parsing. Data can't be imported", "Start", 16);
317 state.setUnsuccessfull();
318 // } catch (ParserConfigurationException e) {
319 // fireWarningEvent("A ParserConfigurationException occurred while parsing. Data can't be imported",
320 // "Start", 16);
321 // } catch (SAXException e) {
322 // fireWarningEvent("A SAXException occurred while parsing. Data can't be imported",
323 // "Start", 16);
324 // } catch (IOException e) {
325 // fireWarningEvent("An IO exception occurred while parsing. Data can't be imported",
326 // "Start", 16);
327
328 }
329
330 return;
331
332 }
333
334 private void handlePublication(MarkupImportState state,
335 XMLEventReader reader, XMLEvent currentEvent, String elName)
336 throws XMLStreamException {
337
338 // attributes
339 StartElement element = currentEvent.asStartElement();
340 Map<String, Attribute> attributes = getAttributes(element);
341 handleUnexpectedAttributes(element.getLocation(), attributes,
342 "noNamespaceSchemaLocation");
343
344 while (reader.hasNext()) {
345 XMLEvent event = readNoWhitespace(reader);
346 // TODO cardinality of alternative
347 if (event.isEndElement()) {
348 if (isEndingElement(event, elName)) {
349 return;
350 } else {
351 if (isEndingElement(event, BIOGRAPHIES)) {
352 // NOT YET IMPLEMENTED
353 popUnimplemented(event.asEndElement());
354 } else if (isEndingElement(event, REFERENCES)) {
355 // NOT YET IMPLEMENTED
356 popUnimplemented(event.asEndElement());
357 } else if (isEndingElement(event, TEXT_SECTION)) {
358 // NOT YET IMPLEMENTED
359 popUnimplemented(event.asEndElement());
360 } else if (isEndingElement(event, ADDENDA)) {
361 // NOT YET IMPLEMENTED
362 popUnimplemented(event.asEndElement());
363 } else {
364 handleUnexpectedElement(event);
365 }
366 }
367 } else if (event.isStartElement()) {
368 if (isStartingElement(event, META_DATA)) {
369 handleMetaData(state, reader, event);
370 } else if (isStartingElement(event, TREATMENT)) {
371 handleTreatment(state, reader, event);
372 } else if (isStartingElement(event, BIOGRAPHIES)) {
373 handleNotYetImplementedElement(event);
374 } else if (isStartingElement(event, REFERENCES)) {
375 handleNotYetImplementedElement(event);
376 } else if (isStartingElement(event, TEXT_SECTION)) {
377 handleNotYetImplementedElement(event);
378 } else if (isStartingElement(event, ADDENDA)) {
379 handleNotYetImplementedElement(event);
380 } else {
381 handleUnexpectedStartElement(event);
382 }
383 } else {
384 handleUnexpectedElement(event);
385 }
386 }
387 throw new IllegalStateException("Publication has no ending element");
388 }
389
390 private void handleMetaData(MarkupImportState state, XMLEventReader reader,
391 XMLEvent parentEvent) throws XMLStreamException {
392 checkNoAttributes(parentEvent);
393
394 while (reader.hasNext()) {
395 XMLEvent next = readNoWhitespace(reader);
396 if (isMyEndingElement(next, parentEvent)) {
397 return;
398 } else if (isStartingElement(next, DEFAULT_MEDIA_URL)) {
399 String baseUrl = getCData(state, reader, next);
400 try {
401 new URL(baseUrl);
402 state.setBaseMediaUrl(baseUrl);
403 } catch (MalformedURLException e) {
404 String message = "defaultMediaUrl '%s' is not a valid URL";
405 message = String.format(message, baseUrl);
406 fireWarningEvent(message, next, 8);
407 }
408 } else {
409 handleUnexpectedElement(next);
410 }
411 }
412 throw new IllegalStateException("MetaData has no ending element");
413
414 }
415
416 private void handleTreatment(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
417 checkNoAttributes(parentEvent);
418 Taxon lastTaxon = null;
419 while (reader.hasNext()) {
420 XMLEvent next = readNoWhitespace(reader);
421 if (isStartingElement(next, TAXON)) {
422 Taxon thisTaxon = handleTaxon(state, reader, next.asStartElement());
423 doTaxonRelation(state, thisTaxon, lastTaxon, parentEvent.getLocation());
424 lastTaxon = thisTaxon;
425 // TODO for imports spanning multiple documents ?? Still needed?
426 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
427 } else if (isMyEndingElement(next, parentEvent)) {
428 Set<PolytomousKeyNode> keyNodesToSave = state.getPolytomousKeyNodesToSave();
429 //better save the key then the nodes
430 Set<PolytomousKey> keySet = new HashSet<PolytomousKey>();
431 for (PolytomousKeyNode node : keyNodesToSave){
432 PolytomousKey key = node.getKey();
433 keySet.add(key);
434 }
435 save(keySet, state);
436 //unmatched key leads
437 UnmatchedLeads unmatched = state.getUnmatchedLeads();
438 if (unmatched.size() > 0){
439 String message = "The following key leads are unmatched: %s";
440 message = String.format(message, state.getUnmatchedLeads().toString());
441 fireWarningEvent(message, next, 6);
442 }
443 // save(keyNodesToSave, state);
444
445 return;
446 } else {
447 handleUnexpectedElement(next);
448 }
449 }
450 return;
451 }
452
453 /**
454 * @param taxon
455 * @param lastTaxon
456 */
457 private void doTaxonRelation(MarkupImportState state, Taxon taxon,
458 Taxon lastTaxon, Location dataLocation) {
459
460 Classification tree = makeTree(state, dataLocation);
461 if (lastTaxon == null) {
462 tree.addChildTaxon(taxon, null, null, null);
463 return;
464 }
465 Rank thisRank = taxon.getName().getRank();
466 Rank lastRank = lastTaxon.getName().getRank();
467 if (lastTaxon.getTaxonNodes().size() > 0) {
468 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
469 if (thisRank.isLower(lastRank)) {
470 lastNode.addChildTaxon(taxon, null, null, null);
471 fillMissingEpithetsForTaxa(lastTaxon, taxon);
472 } else if (thisRank.equals(lastRank)) {
473 TaxonNode parent = lastNode.getParent();
474 if (parent != null) {
475 parent.addChildTaxon(taxon, null, null, null);
476 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
477 } else {
478 tree.addChildTaxon(taxon, null, null, null);
479 }
480 } else if (thisRank.isHigher(lastRank)) {
481 doTaxonRelation(state, taxon, lastNode.getParent().getTaxon(),
482 dataLocation);
483 // TaxonNode parentNode = handleTaxonRelation(state, taxon,
484 // lastNode.getParent().getTaxon());
485 // parentNode.addChildTaxon(taxon, null, null, null);
486 }
487 } else {
488
489 String message = "Last taxon has no node";
490 fireWarningEvent(message, makeLocationStr(dataLocation), 6);
491 }
492 }
493
494 /**
495 * @param state
496 * @param dataLocation
497 * @return
498 */
499 private Classification makeTree(MarkupImportState state, Location dataLocation) {
500 Classification result = state.getTree(null);
501 if (result == null) {
502 UUID uuid = state.getConfig().getClassificationUuid();
503 if (uuid == null) {
504 String message = "No classification uuid is defined";
505 fireWarningEvent(message, makeLocationStr(dataLocation), 6);
506 result = createNewClassification(state);
507 } else {
508 result = getClassificationService().find(uuid);
509 if (result == null) {
510 result = createNewClassification(state);
511 result.setUuid(uuid);
512 }
513 }
514 state.putTree(null, result);
515 }
516 save(result, state);
517 return result;
518 }
519
520 private Classification createNewClassification(MarkupImportState state) {
521 Classification result;
522 result = Classification.NewInstance(state.getConfig().getClassificationTitle());
523 state.putTree(null, result);
524 return result;
525 }
526
527 private Taxon handleTaxon(MarkupImportState state, XMLEventReader reader, StartElement parentEvent) throws XMLStreamException {
528 // TODO progress monitoring
529 Map<String, Attribute> attributes = getAttributes(parentEvent);
530 Taxon taxon = createTaxonAndName(state, attributes);
531 state.setCurrentTaxon(taxon);
532
533 boolean hasTitle = false;
534 boolean hasNomenclature = false;
535 String taxonTitle = null;
536
537 while (reader.hasNext()) {
538 XMLEvent next = readNoWhitespace(reader);
539 if (next.isEndElement()) {
540 if (isMyEndingElement(next, parentEvent)) {
541 checkMandatoryElement(hasTitle, parentEvent, TAXONTITLE);
542 checkMandatoryElement(hasNomenclature, parentEvent, NOMENCLATURE);
543 handleUnexpectedAttributes(parentEvent.getLocation(),attributes);
544
545 makeKeyNodes(state, parentEvent, taxonTitle);
546 state.setCurrentTaxon(null);
547 state.setCurrentTaxonNum(null);
548 save(taxon, state);
549 return taxon;
550 } else {
551 if (isEndingElement(next, HEADING)) {
552 // NOT YET IMPLEMENTED
553 popUnimplemented(next.asEndElement());
554 } else if (isEndingElement(next, TEXT_SECTION)) {
555 // NOT YET IMPLEMENTED
556 popUnimplemented(next.asEndElement());
557 } else if (isEndingElement(next, REFERENCES)) {
558 // NOT YET IMPLEMENTED
559 popUnimplemented(next.asEndElement());
560 } else {
561 handleUnexpectedEndElement(next.asEndElement());
562 }
563 }
564 } else if (next.isStartElement()) {
565 if (isStartingElement(next, HEADING)) {
566 handleNotYetImplementedElement(next);
567 } else if (isStartingElement(next, TAXONTITLE)) {
568 taxonTitle = handleTaxonTitle(state, reader, next);
569 hasTitle = true;
570 } else if (isStartingElement(next, WRITER)) {
571 makeKeyWriter(state, reader, taxon, taxonTitle, next);
572 } else if (isStartingElement(next, TEXT_SECTION)) {
573 handleNotYetImplementedElement(next);
574 } else if (isStartingElement(next, KEY)) {
575 handleKey(state, reader, next);
576 } else if (isStartingElement(next, NOMENCLATURE)) {
577 handleNomenclature(state, reader, next);
578 hasNomenclature = true;
579 } else if (isStartingElement(next, FEATURE)) {
580 handleFeature(state, reader, next);
581 } else if (isStartingElement(next, NOTES)) {
582 // TODO is this the correct way to handle notes?
583 String note = handleNotes(state, reader, next);
584
585 UUID notesUuid;
586 try {
587 notesUuid = state.getTransformer().getFeatureUuid(
588 "notes");
589 Feature feature = getFeature(state, notesUuid, "Notes",
590 "Notes", "note", null);
591 TextData textData = TextData.NewInstance(feature);
592 textData.putText(Language.DEFAULT(), note);
593 TaxonDescription description = getTaxonDescription(
594 taxon, false, true);
595 description.addElement(textData);
596 } catch (UndefinedTransformerMethodException e) {
597 String message = "getFeatureUuid method not yet implemented";
598 fireWarningEvent(message, next, 8);
599 }
600 } else if (isStartingElement(next, REFERENCES)) {
601 handleNotYetImplementedElement(next);
602 } else if (isStartingElement(next, FIGURE)) {
603 handleFigure(state, reader, next);
604 } else if (isStartingElement(next, FOOTNOTE)) {
605 FootnoteDataHolder footnote = handleFootnote(state, reader,
606 next);
607 if (footnote.isRef()) {
608 String message = "Ref footnote not implemented here";
609 fireWarningEvent(message, next, 4);
610 } else {
611 registerGivenFootnote(state, footnote);
612 }
613 } else {
614 handleUnexpectedStartElement(next);
615 }
616 } else {
617 handleUnexpectedElement(next);
618 }
619 }
620 throw new IllegalStateException("<Taxon> has no closing tag");
621 }
622
623 private void makeKeyNodes(MarkupImportState state, XMLEvent event, String taxonTitle) {
624 Taxon taxon = state.getCurrentTaxon();
625 String num = state.getCurrentTaxonNum();
626
627 String nameString = CdmBase.deproxy(taxon.getName(), NonViralName.class).getNameCache();
628 // String nameString = taxonTitle;
629
630 //try to find matching lead nodes
631 UnmatchedLeadsKey leadsKey = UnmatchedLeadsKey.NewInstance(num, nameString);
632 Set<PolytomousKeyNode> matchingNodes = handleMatchingNodes(state, taxon, leadsKey);
633
634 if (num != null){//same without using the num
635 UnmatchedLeadsKey noNumLeadsKey = UnmatchedLeadsKey.NewInstance("", nameString);
636 Set<PolytomousKeyNode> noNumMatchingNodes = handleMatchingNodes(state, taxon, noNumLeadsKey);
637 if(noNumMatchingNodes.size() > 0){
638 String message ="Taxon matches additional key node when not considering <num> attribute in taxontitle. This may be correct but may also indicate an error.";
639 fireWarningEvent(message, event, 1);
640 }
641 }
642 //report missing match, if num exists
643 if (matchingNodes.isEmpty() && num != null){
644 String message = "Taxon has <num> attribute in taxontitle but no matching key nodes exist: %s, Key: %s";
645 message = String.format(message, num, leadsKey.toString());
646 fireWarningEvent(message, event, 1);
647 }
648
649 }
650
651 private Set<PolytomousKeyNode> handleMatchingNodes(MarkupImportState state, Taxon taxon, UnmatchedLeadsKey leadsKey) {
652 Set<PolytomousKeyNode> matchingNodes = state.getUnmatchedLeads().getNodes(leadsKey);
653 for (PolytomousKeyNode matchingNode : matchingNodes){
654 state.getUnmatchedLeads().removeNode(leadsKey, matchingNode);
655 matchingNode.setTaxon(taxon);
656 state.getPolytomousKeyNodesToSave().add(matchingNode);
657 }
658 return matchingNodes;
659 }
660
661 private void handleKey(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
662 // attributes
663 Map<String, Attribute> attributes = getAttributes(parentEvent);
664 String isSpotcharacters = getAndRemoveAttributeValue(attributes, IS_SPOTCHARACTERS);
665 if (isNotBlank(isSpotcharacters) ) {
666 //TODO isSpotcharacters
667 String message = "Attribute isSpotcharacters not yet implemented for <key>";
668 fireWarningEvent(message, parentEvent, 4);
669 }
670
671 PolytomousKey key = PolytomousKey.NewInstance();
672 key.addTaxonomicScope(state.getCurrentTaxon());
673 state.setCurrentKey(key);
674
675 boolean isFirstCouplet = true;
676 while (reader.hasNext()) {
677 XMLEvent next = readNoWhitespace(reader);
678 if (isMyEndingElement(next, parentEvent)) {
679 save(key, state);
680 state.setCurrentKey(null);
681 return;
682 } else if (isEndingElement(next, KEYNOTES)){
683 popUnimplemented(next.asEndElement());
684 } else if (isStartingElement(next, KEY_TITLE)) {
685 handleKeyTitle(state, reader, next);
686 } else if (isStartingElement(next, KEYNOTES)) {
687 //TODO
688 handleNotYetImplementedElement(next);
689 } else if (isStartingElement(next, COUPLET)) {
690 PolytomousKeyNode node = null;
691 if (isFirstCouplet){
692 node = key.getRoot();
693 isFirstCouplet = false;
694 }
695 handleCouplet(state, reader, next, node);
696 } else {
697 handleUnexpectedElement(next);
698 }
699 }
700 throw new IllegalStateException("<key> has no closing tag");
701 }
702
703 /**
704 * @param state
705 * @param reader
706 * @param key
707 * @param next
708 * @throws XMLStreamException
709 */
710 private void handleKeyTitle(MarkupImportState state, XMLEventReader reader, XMLEvent next) throws XMLStreamException {
711 PolytomousKey key = state.getCurrentKey();
712 String keyTitle = getCData(state, reader, next);
713 String standardTitles = "(?i)(Key\\sto\\sthe\\s(genera|species|varieties|forms))";
714
715 if (isNotBlank(keyTitle) ){
716 if (!state.getConfig().isReplaceStandardKeyTitles() || ! keyTitle.matches(standardTitles)){
717 key.setTitleCache(keyTitle, true);
718 }
719 }
720 }
721
722 private void handleCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode parentNode) throws XMLStreamException {
723 String num = getOnlyAttribute(parentEvent, NUM, true);
724 List<PolytomousKeyNode> childList = new ArrayList<PolytomousKeyNode>();
725
726 while (reader.hasNext()) {
727 XMLEvent next = readNoWhitespace(reader);
728 if (isMyEndingElement(next, parentEvent)) {
729 completeCouplet(state, parentEvent, parentNode, num, childList);
730 return;
731 } else if (isStartingElement(next, QUESTION)) {
732 handleQuestion(state, reader, next, childList);
733 } else if (isStartingElement(next, KEYNOTES)) {
734 //TODO
735 handleNotYetImplementedElement(next);
736 } else if (isEndingElement(next, KEYNOTES)) {
737 //TODO
738 popUnimplemented(next.asEndElement());
739 } else {
740 handleUnexpectedElement(next);
741 }
742 }
743 throw new IllegalStateException("<couplet> has no closing tag");
744 }
745
746 /**
747 * @param state
748 * @param parentEvent
749 * @param parentNode
750 * @param num
751 * @param childList
752 */
753 private void completeCouplet(MarkupImportState state, XMLEvent parentEvent,
754 PolytomousKeyNode parentNode, String num,
755 List<PolytomousKeyNode> childList) {
756 if (parentNode != null){
757 for (PolytomousKeyNode childNode : childList){
758 parentNode.addChild(childNode);
759 }
760 }else if (isNotBlank(num)){
761 UnmatchedLeadsKey unmatchedKey = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
762 Set<PolytomousKeyNode> nodes = state.getUnmatchedLeads().getNodes(unmatchedKey);
763 for(PolytomousKeyNode nodeToMatch: nodes){
764 for (PolytomousKeyNode childNode : childList){
765 nodeToMatch.addChild(childNode);
766 }
767 state.getUnmatchedLeads().removeNode(unmatchedKey, nodeToMatch);
768 }
769 }else{
770 String message = "Parent num could not be matched. Please check if num (%s) is correct";
771 message = String.format(message, num);
772 fireWarningEvent(message, parentEvent, 6);
773 }
774 }
775
776 private void handleQuestion(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, List<PolytomousKeyNode> nodesList) throws XMLStreamException {
777 // attributes
778 Map<String, Attribute> attributes = getAttributes(parentEvent);
779 //needed only for data lineage
780 String questionNum = getAndRemoveRequiredAttributeValue(parentEvent, attributes, NUM);
781
782 PolytomousKeyNode myNode = PolytomousKeyNode.NewInstance();
783 myNode.setKey(state.getCurrentKey()); //to avoid NPE while computing num in PolytomousKeyNode in case this node is not matched correctly with a parent
784 nodesList.add(myNode);
785
786 while (reader.hasNext()) {
787 XMLEvent next = readNoWhitespace(reader);
788 if (isMyEndingElement(next, parentEvent)) {
789 return;
790 } else if (isStartingElement(next, TEXT)) {
791 String text = getCData(state, reader, next);
792 KeyStatement statement = KeyStatement.NewInstance(text);
793 myNode.setStatement(statement);
794 } else if (isStartingElement(next, COUPLET)) {
795 //TODO test
796 handleCouplet(state, reader, next, myNode);
797 } else if (isStartingElement(next, TO_COUPLET)) {
798 handleToCouplet(state, reader, next, myNode);
799 } else if (isStartingElement(next, TO_TAXON)) {
800 handleToTaxon(state, reader, next, myNode);
801
802 } else if (isStartingElement(next, TO_KEY)) {
803 //TODO
804 handleNotYetImplementedElement(next);
805 } else if (isEndingElement(next, TO_KEY)){
806 //TODO
807 popUnimplemented(next.asEndElement());
808 } else if (isStartingElement(next, KEYNOTES)) {
809 //TODO
810 handleNotYetImplementedElement(next);
811 } else if (isEndingElement(next, KEYNOTES)){
812 //TODO
813 popUnimplemented(next.asEndElement());
814 } else {
815 handleUnexpectedElement(next);
816 }
817 }
818 throw new IllegalStateException("<question> has no closing tag");
819 }
820
821 private void handleToCouplet(MarkupImportState state, XMLEventReader reader, XMLEvent next, PolytomousKeyNode node) throws XMLStreamException {
822 String num = getOnlyAttribute(next, NUM, true);
823 String cData = getCData(state, reader, next, false);
824 if (isNotBlank(cData) && ! cData.equals(num)){
825 String message = "CData ('%s') not handled in <toCouplet>";
826 message = String.format(message, cData);
827 fireWarningEvent(message, next, 4);
828 }
829 UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(state.getCurrentKey(), num);
830 state.getUnmatchedLeads().addKey(unmatched, node);
831 }
832
833 private void handleToTaxon(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, PolytomousKeyNode node) throws XMLStreamException {
834 Map<String, Attribute> attributes = getAttributes(parentEvent);
835 String num = getAndRemoveAttributeValue(attributes, NUM);
836 String taxonStr = getCData(state, reader, parentEvent, false);
837 //TODO ?
838 taxonStr = makeTaxonKey(taxonStr, state.getCurrentTaxon());
839 UnmatchedLeadsKey unmatched = UnmatchedLeadsKey.NewInstance(num, taxonStr);
840 state.getUnmatchedLeads().addKey(unmatched, node);
841 return;
842 }
843
844 private String makeTaxonKey(String strGoto, Taxon taxon) {
845 String result = "";
846 if (strGoto == null){
847 return "";
848 }
849
850 NonViralName<?> name = CdmBase.deproxy(taxon.getName(), NonViralName.class);
851 String strGenusName = name.getGenusOrUninomial();
852
853
854 strGoto = strGoto.replaceAll("\\([^\\(\\)]*\\)", ""); //replace all brackets
855 strGoto = strGoto.replaceAll("\\s+", " "); //replace multiple whitespaces by exactly one whitespace
856
857 strGoto = strGoto.trim();
858 String[] split = strGoto.split("\\s");
859 for (int i = 0; i<split.length; i++){
860 String single = split[i];
861 if (isGenusAbbrev(single, strGenusName)){
862 split[i] = strGenusName;
863 }
864 if (isInfraSpecificMarker(single)){
865 String strSpeciesEpi = name.getSpecificEpithet();
866 if (isBlank(result)){
867 result += strGenusName + " " + strSpeciesEpi;
868 }
869 }
870 result = (result + " " + split[i]).trim();
871 }
872 return result;
873 }
874
875
876 private boolean isInfraSpecificMarker(String single) {
877 try {
878 if (Rank.getRankByAbbreviation(single).isInfraSpecific()){
879 return true;
880 }else{
881 return false;
882 }
883 } catch (UnknownCdmTypeException e) {
884 return false;
885 }
886 }
887
888 private boolean isGenusAbbrev(String single, String strGenusName) {
889 if (! single.matches("[A-Z]\\.?")) {
890 return false;
891 }else if (single.length() == 0 || strGenusName == null || strGenusName.length() == 0){
892 return false;
893 }else{
894 return single.charAt(0) == strGenusName.charAt(0);
895 }
896 }
897
898 /**
899 * @param state
900 * @param reader
901 * @param taxon
902 * @param taxonTitle
903 * @param next
904 * @throws XMLStreamException
905 */
906 private void makeKeyWriter(MarkupImportState state, XMLEventReader reader, Taxon taxon, String taxonTitle, XMLEvent next) throws XMLStreamException {
907 WriterDataHolder writer = handleWriter(state, reader, next);
908 taxon.addExtension(writer.extension);
909 // TODO what if taxonTitle comes later
910 if (StringUtils.isNotBlank(taxonTitle)
911 && writer.extension != null) {
912 Reference<?> sec = ReferenceFactory.newBookSection();
913 sec.setTitle(taxonTitle);
914 TeamOrPersonBase<?> author = createAuthor(writer.writer);
915 sec.setAuthorTeam(author);
916 sec.setInReference(state.getConfig()
917 .getSourceReference());
918 taxon.setSec(sec);
919 registerFootnotes(state, sec, writer.footnotes);
920 } else {
921 String message = "No taxontitle exists for writer";
922 fireWarningEvent(message, next, 6);
923 }
924 }
925
926 private String handleNotes(MarkupImportState state, XMLEventReader reader,
927 XMLEvent parentEvent) throws XMLStreamException {
928 checkNoAttributes(parentEvent);
929
930 String text = "";
931 while (reader.hasNext()) {
932 XMLEvent next = readNoWhitespace(reader);
933 if (isMyEndingElement(next, parentEvent)) {
934 return text;
935 } else if (next.isEndElement()) {
936 if (isEndingElement(next, HEADING)) {
937 popUnimplemented(next.asEndElement());
938 } else if (isEndingElement(next, WRITER)) {
939 popUnimplemented(next.asEndElement());
940 } else if (isEndingElement(next, NUM)) {
941 popUnimplemented(next.asEndElement());
942 } else {
943 handleUnexpectedEndElement(next.asEndElement());
944 }
945 } else if (next.isStartElement()) {
946 if (isStartingElement(next, HEADING)) {
947 handleNotYetImplementedElement(next);
948 } else if (isStartingElement(next, SUB_HEADING)) {
949 String subheading = getCData(state, reader, next).trim();
950 if (! isNoteHeading(subheading)) {
951 fireNotYetImplementedElement(next.getLocation(), next.asStartElement().getName(), 0);
952 }
953 } else if (isStartingElement(next, WRITER)) {
954 handleNotYetImplementedElement(next);
955 } else if (isStartingElement(next, NUM)) {
956 handleNotYetImplementedElement(next);
957 } else if (isStartingElement(next, STRING)) {
958 // TODO why multiple strings in schema?
959 text = makeNotesString(state, reader, text, next);
960 } else {
961 handleUnexpectedStartElement(next.asStartElement());
962 }
963 } else {
964 handleUnexpectedElement(next);
965 }
966 }
967 throw new IllegalStateException("<Notes> has no closing tag");
968 }
969
970 /**
971 * @param state
972 * @param reader
973 * @param text
974 * @param next
975 * @return
976 * @throws XMLStreamException
977 */
978 private String makeNotesString(MarkupImportState state, XMLEventReader reader, String text, XMLEvent next) throws XMLStreamException {
979 Map<String, String> stringMap = handleString(state, reader, next, null);
980 if (stringMap.size() == 0){
981 String message = "No text available in <notes>";
982 fireWarningEvent(message, next, 4);
983 }else if (stringMap.size() > 1){
984 String message = "Subheadings not yet supported in <notes>";
985 fireWarningEvent(message, next, 4);
986 }else{
987 String firstSubheading = stringMap.keySet().iterator().next();
988 if ( firstSubheading != null && ! isNoteHeading (firstSubheading) ) {
989 String message = "Subheadings not yet supported in <notes>";
990 fireWarningEvent(message, next, 4);
991 }
992 }
993 for (String subheading : stringMap.keySet()){
994 text += subheading;
995 text += stringMap.get(subheading);
996 }
997 return text;
998 }
999
1000 private boolean isNoteHeading(String heading) {
1001 String excludePattern = "(i?)(Notes?):?";
1002 return heading.matches(excludePattern);
1003 }
1004
1005 /**
1006 * @param state
1007 * @param attributes
1008 */
1009 private Taxon createTaxonAndName(MarkupImportState state,
1010 Map<String, Attribute> attributes) {
1011 NonViralName<?> name;
1012 Rank rank = Rank.SPECIES(); // default
1013 boolean isCultivar = checkAndRemoveAttributeValue(attributes, CLASS,
1014 "cultivated");
1015 if (isCultivar) {
1016 name = CultivarPlantName.NewInstance(rank);
1017 } else {
1018 name = createNameByCode(state, rank);
1019 }
1020 Taxon taxon = Taxon.NewInstance(name, state.getConfig()
1021 .getSourceReference());
1022 if (checkAndRemoveAttributeValue(attributes, CLASS, "dubious")) {
1023 taxon.setDoubtful(true);
1024 } else if (checkAndRemoveAttributeValue(attributes, CLASS, "excluded")) {
1025 taxon.setExcluded(true);
1026 }
1027 // TODO insufficient, new, expected
1028 handleNotYetImplementedAttribute(attributes, CLASS);
1029 // From old version
1030 // MarkerType markerType = getMarkerType(state, attrValue);
1031 // if (markerType == null){
1032 // logger.warn("Class attribute value for taxon not yet supported: " +
1033 // attrValue);
1034 // }else{
1035 // taxon.addMarker(Marker.NewInstance(markerType, true));
1036 // }
1037
1038 // save(name, state);
1039 // save(taxon, state);
1040 return taxon;
1041 }
1042
1043 /**
1044 * @param state
1045 * @param rank
1046 * @return
1047 */
1048 private NonViralName<?> createNameByCode(MarkupImportState state, Rank rank) {
1049 NonViralName<?> name;
1050 NomenclaturalCode nc = makeNomenclaturalCode(state);
1051 name = (NonViralName<?>) nc.getNewTaxonNameInstance(rank);
1052 return name;
1053 }
1054
1055 /**
1056 * @param state
1057 * @return
1058 */
1059 private NomenclaturalCode makeNomenclaturalCode(MarkupImportState state) {
1060 NomenclaturalCode nc = state.getConfig().getNomenclaturalCode();
1061 if (nc == null) {
1062 nc = NomenclaturalCode.ICBN; // default;
1063 }
1064 return nc;
1065 }
1066
1067 private String handleTaxonTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1068 //attributes
1069 String text = "";
1070 Map<String, Attribute> attributes = getAttributes(parentEvent);
1071 String rankAttr = getAndRemoveAttributeValue(attributes, RANK);
1072 Rank rank = makeRank(state, rankAttr, false);
1073 String num = getAndRemoveAttributeValue(attributes, NUM);
1074 state.setCurrentTaxonNum(num);
1075 checkNoAttributes(attributes, parentEvent);
1076
1077 // TODO handle attributes
1078 while (reader.hasNext()) {
1079 XMLEvent next = readNoWhitespace(reader);
1080 if (next.isEndElement()) {
1081 if (isMyEndingElement(next, parentEvent)) {
1082 Taxon taxon = state.getCurrentTaxon();
1083 String titleText = null;
1084 if (checkMandatoryText(text, parentEvent)) {
1085 titleText = normalize(text);
1086 UUID uuidTitle = MarkupTransformer.uuidTaxonTitle;
1087 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "Taxon Title ","taxon title", "title");
1088 taxon.addExtension(titleText, titleExtension);
1089 }
1090 taxon.getName().setRank(rank);
1091 // TODO check title exists
1092 return titleText;
1093 } else {
1094 if (isEndingElement(next, FOOTNOTE)) {
1095 // NOT YET IMPLEMENTED
1096 popUnimplemented(next.asEndElement());
1097 } else {
1098 handleUnexpectedEndElement(next.asEndElement());
1099 state.setUnsuccessfull();
1100 }
1101 }
1102 } else if (next.isStartElement()) {
1103 if (isStartingElement(next, FOOTNOTE)) {
1104 handleNotYetImplementedElement(next);
1105 } else {
1106 handleUnexpectedStartElement(next);
1107 state.setUnsuccessfull();
1108 }
1109 } else if (next.isCharacters()) {
1110 text += next.asCharacters().getData();
1111
1112 } else {
1113 handleUnexpectedElement(next);
1114 state.setUnsuccessfull();
1115 }
1116 }
1117 return null;
1118
1119 }
1120
1121 private WriterDataHolder handleWriter(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1122 String text = "";
1123 checkNoAttributes(parentEvent);
1124 WriterDataHolder dataHolder = new WriterDataHolder();
1125 List<FootnoteDataHolder> footnotes = new ArrayList<FootnoteDataHolder>();
1126
1127 // TODO handle attributes
1128 while (reader.hasNext()) {
1129 XMLEvent next = readNoWhitespace(reader);
1130 if (isMyEndingElement(next, parentEvent)) {
1131 text = CdmUtils.removeBrackets(text);
1132 if (checkMandatoryText(text, parentEvent)) {
1133 text = normalize(text);
1134 dataHolder.writer = text;
1135 dataHolder.footnotes = footnotes;
1136
1137 // Extension
1138 UUID uuidWriterExtension = MarkupTransformer.uuidWriterExtension;
1139 ExtensionType writerExtensionType = this
1140 .getExtensionType(state, uuidWriterExtension,
1141 "Writer", "writer", "writer");
1142 Extension extension = Extension.NewInstance();
1143 extension.setType(writerExtensionType);
1144 extension.setValue(text);
1145 dataHolder.extension = extension;
1146
1147 // Annotation
1148 UUID uuidWriterAnnotation = MarkupTransformer.uuidWriterAnnotation;
1149 AnnotationType writerAnnotationType = this.getAnnotationType(state, uuidWriterAnnotation, "Writer", "writer", "writer", null);
1150 Annotation annotation = Annotation.NewInstance(text, writerAnnotationType, Language.DEFAULT());
1151 dataHolder.annotation = annotation;
1152
1153 return dataHolder;
1154 } else {
1155 return null;
1156 }
1157 } else if (isStartingElement(next, FOOTNOTE_REF)) {
1158 FootnoteDataHolder footNote = handleFootnoteRef(state, reader, next);
1159 if (footNote.isRef()) {
1160 footnotes.add(footNote);
1161 } else {
1162 logger.warn("Non ref footnotes not yet impelemnted");
1163 }
1164 } else if (next.isCharacters()) {
1165 text += next.asCharacters().getData();
1166
1167 } else {
1168 handleUnexpectedElement(next);
1169 state.setUnsuccessfull();
1170 }
1171 }
1172 throw new IllegalStateException("<writer> has no end tag");
1173 }
1174
1175 private void registerFootnotes(MarkupImportState state, AnnotatableEntity entity, List<FootnoteDataHolder> footnotes) {
1176 for (FootnoteDataHolder footNote : footnotes) {
1177 registerFootnoteDemand(state, entity, footNote);
1178 }
1179 }
1180
1181 private void registerGivenFootnote(MarkupImportState state, FootnoteDataHolder footnote) {
1182 state.registerFootnote(footnote);
1183 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.id);
1184 if (demands != null) {
1185 for (AnnotatableEntity entity : demands) {
1186 attachFootnote(state, entity, footnote);
1187 }
1188 }
1189 }
1190
1191 private void registerGivenFigure(MarkupImportState state, String id, Media figure) {
1192 state.registerFigure(id, figure);
1193 Set<AnnotatableEntity> demands = state.getFigureDemands(id);
1194 if (demands != null) {
1195 for (AnnotatableEntity entity : demands) {
1196 attachFigure(state, entity, figure);
1197 }
1198 }
1199 }
1200
1201 private void registerFootnoteDemand(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
1202 FootnoteDataHolder existingFootnote = state.getFootnote(footnote.ref);
1203 if (existingFootnote != null) {
1204 attachFootnote(state, entity, existingFootnote);
1205 } else {
1206 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.ref);
1207 if (demands == null) {
1208 demands = new HashSet<AnnotatableEntity>();
1209 state.putFootnoteDemands(footnote.ref, demands);
1210 }
1211 demands.add(entity);
1212 }
1213 }
1214
1215 private void registerFigureDemand(MarkupImportState state, AnnotatableEntity entity, String figureRef) {
1216 Media existingFigure = state.getFigure(figureRef);
1217 if (existingFigure != null) {
1218 attachFigure(state, entity, existingFigure);
1219 } else {
1220 Set<AnnotatableEntity> demands = state.getFigureDemands(figureRef);
1221 if (demands == null) {
1222 demands = new HashSet<AnnotatableEntity>();
1223 state.putFigureDemands(figureRef, demands);
1224 }
1225 demands.add(entity);
1226 }
1227 }
1228
1229 private void attachFootnote(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
1230 AnnotationType annotationType = this.getAnnotationType(state, MarkupTransformer.uuidFootnote, "Footnote", "An e-flora footnote", "fn", null);
1231 Annotation annotation = Annotation.NewInstance(footnote.string,
1232 annotationType, Language.DEFAULT());
1233 // TODO transient objects
1234 entity.addAnnotation(annotation);
1235 save(entity, state);
1236 }
1237
1238 private void attachFigure(MarkupImportState state,
1239 AnnotatableEntity entity, Media figure) {
1240 // IdentifiableEntity<?> toSave;
1241 if (entity.isInstanceOf(TextData.class)) {
1242 TextData deb = CdmBase.deproxy(entity, TextData.class);
1243 deb.addMedia(figure);
1244 // toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
1245 } else if (entity.isInstanceOf(IdentifiableMediaEntity.class)) {
1246 IdentifiableMediaEntity<?> ime = CdmBase.deproxy(entity,
1247 IdentifiableMediaEntity.class);
1248 ime.addMedia(figure);
1249 // toSave = ime;
1250 } else {
1251 String message = "Unsupported entity to attach media: %s";
1252 message = String.format(message, entity.getClass().getName());
1253 // toSave = null;
1254 }
1255 save(entity, state);
1256 }
1257
1258 private void handleFigure(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1259 // FigureDataHolder result = new FigureDataHolder();
1260
1261 Map<String, Attribute> attributes = getAttributes(parentEvent);
1262 String id = getAndRemoveAttributeValue(attributes, ID);
1263 String type = getAndRemoveAttributeValue(attributes, TYPE);
1264 checkNoAttributes(attributes, parentEvent);
1265
1266 String urlString = null;
1267 String legendString = null;
1268 String titleString = null;
1269 String numString = null;
1270 String text = null;
1271 while (reader.hasNext()) {
1272 XMLEvent next = readNoWhitespace(reader);
1273 if (isMyEndingElement(next, parentEvent)) {
1274 makeFigure(state, id, type, urlString, legendString, titleString, numString, next);
1275 return;
1276 } else if (isStartingElement(next, FIGURE_LEGEND)) {
1277 // TODO same as figurestring ?
1278 legendString = handleFootnoteString(state, reader, next);
1279 } else if (isStartingElement(next, FIGURE_TITLE)) {
1280 titleString = getCData(state, reader, next);
1281 } else if (isStartingElement(next, URL)) {
1282 String localUrl = getCData(state, reader, next);
1283 urlString = CdmUtils.Nz(state.getBaseMediaUrl()) + localUrl;
1284 } else if (isStartingElement(next, NUM)) {
1285 numString = getCData(state, reader, next);
1286 } else if (next.isCharacters()) {
1287 text += next.asCharacters().getData();
1288 } else {
1289 fireUnexpectedEvent(next, 0);
1290 }
1291 }
1292 throw new IllegalStateException("<figure> has no end tag");
1293 }
1294
1295 /**
1296 * @param state
1297 * @param id
1298 * @param type
1299 * @param urlString
1300 * @param legendString
1301 * @param titleString
1302 * @param numString
1303 * @param next
1304 */
1305 private void makeFigure(MarkupImportState state, String id, String type, String urlString,
1306 String legendString, String titleString, String numString, XMLEvent next) {
1307 Media media = null;
1308 boolean isFigure = false;
1309 try {
1310 //TODO maybe everything is a figure as it is all taken from a book
1311 if ("lineart".equals(type)) {
1312 isFigure = true;
1313 // media = Figure.NewInstance(url.toURI(), null, null, null);
1314 } else if (type == null || "photo".equals(type)
1315 || "signature".equals(type)
1316 || "others".equals(type)) {
1317 } else {
1318 String message = "Unknown figure type '%s'";
1319 message = String.format(message, type);
1320 fireWarningEvent(message, next, 2);
1321 }
1322 media = getImageMedia(urlString, READ_MEDIA_DATA, isFigure);
1323
1324 if (media != null){
1325 // title
1326 if (StringUtils.isNotBlank(titleString)) {
1327 media.putTitle(Language.DEFAULT(), titleString);
1328 }
1329 // legend
1330 if (StringUtils.isNotBlank(legendString)) {
1331 media.addDescription(legendString, Language.DEFAULT());
1332 }
1333 if (StringUtils.isNotBlank(numString)) {
1334 // TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
1335 // vol.13)
1336 Reference<?> citation = state.getConfig().getSourceReference();
1337 media.addSource(numString, "num", citation, null);
1338 // TODO name used in source if available
1339 }
1340 // TODO which citation
1341 if (StringUtils.isNotBlank(id)) {
1342 media.addSource(id, null, state.getConfig().getSourceReference(), null);
1343 } else {
1344 String message = "Figure id should never be empty or null";
1345 fireWarningEvent(message, next, 6);
1346 }
1347
1348 // text
1349 // do nothing
1350
1351 }
1352 } catch (MalformedURLException e) {
1353 String message = "Media uri has incorrect syntax: %s";
1354 message = String.format(message, urlString);
1355 fireWarningEvent(message, next, 4);
1356 // } catch (URISyntaxException e) {
1357 // String message = "Media uri has incorrect syntax: %s";
1358 // message = String.format(message, urlString);
1359 // fireWarningEvent(message, next, 4);
1360 }
1361
1362 registerGivenFigure(state, id, media);
1363 }
1364
1365 private FigureDataHolder handleFigureRef(MarkupImportState state,
1366 XMLEventReader reader, XMLEvent parentEvent)
1367 throws XMLStreamException {
1368 FigureDataHolder result = new FigureDataHolder();
1369 Map<String, Attribute> attributes = getAttributes(parentEvent);
1370 result.ref = getAndRemoveAttributeValue(attributes, REF);
1371 checkNoAttributes(attributes, parentEvent);
1372
1373 // text is not handled, needed only for debugging purposes
1374 String text = "";
1375 while (reader.hasNext()) {
1376 XMLEvent next = readNoWhitespace(reader);
1377 if (isMyEndingElement(next, parentEvent)) {
1378 return result;
1379 } else if (isStartingElement(next, NUM)) {
1380 String num = getCData(state, reader, next);
1381 result.num = num; // num is not handled during import
1382 } else if (isStartingElement(next, FIGURE_PART)) {
1383 result.figurePart = getCData(state, reader, next);
1384 } else if (next.isCharacters()) {
1385 text += next.asCharacters().getData();
1386 } else {
1387 fireUnexpectedEvent(next, 0);
1388 }
1389 }
1390 throw new IllegalStateException("<figureRef> has no end tag");
1391 }
1392
1393 private FootnoteDataHolder handleFootnote(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1394 FootnoteDataHolder result = new FootnoteDataHolder();
1395 Map<String, Attribute> attributes = getAttributes(parentEvent);
1396 result.id = getAndRemoveAttributeValue(attributes, ID);
1397 // result.ref = getAndRemoveAttributeValue(attributes, REF);
1398 checkNoAttributes(attributes, parentEvent);
1399
1400 while (reader.hasNext()) {
1401 XMLEvent next = readNoWhitespace(reader);
1402 if (isStartingElement(next, FOOTNOTE_STRING)) {
1403 String string = handleFootnoteString(state, reader, next);
1404 result.string = string;
1405 } else if (isMyEndingElement(next, parentEvent)) {
1406 return result;
1407 } else {
1408 fireUnexpectedEvent(next, 0);
1409 }
1410 }
1411 return result;
1412 }
1413
1414 private FootnoteDataHolder handleFootnoteRef(MarkupImportState state,
1415 XMLEventReader reader, XMLEvent parentEvent)
1416 throws XMLStreamException {
1417 FootnoteDataHolder result = new FootnoteDataHolder();
1418 Map<String, Attribute> attributes = getAttributes(parentEvent);
1419 result.ref = getAndRemoveAttributeValue(attributes, REF);
1420 checkNoAttributes(attributes, parentEvent);
1421
1422 // text is not handled, needed only for debugging purposes
1423 String text = "";
1424 while (reader.hasNext()) {
1425 XMLEvent next = readNoWhitespace(reader);
1426 // if (isStartingElement(next, FOOTNOTE_STRING)){
1427 // String string = handleFootnoteString(state, reader, next);
1428 // result.string = string;
1429 // }else
1430 if (isMyEndingElement(next, parentEvent)) {
1431 return result;
1432 } else if (next.isCharacters()) {
1433 text += next.asCharacters().getData();
1434
1435 } else {
1436 fireUnexpectedEvent(next, 0);
1437 }
1438 }
1439 return result;
1440 }
1441
1442 private void handleNomenclature(MarkupImportState state,
1443 XMLEventReader reader, XMLEvent parentEvent)
1444 throws XMLStreamException {
1445 checkNoAttributes(parentEvent);
1446
1447 while (reader.hasNext()) {
1448 XMLEvent next = readNoWhitespace(reader);
1449 if (isStartingElement(next, HOMOTYPES)) {
1450 handleHomotypes(state, reader, next.asStartElement());
1451 } else if (isMyEndingElement(next, parentEvent)) {
1452 return;
1453 } else {
1454 fireSchemaConflictEventExpectedStartTag(HOMOTYPES, reader);
1455 state.setUnsuccessfull();
1456 }
1457 }
1458 return;
1459 }
1460
1461 private String handleFootnoteString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1462 boolean isTextMode = true;
1463 String text = "";
1464 while (reader.hasNext()) {
1465 XMLEvent next = readNoWhitespace(reader);
1466 if (isMyEndingElement(next, parentEvent)) {
1467 return text;
1468 } else if (next.isEndElement()) {
1469 if (isEndingElement(next, FULL_NAME)) {
1470 popUnimplemented(next.asEndElement());
1471 } else if (isEndingElement(next, BR)) {
1472 isTextMode = true;
1473 } else if (isHtml(next)) {
1474 text += getXmlTag(next);
1475 } else {
1476 handleUnexpectedEndElement(next.asEndElement());
1477 }
1478 } else if (next.isStartElement()) {
1479 if (isStartingElement(next, FULL_NAME)) {
1480 handleNotYetImplementedElement(next);
1481 } else if (isStartingElement(next, GATHERING)) {
1482 text += handleInLineGathering(state, reader, next);
1483 } else if (isStartingElement(next, REFERENCES)) {
1484 text += " " + handleInLineReferences(state, reader, next)+ " ";
1485 } else if (isStartingElement(next, BR)) {
1486 text += "<br/>";
1487 isTextMode = false;
1488 } else if (isHtml(next)) {
1489 text += getXmlTag(next);
1490 } else {
1491 handleUnexpectedStartElement(next.asStartElement());
1492 }
1493 } else if (next.isCharacters()) {
1494 if (!isTextMode) {
1495 String message = "footnoteString is not in text mode";
1496 fireWarningEvent(message, next, 6);
1497 } else {
1498 text += next.asCharacters().getData().trim();
1499 // getCData(state, reader, next); does not work as we have inner tags like <references>
1500 }
1501 } else {
1502 handleUnexpectedEndElement(next.asEndElement());
1503 }
1504 }
1505 throw new IllegalStateException("<footnoteString> has no closing tag");
1506
1507 }
1508
1509 private String handleInLineGathering(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1510 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(DerivedUnitType.DerivedUnit.FieldObservation);
1511 handleGathering(state, reader, parentEvent, null, facade);
1512 FieldObservation fieldObservation = facade.innerFieldObservation();
1513 String result = "<cdm:specimen uuid='%s'>%s</specimen>";
1514 result = String.format(result, fieldObservation.getUuid(), fieldObservation.getTitleCache());
1515 save(fieldObservation, state);
1516 return result;
1517 }
1518
1519 private String handleInLineReferences(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1520 checkNoAttributes(parentEvent);
1521
1522 boolean hasReference = false;
1523 String text = "";
1524 while (reader.hasNext()) {
1525 XMLEvent next = readNoWhitespace(reader);
1526 if (isMyEndingElement(next, parentEvent)) {
1527 checkMandatoryElement(hasReference, parentEvent.asStartElement(), REFERENCE);
1528 return text;
1529 } else if (isStartingElement(next, REFERENCE)) {
1530 text += handleInLineReference(state, reader, next);
1531 hasReference = true;
1532 } else {
1533 handleUnexpectedElement(next);
1534 }
1535 }
1536 throw new IllegalStateException("<References> has no closing tag");
1537 }
1538
1539 private String handleInLineReference(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1540 Reference<?> reference = handleReference(state, reader, parentEvent);
1541 String result = "<cdm:ref uuid='%s'>%s</ref>";
1542 result = String.format(result, reference.getUuid(), reference.getTitleCache());
1543 save(reference, state);
1544 return result;
1545 }
1546
1547 private Reference<?> handleReference(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1548 checkNoAttributes(parentEvent);
1549
1550 boolean hasRefPart = false;
1551 Map<String, String> refMap = new HashMap<String, String>();
1552 while (reader.hasNext()) {
1553 XMLEvent next = readNoWhitespace(reader);
1554 if (isMyEndingElement(next, parentEvent)) {
1555 checkMandatoryElement(hasRefPart, parentEvent.asStartElement(),
1556 REF_PART);
1557 Reference<?> reference = createReference(state, refMap, next);
1558 return reference;
1559 } else if (isStartingElement(next, REF_PART)) {
1560 handleRefPart(state, reader, next, refMap);
1561 hasRefPart = true;
1562 } else {
1563 handleUnexpectedElement(next);
1564 }
1565 }
1566 // TODO handle missing end element
1567 throw new IllegalStateException("<Reference> has no closing tag");
1568 }
1569
1570 private void handleHomotypes(MarkupImportState state,
1571 XMLEventReader reader, StartElement parentEvent)
1572 throws XMLStreamException {
1573 checkNoAttributes(parentEvent);
1574
1575 HomotypicalGroup homotypicalGroup = null;
1576
1577 boolean hasNom = false;
1578 while (reader.hasNext()) {
1579 XMLEvent next = readNoWhitespace(reader);
1580 if (next.isEndElement()) {
1581 if (isMyEndingElement(next, parentEvent)) {
1582 checkMandatoryElement(hasNom, parentEvent, NOM);
1583 return;
1584 } else {
1585 if (isEndingElement(next, NAME_TYPE)) {
1586 state.setNameType(false);
1587 } else if (isEndingElement(next, NOTES)) {
1588 // NOT YET IMPLEMENTED
1589 popUnimplemented(next.asEndElement());
1590 } else {
1591 handleUnexpectedEndElement(next.asEndElement());
1592 }
1593 }
1594 } else if (next.isStartElement()) {
1595 if (isStartingElement(next, NOM)) {
1596 NonViralName<?> name = handleNom(state, reader, next,
1597 homotypicalGroup);
1598 homotypicalGroup = name.getHomotypicalGroup();
1599 hasNom = true;
1600 } else if (isStartingElement(next, NAME_TYPE)) {
1601 state.setNameType(true);
1602 handleNameType(state, reader, next, homotypicalGroup);
1603 } else if (isStartingElement(next, SPECIMEN_TYPE)) {
1604 handleSpecimenType(state, reader, next, homotypicalGroup);
1605 } else if (isStartingElement(next, NOTES)) {
1606 handleNotYetImplementedElement(next);
1607 } else {
1608 handleUnexpectedStartElement(next);
1609 }
1610 } else {
1611 handleUnexpectedElement(next);
1612 }
1613 }
1614 // TODO handle missing end element
1615 throw new IllegalStateException("Homotypes has no closing tag");
1616
1617 }
1618
1619 private void handleNameType(MarkupImportState state, XMLEventReader reader,
1620 XMLEvent parentEvent, HomotypicalGroup homotypicalGroup)
1621 throws XMLStreamException {
1622 Map<String, Attribute> attributes = getAttributes(parentEvent);
1623 String typeStatus = getAndRemoveAttributeValue(attributes, TYPE_STATUS);
1624 checkNoAttributes(attributes, parentEvent);
1625
1626 NameTypeDesignationStatus status;
1627 try {
1628 status = NameTypeParser.parseNameTypeStatus(typeStatus);
1629 } catch (UnknownCdmTypeException e) {
1630 String message = "Type status could not be recognized: %s";
1631 message = String.format(message, typeStatus);
1632 fireWarningEvent(message, parentEvent, 4);
1633 status = null;
1634 }
1635
1636 boolean hasNom = false;
1637 while (reader.hasNext()) {
1638 XMLEvent next = readNoWhitespace(reader);
1639 if (next.isEndElement()) {
1640 if (isMyEndingElement(next, parentEvent)) {
1641 checkMandatoryElement(hasNom, parentEvent.asStartElement(),
1642 NOM);
1643 state.setNameType(false);
1644 return;
1645 } else {
1646 if (isEndingElement(next, ACCEPTED_NAME)) {
1647 // NOT YET IMPLEMENTED
1648 popUnimplemented(next.asEndElement());
1649 } else {
1650 handleUnexpectedEndElement(next.asEndElement());
1651 }
1652 }
1653 } else if (next.isStartElement()) {
1654 if (isStartingElement(next, NOM)) {
1655 // TODO should we check if the type is always a species, is
1656 // this a rule?
1657 NonViralName<?> speciesName = handleNom(state, reader,
1658 next, null);
1659 for (TaxonNameBase<?, ?> name : homotypicalGroup
1660 .getTypifiedNames()) {
1661 name.addNameTypeDesignation(speciesName, null, null,
1662 null, status, false, false, false, false);
1663 }
1664 hasNom = true;
1665 } else if (isStartingElement(next, ACCEPTED_NAME)) {
1666 handleNotYetImplementedElement(next);
1667 } else {
1668 handleUnexpectedStartElement(next);
1669 }
1670 } else {
1671 handleUnexpectedElement(next);
1672 }
1673 }
1674 // TODO handle missing end element
1675 throw new IllegalStateException("Homotypes has no closing tag");
1676
1677 }
1678
1679 private void handleSpecimenType(MarkupImportState state,
1680 XMLEventReader reader, XMLEvent parentEvent,
1681 HomotypicalGroup homotypicalGroup) throws XMLStreamException {
1682 // attributes
1683 Map<String, Attribute> attributes = getAttributes(parentEvent);
1684 String typeStatus = getAndRemoveAttributeValue(attributes, TYPE_STATUS);
1685 String notSeen = getAndRemoveAttributeValue(attributes, NOT_SEEN);
1686 String unknown = getAndRemoveAttributeValue(attributes, UNKNOWN);
1687 String notFound = getAndRemoveAttributeValue(attributes, NOT_FOUND);
1688 String destroyed = getAndRemoveAttributeValue(attributes, DESTROYED);
1689 String lost = getAndRemoveAttributeValue(attributes, LOST);
1690 checkNoAttributes(attributes, parentEvent);
1691 if (StringUtils.isNotEmpty(typeStatus)) {
1692 // TODO
1693 // currently not needed
1694 } else if (StringUtils.isNotEmpty(notSeen)) {
1695 handleNotYetImplementedAttribute(attributes, NOT_SEEN);
1696 } else if (StringUtils.isNotEmpty(unknown)) {
1697 handleNotYetImplementedAttribute(attributes, UNKNOWN);
1698 } else if (StringUtils.isNotEmpty(notFound)) {
1699 handleNotYetImplementedAttribute(attributes, NOT_FOUND);
1700 } else if (StringUtils.isNotEmpty(destroyed)) {
1701 handleNotYetImplementedAttribute(attributes, DESTROYED);
1702 } else if (StringUtils.isNotEmpty(lost)) {
1703 handleNotYetImplementedAttribute(attributes, LOST);
1704 }
1705
1706 NonViralName<?> firstName = null;
1707 Set<TaxonNameBase> names = homotypicalGroup.getTypifiedNames();
1708 if (names.isEmpty()) {
1709 String message = "There is no name in a homotypical group. Can't create the specimen type";
1710 fireWarningEvent(message, parentEvent, 8);
1711 } else {
1712 firstName = CdmBase.deproxy(names.iterator().next(),
1713 NonViralName.class);
1714 }
1715
1716 DerivedUnitFacade facade = DerivedUnitFacade
1717 .NewInstance(DerivedUnitType.Specimen);
1718 String text = "";
1719 // elements
1720 while (reader.hasNext()) {
1721 XMLEvent next = readNoWhitespace(reader);
1722 if (next.isEndElement()) {
1723 if (isMyEndingElement(next, parentEvent)) {
1724 makeSpecimenType(state, facade, text, firstName,
1725 parentEvent);
1726 return;
1727 } else {
1728 if (isEndingElement(next, FULL_TYPE)) {
1729 // NOT YET IMPLEMENTED
1730 popUnimplemented(next.asEndElement());
1731 } else if (isEndingElement(next, TYPE_STATUS)) {
1732 // NOT YET IMPLEMENTED
1733 popUnimplemented(next.asEndElement());
1734 } else if (isEndingElement(next, ORIGINAL_DETERMINATION)) {
1735 // NOT YET IMPLEMENTED
1736 popUnimplemented(next.asEndElement());
1737 } else if (isEndingElement(next, SPECIMEN_TYPE)) {
1738 // NOT YET IMPLEMENTED
1739 popUnimplemented(next.asEndElement());
1740 } else if (isEndingElement(next, CITATION)) {
1741 // NOT YET IMPLEMENTED
1742 popUnimplemented(next.asEndElement());
1743 } else if (isEndingElement(next, NOTES)) {
1744 // NOT YET IMPLEMENTED
1745 popUnimplemented(next.asEndElement());
1746 } else if (isEndingElement(next, ANNOTATION)) {
1747 // NOT YET IMPLEMENTED
1748 popUnimplemented(next.asEndElement());
1749 } else {
1750 handleUnexpectedEndElement(next.asEndElement());
1751 }
1752 }
1753 } else if (next.isStartElement()) {
1754 if (isStartingElement(next, FULL_TYPE)) {
1755 handleNotYetImplementedElement(next);
1756 // homotypicalGroup = handleNom(state, reader, next, taxon,
1757 // homotypicalGroup);
1758 } else if (isStartingElement(next, TYPE_STATUS)) {
1759 handleNotYetImplementedElement(next);
1760 } else if (isStartingElement(next, GATHERING)) {
1761 handleGathering(state, reader, next, homotypicalGroup, facade);
1762 } else if (isStartingElement(next, ORIGINAL_DETERMINATION)) {
1763 handleNotYetImplementedElement(next);
1764 } else if (isStartingElement(next, SPECIMEN_TYPE)) {
1765 handleNotYetImplementedElement(next);
1766 } else if (isStartingElement(next, NOTES)) {
1767 handleNotYetImplementedElement(next);
1768 } else if (isStartingElement(next, ANNOTATION)) {
1769 handleNotYetImplementedElement(next);
1770 } else {
1771 handleUnexpectedStartElement(next);
1772 }
1773 } else if (next.isCharacters()) {
1774 text += next.asCharacters().getData();
1775 } else {
1776 handleUnexpectedElement(next);
1777 }
1778 }
1779 // TODO handle missing end element
1780 throw new IllegalStateException("Specimen type has no closing tag");
1781 }
1782
1783 private void makeSpecimenType(MarkupImportState state,
1784 DerivedUnitFacade facade, String text, NonViralName name,
1785 XMLEvent parentEvent) {
1786 text = text.trim();
1787 // remove brackets
1788 if (text.matches("^\\(.*\\)\\.?$")) {
1789 text = text.replaceAll("\\.", "");
1790 text = text.substring(1, text.length() - 1);
1791 }
1792 String[] split = text.split("[;,]");
1793 for (String str : split) {
1794 str = str.trim();
1795 boolean addToAllNamesInGroup = true;
1796 TypeInfo typeInfo = makeSpecimenTypeTypeInfo(str, parentEvent);
1797 SpecimenTypeDesignationStatus typeStatus = typeInfo.status;
1798 Collection collection = createCollection(typeInfo.collectionString);
1799
1800 // TODO improve cache strategy handling
1801 DerivedUnitBase typeSpecimen = facade.addDuplicate(collection,
1802 null, null, null, null);
1803 typeSpecimen.setCacheStrategy(new DerivedUnitFacadeCacheStrategy());
1804 name.addSpecimenTypeDesignation((Specimen) typeSpecimen, typeStatus, null, null, null, false, addToAllNamesInGroup);
1805 }
1806 }
1807
1808 private Collection createCollection(String code) {
1809 // TODO deduplicate
1810 // TODO code <-> name
1811 Collection result = Collection.NewInstance();
1812 result.setCode(code);
1813 return result;
1814 }
1815
1816 private TypeInfo makeSpecimenTypeTypeInfo(String originalString,
1817 XMLEvent event) {
1818 TypeInfo result = new TypeInfo();
1819 String[] split = originalString.split("\\s+");
1820 for (String str : split) {
1821 if (str.matches(SpecimenTypeParser.typeTypePattern)) {
1822 SpecimenTypeDesignationStatus status;
1823 try {
1824 status = SpecimenTypeParser.parseSpecimenTypeStatus(str);
1825 } catch (UnknownCdmTypeException e) {
1826 String message = "Specimen type status '%s' not recognized by parser";
1827 message = String.format(message, str);
1828 fireWarningEvent(message, event, 4);
1829 status = null;
1830 }
1831 result.status = status;
1832 } else if (str.matches(SpecimenTypeParser.collectionPattern)) {
1833 result.collectionString = str;
1834 } else {
1835 String message = "Type part '%s' could not be recognized";
1836 message = String.format(message, str);
1837 fireWarningEvent(message, event, 2);
1838 }
1839 }
1840
1841 return result;
1842 }
1843
1844
1845 private void handleGathering(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, HomotypicalGroup homotypicalGroup, DerivedUnitFacade facade) throws XMLStreamException {
1846 checkNoAttributes(parentEvent);
1847 boolean hasCollector = false;
1848 boolean hasFieldNum = false;
1849
1850 // elements
1851 while (reader.hasNext()) {
1852 XMLEvent next = readNoWhitespace(reader);
1853 if (next.isEndElement()) {
1854 if (isMyEndingElement(next, parentEvent)) {
1855 checkMandatoryElement(hasCollector,parentEvent.asStartElement(), COLLECTOR);
1856 checkMandatoryElement(hasFieldNum,parentEvent.asStartElement(), FIELD_NUM);
1857 return;
1858 } else {
1859 if (isEndingElement(next, ALTERNATIVE_COLLECTOR)) {
1860 // NOT YET IMPLEMENTED
1861 popUnimplemented(next.asEndElement());
1862 } else if (isEndingElement(next, ALTERNATIVE_FIELD_NUM)) {
1863 // NOT YET IMPLEMENTED
1864 popUnimplemented(next.asEndElement());
1865 } else if (isEndingElement(next, COLLECTION_TYPE_STATUS)) {
1866 // NOT YET IMPLEMENTED
1867 popUnimplemented(next.asEndElement());
1868 } else if (isEndingElement(next,
1869 ALTERNATIVE_COLLECTION_TYPE_STATUS)) {
1870 // NOT YET IMPLEMENTED
1871 popUnimplemented(next.asEndElement());
1872 } else if (isEndingElement(next, SUB_COLLECTION)) {
1873 // NOT YET IMPLEMENTED
1874 popUnimplemented(next.asEndElement());
1875 } else if (isEndingElement(next, DATES)) {
1876 // NOT YET IMPLEMENTED
1877 popUnimplemented(next.asEndElement());
1878 } else if (isEndingElement(next, NOTES)) {
1879 // NOT YET IMPLEMENTED
1880 popUnimplemented(next.asEndElement());
1881 } else {
1882 handleUnexpectedEndElement(next.asEndElement());
1883 }
1884 }
1885 } else if (next.isStartElement()) {
1886 if (isStartingElement(next, COLLECTOR)) {
1887 hasCollector = true;
1888 String collectorStr = getCData(state, reader, next);
1889 AgentBase<?> collector = createCollector(collectorStr);
1890 facade.setCollector(collector);
1891 } else if (isStartingElement(next, ALTERNATIVE_COLLECTOR)) {
1892 handleNotYetImplementedElement(next);
1893 } else if (isStartingElement(next, FIELD_NUM)) {
1894 hasFieldNum = true;
1895 String fieldNumStr = getCData(state, reader, next);
1896 facade.setFieldNumber(fieldNumStr);
1897 } else if (isStartingElement(next, ALTERNATIVE_FIELD_NUM)) {
1898 handleNotYetImplementedElement(next);
1899 } else if (isStartingElement(next, COLLECTION_TYPE_STATUS)) {
1900 handleNotYetImplementedElement(next);
1901 } else if (isStartingElement(next,
1902 ALTERNATIVE_COLLECTION_TYPE_STATUS)) {
1903 handleNotYetImplementedElement(next);
1904 } else if (isStartingElement(next, SUB_COLLECTION)) {
1905 handleNotYetImplementedElement(next);
1906 } else if (isStartingElement(next, LOCALITY)) {
1907 handleLocality(state, reader, next, facade);
1908 } else if (isStartingElement(next, DATES)) {
1909 handleNotYetImplementedElement(next);
1910 } else if (isStartingElement(next, NOTES)) {
1911 handleNotYetImplementedElement(next);
1912 } else {
1913 handleUnexpectedStartElement(next);
1914 }
1915 } else {
1916 handleUnexpectedElement(next);
1917 }
1918 }
1919 // TODO handle missing end element
1920 throw new IllegalStateException("Collection has no closing tag");
1921
1922 }
1923
1924 private void handleLocality(MarkupImportState state, XMLEventReader reader,XMLEvent parentEvent, DerivedUnitFacade facade)throws XMLStreamException {
1925 String classValue = getClassOnlyAttribute(parentEvent);
1926 boolean isLocality = false;
1927 NamedAreaLevel areaLevel = null;
1928 if ("locality".equalsIgnoreCase(classValue)) {
1929 isLocality = true;
1930 } else {
1931 areaLevel = makeNamedAreaLevel(state, classValue, parentEvent);
1932 }
1933
1934 String text = "";
1935 // elements
1936 while (reader.hasNext()) {
1937 XMLEvent next = readNoWhitespace(reader);
1938 if (next.isEndElement()) {
1939 if (isMyEndingElement(next, parentEvent)) {
1940 if (StringUtils.isNotBlank(text)) {
1941 text = normalize(text);
1942 if (isLocality) {
1943 facade.setLocality(text);
1944 } else {
1945 text = CdmUtils.removeTrailingDot(text);
1946 NamedArea area = makeArea(state, text, areaLevel);
1947 facade.addCollectingArea(area);
1948 }
1949 }
1950 // TODO
1951 return;
1952 } else {
1953 if (isEndingElement(next, ALTITUDE)) {
1954 // NOT YET IMPLEMENTED
1955 popUnimplemented(next.asEndElement());
1956 } else if (isEndingElement(next, COORDINATES)) {
1957 // NOT YET IMPLEMENTED
1958 popUnimplemented(next.asEndElement());
1959 } else if (isEndingElement(next, ANNOTATION)) {
1960 // NOT YET IMPLEMENTED
1961 popUnimplemented(next.asEndElement());
1962 } else {
1963 handleUnexpectedEndElement(next.asEndElement());
1964 }
1965 }
1966 } else if (next.isStartElement()) {
1967 if (isStartingElement(next, ALTITUDE)) {
1968 handleNotYetImplementedElement(next);
1969 // homotypicalGroup = handleNom(state, reader, next, taxon,
1970 // homotypicalGroup);
1971 } else if (isStartingElement(next, COORDINATES)) {
1972 handleNotYetImplementedElement(next);
1973 } else if (isStartingElement(next, ANNOTATION)) {
1974 handleNotYetImplementedElement(next);
1975 } else {
1976 handleUnexpectedStartElement(next);
1977 }
1978 } else if (next.isCharacters()) {
1979 text += next.asCharacters().getData();
1980 } else {
1981 handleUnexpectedElement(next);
1982 }
1983 }
1984 throw new IllegalStateException("<SpecimenType> has no closing tag");
1985 }
1986
1987 // private NamedArea createArea(String text, NamedAreaLevel areaLevel, MarkupImportState state) {
1988 // NamedArea area = NamedArea.NewInstance(text, text, null);
1989 // area.setLevel(areaLevel);
1990 // save(area, state);
1991 // return area;
1992 // }
1993
1994 private AgentBase<?> createCollector(String collectorStr) {
1995 return createAuthor(collectorStr);
1996 }
1997
1998 private String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next) throws XMLStreamException {
1999 return getCData(state, reader, next, true);
2000 }
2001
2002 /**
2003 * Reads character data. Any element other than character data or the ending
2004 * tag will fire an unexpected element event.
2005 *
2006 * @param state
2007 * @param reader
2008 * @param next
2009 * @return
2010 * @throws XMLStreamException
2011 */
2012 private String getCData(MarkupImportState state, XMLEventReader reader, XMLEvent next,boolean checkAttributes) throws XMLStreamException {
2013 if (checkAttributes){
2014 checkNoAttributes(next);
2015 }
2016
2017 String text = "";
2018 while (reader.hasNext()) {
2019 XMLEvent myNext = readNoWhitespace(reader);
2020 if (isMyEndingElement(myNext, next)) {
2021 return text;
2022 } else if (myNext.isCharacters()) {
2023 text += myNext.asCharacters().getData();
2024 } else {
2025 handleUnexpectedElement(myNext);
2026 }
2027 }
2028 throw new IllegalStateException("Event has no closing tag");
2029
2030 }
2031
2032 /**
2033 * Creates the name defined by a nom tag. Adds it to the given homotypical
2034 * group (if not null).
2035 *
2036 * @param state
2037 * @param reader
2038 * @param parentEvent
2039 * @param homotypicalGroup
2040 * @return
2041 * @throws XMLStreamException
2042 */
2043 private NonViralName<?> handleNom(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
2044 HomotypicalGroup homotypicalGroup) throws XMLStreamException {
2045 boolean isSynonym = false;
2046 boolean isNameType = state.isNameType();
2047 // attributes
2048 String classValue = getClassOnlyAttribute(parentEvent);
2049 NonViralName<?> name;
2050 if (!isNameType && ACCEPTED.equalsIgnoreCase(classValue)) {
2051 isSynonym = false;
2052 name = createName(state, homotypicalGroup, isSynonym);
2053 } else if (!isNameType && SYNONYM.equalsIgnoreCase(classValue)) {
2054 isSynonym = true;
2055 name = createName(state, homotypicalGroup, isSynonym);
2056 } else if (isNameType && NAME_TYPE.equalsIgnoreCase(classValue)) {
2057 // TODO do we need to define the rank here?
2058 name = createNameByCode(state, null);
2059 } else {
2060 fireUnexpectedAttributeValue(parentEvent, CLASS, classValue);
2061 name = createNameByCode(state, null);
2062 }
2063
2064 Map<String, String> nameMap = new HashMap<String, String>();
2065
2066 while (reader.hasNext()) {
2067 XMLEvent next = readNoWhitespace(reader);
2068 if (next.isEndElement()) {
2069 if (isMyEndingElement(next, parentEvent)) {
2070 // fill the name with all data gathered
2071 fillName(state, nameMap, name, next);
2072 return name;
2073 } else {
2074 if (isEndingElement(next, FULL_NAME)) {
2075 // NOT YET IMPLEMENTED
2076 popUnimplemented(next.asEndElement());
2077 } else if (isEndingElement(next, NUM)) {
2078 // NOT YET IMPLEMENTED
2079 popUnimplemented(next.asEndElement());
2080 } else if (isEndingElement(next, HOMONYM)) {
2081 // NOT YET IMPLEMENTED
2082 popUnimplemented(next.asEndElement());
2083 } else if (isEndingElement(next, NOTES)) {
2084 // NOT YET IMPLEMENTED
2085 popUnimplemented(next.asEndElement());
2086 } else if (isEndingElement(next, ANNOTATION)) {
2087 // NOT YET IMPLEMENTED
2088 popUnimplemented(next.asEndElement());
2089 } else {
2090 handleUnexpectedEndElement(next.asEndElement());
2091 }
2092 }
2093 } else if (next.isStartElement()) {
2094 if (isStartingElement(next, FULL_NAME)) {
2095 handleNotYetImplementedElement(next);
2096 // homotypicalGroup = handleNom(state, reader, next, taxon,
2097 // homotypicalGroup);
2098 } else if (isStartingElement(next, NUM)) {
2099 handleNotYetImplementedElement(next);
2100 } else if (isStartingElement(next, NAME)) {
2101 handleName(state, reader, next, nameMap);
2102 } else if (isStartingElement(next, CITATION)) {
2103 handleCitation(state, reader, next, name);
2104 } else if (isStartingElement(next, HOMONYM)) {
2105 handleNotYetImplementedElement(next);
2106 } else if (isStartingElement(next, NOTES)) {
2107 handleNotYetImplementedElement(next);
2108 } else if (isStartingElement(next, ANNOTATION)) {
2109 handleNotYetImplementedElement(next);
2110 } else {
2111 handleUnexpectedStartElement(next);
2112 }
2113 } else {
2114 handleUnexpectedElement(next);
2115 }
2116 }
2117 // TODO handle missing end element
2118 throw new IllegalStateException("Nom has no closing tag");
2119
2120 }
2121
2122 private void fillName(MarkupImportState state, Map<String, String> nameMap,
2123 NonViralName name, XMLEvent event) {
2124
2125 // Ranks: family, subfamily, tribus, genus, subgenus, section,
2126 // subsection, species, subspecies, variety, subvariety, forma
2127 // infrank, paraut, author, infrparaut, infraut, status, notes
2128
2129 String infrank = getAndRemoveMapKey(nameMap, INFRANK);
2130 String authorStr = getAndRemoveMapKey(nameMap, AUTHOR);
2131 String paraut = getAndRemoveMapKey(nameMap, PARAUT);
2132
2133 String infrParAut = getAndRemoveMapKey(nameMap, INFRPARAUT);
2134 String infrAut = getAndRemoveMapKey(nameMap, INFRAUT);
2135
2136 String statusStr = getAndRemoveMapKey(nameMap, STATUS);
2137 String notes = getAndRemoveMapKey(nameMap, NOTES);
2138
2139 makeRankDecision(state, nameMap, name, event, infrank);
2140
2141 // test consistency of rank and authors
2142 testRankAuthorConsistency(name, event, authorStr, paraut, infrParAut,infrAut);
2143
2144 // authors
2145 makeNomenclaturalAuthors(name, event, authorStr, paraut, infrParAut,infrAut);
2146
2147 // status
2148 // TODO handle pro parte, pro syn. etc.
2149 if (StringUtils.isNotBlank(statusStr)) {
2150 String proPartePattern = "(pro parte|p.p.)";
2151 if (statusStr.matches(proPartePattern)) {
2152 state.setProParte(true);
2153 }
2154 try {
2155 // TODO handle trim earlier
2156 statusStr = statusStr.trim();
2157 NomenclaturalStatusType nomStatusType = NomenclaturalStatusType
2158 .getNomenclaturalStatusTypeByAbbreviation(statusStr);
2159 name.addStatus(NomenclaturalStatus.NewInstance(nomStatusType));
2160 } catch (UnknownCdmTypeException e) {
2161 String message = "Status '%s' could not be recognized";
2162 message = String.format(message, statusStr);
2163 fireWarningEvent(message, event, 4);
2164 }
2165 }
2166
2167 // notes
2168 if (StringUtils.isNotBlank(notes)) {
2169 handleNotYetImplementedAttributeValue(event, CLASS, NOTES);
2170 }
2171
2172 return;
2173 }
2174
2175 /**
2176 * @param state
2177 * @param nameMap
2178 * @param name
2179 * @param event
2180 * @param infrankStr
2181 */
2182 private void makeRankDecision(MarkupImportState state,
2183 Map<String, String> nameMap, NonViralName<?> name, XMLEvent event,
2184 String infrankStr) {
2185 // TODO ranks
2186 for (String key : nameMap.keySet()) {
2187 Rank rank = makeRank(state, key, false);
2188 if (rank == null) {
2189 handleNotYetImplementedAttributeValue(event, CLASS, key);
2190 } else {
2191 if (name.getRank() == null || rank.isLower(name.getRank())) {
2192 name.setRank(rank);
2193 }
2194 String value = nameMap.get(key);
2195 if (rank.isSupraGeneric() || rank.isGenus()) {
2196 name.setGenusOrUninomial(value);
2197 } else if (rank.isInfraGeneric()) {
2198 name.setInfraGenericEpithet(value);
2199 } else if (rank.isSpecies()) {
2200 name.setSpecificEpithet(value);
2201 } else if (rank.isInfraSpecific()) {
2202 name.setInfraSpecificEpithet(value);
2203 } else {
2204 String message = "Invalid rank '%s'. Can't decide which epithet to fill with '%s'";
2205 message = String.format(message, rank.getTitleCache(),value);
2206 fireWarningEvent(message, event, 4);
2207 }
2208 }
2209
2210 }
2211 // handle given infrank marker
2212 if (StringUtils.isNotBlank(infrankStr)) {
2213 Rank infRank = makeRank(state, infrankStr, true);
2214
2215 if (infRank == null) {
2216 String message = "Infrank '%s' rank not recognized";
2217 message = String.format(message, infrankStr);
2218 fireWarningEvent(message, event, 4);
2219 } else {
2220 if (name.getRank() == null) {
2221 name.setRank(infRank);
2222 } else if (infRank.isLower(name.getRank())) {
2223 String message = "InfRank '%s' is lower than existing rank ";
2224 message = String.format(message, infrankStr);
2225 fireWarningEvent(message, event, 2);
2226 name.setRank(infRank);
2227 } else if (infRank.equals(name.getRank())) {
2228 // nothing
2229 } else {
2230 String message = "InfRank '%s' is higher than existing rank ";
2231 message = String.format(message, infrankStr);
2232 fireWarningEvent(message, event, 2);
2233 }
2234 }
2235 }
2236 }
2237
2238 /**
2239 * @param name
2240 * @param event
2241 * @param authorStr
2242 * @param paraut
2243 * @param infrParAut
2244 * @param infrAut
2245 */
2246 private void makeNomenclaturalAuthors(NonViralName name, XMLEvent event,
2247 String authorStr, String paraut, String infrParAut, String infrAut) {
2248 if (name.getRank() != null && name.getRank().isInfraSpecific()) {
2249 if (StringUtils.isNotBlank(infrAut)) {
2250 INomenclaturalAuthor[] authorAndEx = authorAndEx(infrAut, event);
2251 name.setCombinationAuthorTeam(authorAndEx[0]);
2252 name.setExCombinationAuthorTeam(authorAndEx[1]);
2253 }
2254 if (StringUtils.isNotBlank(infrParAut)) {
2255 INomenclaturalAuthor[] authorAndEx = authorAndEx(infrParAut, event);
2256 name.setBasionymAuthorTeam(authorAndEx[0]);
2257 name.setExBasionymAuthorTeam(authorAndEx[1]);
2258 }
2259 } else {
2260 if (name.getRank() == null){
2261 String message = "No rank defined. Check correct usage of authors!";
2262 fireWarningEvent(message, event, 4);
2263 if (isNotBlank(infrParAut) || isNotBlank(infrAut)){
2264 authorStr = infrAut;
2265 paraut = infrParAut;
2266 }
2267 }
2268 if (StringUtils.isNotBlank(authorStr)) {
2269 INomenclaturalAuthor[] authorAndEx = authorAndEx(authorStr, event);
2270 name.setCombinationAuthorTeam(authorAndEx[0]);
2271 name.setExCombinationAuthorTeam(authorAndEx[1]);
2272 }
2273 if (StringUtils.isNotBlank(paraut)) {
2274 INomenclaturalAuthor[] authorAndEx = authorAndEx(paraut, event);
2275 name.setBasionymAuthorTeam(authorAndEx[0]);
2276 name.setExBasionymAuthorTeam(authorAndEx[1]);
2277 }
2278 }
2279 }
2280
2281 private TeamOrPersonBase[] authorAndEx(String authorAndEx, XMLEvent xmlEvent) {
2282 authorAndEx = authorAndEx.trim();
2283 TeamOrPersonBase[] result = new TeamOrPersonBase[2];
2284
2285 String[] split = authorAndEx.split("\\sex\\s");
2286 if (split.length > 2) {
2287 String message = "There is more then 1 ' ex ' in author string. Can't separate author and ex-author";
2288 fireWarningEvent(message, xmlEvent, 4);
2289 result[0] = createAuthor(authorAndEx);
2290 } else if (split.length == 2) {
2291 result[0] = createAuthor(split[1]);
2292 result[1] = createAuthor(split[0]);
2293 } else {
2294 result[0] = createAuthor(split[0]);
2295 }
2296 return result;
2297 }
2298
2299 /**
2300 * Tests if the names rank is consistent with the given author strings.
2301 * @param name
2302 * @param event
2303 * @param authorStr
2304 * @param paraut
2305 * @param infrParAut
2306 * @param infrAut
2307 */
2308 private void testRankAuthorConsistency(NonViralName name, XMLEvent event,
2309 String authorStr, String paraut, String infrParAut, String infrAut) {
2310 if (name.getRank() == null){
2311 return;
2312 }
2313 if (name.getRank().isInfraSpecific()) {
2314 if (StringUtils.isBlank(infrParAut)
2315 && StringUtils.isNotBlank(infrAut)
2316 && (StringUtils.isNotBlank(paraut) || StringUtils.isNotBlank(authorStr))) {
2317 String message = "Rank is infraspecicific but has only specific or higher author(s)";
2318 fireWarningEvent(message, event, 4);
2319 }
2320 } else {
2321 // is not infraspecific
2322 if (StringUtils.isNotBlank(infrParAut) || StringUtils.isNotBlank(infrAut)) {
2323 String message = "Rank is not infraspecicific but name has infra author(s)";
2324 fireWarningEvent(message, event, 4);
2325 }
2326 }
2327 }
2328
2329 /**
2330 * Returns the (empty) name with the correct homotypical group depending on
2331 * the taxon status. Throws NPE if no currentTaxon is set in state.
2332 *
2333 * @param state
2334 * @param homotypicalGroup
2335 * @param isSynonym
2336 * @return
2337 */
2338 private NonViralName<?> createName(MarkupImportState state,
2339 HomotypicalGroup homotypicalGroup, boolean isSynonym) {
2340 NonViralName<?> name;
2341 Taxon taxon = state.getCurrentTaxon();
2342 if (isSynonym) {
2343 Rank defaultRank = Rank.SPECIES(); // can be any
2344 name = createNameByCode(state, defaultRank);
2345 if (homotypicalGroup != null) {
2346 name.setHomotypicalGroup(homotypicalGroup);
2347 }
2348 SynonymRelationshipType synonymType = SynonymRelationshipType
2349 .HETEROTYPIC_SYNONYM_OF();
2350 if (taxon.getHomotypicGroup().equals(homotypicalGroup)) {
2351 synonymType = SynonymRelationshipType.HOMOTYPIC_SYNONYM_OF();
2352 }
2353 taxon.addSynonymName(name, synonymType);
2354 } else {
2355 name = CdmBase.deproxy(taxon.getName(), NonViralName.class);
2356 }
2357 return name;
2358 }
2359
2360 private void handleName(MarkupImportState state, XMLEventReader reader,
2361 XMLEvent parentEvent, Map<String, String> nameMap)
2362 throws XMLStreamException {
2363 String classValue = getClassOnlyAttribute(parentEvent);
2364
2365 String text = "";
2366 while (reader.hasNext()) {
2367 XMLEvent next = readNoWhitespace(reader);
2368 if (isMyEndingElement(next, parentEvent)) {
2369 nameMap.put(classValue, text);
2370 return;
2371 } else if (next.isStartElement()) {
2372 if (isStartingElement(next, ANNOTATION)) {
2373 handleNotYetImplementedElement(next);
2374 } else {
2375 handleUnexpectedStartElement(next.asStartElement());
2376 }
2377 } else if (next.isCharacters()) {
2378 text += next.asCharacters().getData();
2379 } else {
2380 handleUnexpectedEndElement(next.asEndElement());
2381 }
2382 }
2383 throw new IllegalStateException("name has no closing tag");
2384
2385 }
2386
2387 /**
2388 * @param state
2389 * @param classValue
2390 * @param byAbbrev
2391 * @return
2392 */
2393 private Rank makeRank(MarkupImportState state, String value,
2394 boolean byAbbrev) {
2395 Rank rank = null;
2396 if (StringUtils.isBlank(value)) {
2397 return null;
2398 }
2399 try {
2400 boolean useUnknown = true;
2401 NomenclaturalCode nc = makeNomenclaturalCode(state);
2402 if (byAbbrev) {
2403 rank = Rank.getRankByAbbreviation(value, nc, useUnknown);
2404 } else {
2405 rank = Rank.getRankByEnglishName(value, nc, useUnknown);
2406 }
2407 if (rank.equals(Rank.UNKNOWN_RANK())) {
2408 rank = null;
2409 }
2410 } catch (UnknownCdmTypeException e) {
2411 // doNothing
2412 }
2413 return rank;
2414 }
2415
2416 // public void handleNameNotRank(MarkupImportState state, XMLEventReader
2417 // reader, XMLEvent parentEvent, String classValue, NonViralName name)
2418 // throws XMLStreamException {
2419 // if (ACCEPTED.equalsIgnoreCase(classValue)){
2420 // }else if (SYNONYM.equalsIgnoreCase(classValue)){
2421 // }else{
2422 // //TODO Not yet implemented
2423 // handleNotYetImplementedAttributeValue(parentEvent, CLASS, classValue);
2424 // }
2425 // }
2426
2427 private void handleCitation(MarkupImportState state, XMLEventReader reader,
2428 XMLEvent parentEvent, NonViralName name) throws XMLStreamException {
2429 String classValue = getClassOnlyAttribute(parentEvent);
2430
2431 state.setCitation(true);
2432 boolean hasRefPart = false;
2433 Map<String, String> refMap = new HashMap<String, String>();
2434 while (reader.hasNext()) {
2435 XMLEvent next = readNoWhitespace(reader);
2436 if (isMyEndingElement(next, parentEvent)) {
2437 checkMandatoryElement(hasRefPart, parentEvent.asStartElement(),
2438 REF_PART);
2439 Reference reference = createReference(state, refMap, next);
2440 String microReference = refMap.get(DETAILS);
2441 doCitation(state, name, classValue, reference, microReference,
2442 parentEvent);
2443 state.setCitation(false);
2444 return;
2445 } else if (isStartingElement(next, REF_PART)) {
2446 handleRefPart(state, reader, next, refMap);
2447 hasRefPart = true;
2448 } else {
2449 handleUnexpectedElement(next);
2450 }
2451 }
2452 throw new IllegalStateException("Citation has no closing tag");
2453
2454 }
2455
2456 private void handleRefPart(MarkupImportState state, XMLEventReader reader,XMLEvent parentEvent, Map<String, String> refMap) throws XMLStreamException {
2457 String classValue = getClassOnlyAttribute(parentEvent);
2458
2459 String text = "";
2460 while (reader.hasNext()) {
2461 XMLEvent next = readNoWhitespace(reader);
2462 if (isMyEndingElement(next, parentEvent)) {
2463 refMap.put(classValue, text);
2464 return;
2465 } else if (next.isStartElement()) {
2466 if (isStartingElement(next, ANNOTATION)) {
2467 handleNotYetImplementedElement(next);
2468 } else if (isStartingElement(next, ITALICS)) {
2469 handleNotYetImplementedElement(next);
2470 } else if (isStartingElement(next, BOLD)) {
2471 handleNotYetImplementedElement(next);
2472 } else {
2473 handleUnexpectedStartElement(next.asStartElement());
2474 }
2475 } else if (next.isCharacters()) {
2476 text += next.asCharacters().getData();
2477 } else {
2478 handleUnexpectedEndElement(next.asEndElement());
2479 }
2480 }
2481 throw new IllegalStateException("RefPart has no closing tag");
2482
2483 }
2484
2485 private Reference createReference(MarkupImportState state, Map<String, String> refMap, XMLEvent parentEvent) {
2486 // TODO
2487 Reference reference;
2488
2489 String type = getAndRemoveMapKey(refMap, PUBTYPE);
2490 String authorStr = getAndRemoveMapKey(refMap, AUTHOR);
2491 String titleStr = getAndRemoveMapKey(refMap, PUBTITLE);
2492 String titleCache = getAndRemoveMapKey(refMap, PUBFULLNAME);
2493 String volume = getAndRemoveMapKey(refMap, VOLUME);
2494 String edition = getAndRemoveMapKey(refMap, EDITION);
2495 String editors = getAndRemoveMapKey(refMap, EDITORS);
2496 String year = getAndRemoveMapKey(refMap, YEAR);
2497 String pubName = getAndRemoveMapKey(refMap, PUBNAME);
2498
2499 if (state.isCitation()) {
2500 if (volume != null || "journal".equalsIgnoreCase(type)) {
2501 IArticle article = ReferenceFactory.newArticle();
2502 if (pubName != null) {
2503 IJournal journal = ReferenceFactory.newJournal();
2504 journal.setTitle(pubName);
2505 article.setInJournal(journal);
2506 }
2507 reference = (Reference) article;
2508
2509 } else {
2510 // TODO
2511 Reference bookOrPartOf = ReferenceFactory.newGeneric();
2512 reference = bookOrPartOf;
2513 }
2514 // TODO use existing author from name or before
2515 TeamOrPersonBase author = createAuthor(authorStr);
2516 reference.setAuthorTeam(author);
2517
2518 } else {
2519 if (volume != null || "journal".equalsIgnoreCase(type)) {
2520 IArticle article = ReferenceFactory.newArticle();
2521 if (pubName != null) {
2522 IJournal journal = ReferenceFactory.newJournal();
2523 journal.setTitle(pubName);
2524 article.setInJournal(journal);
2525 }
2526 reference = (Reference) article;
2527
2528 } else {
2529 Reference bookOrPartOf = ReferenceFactory.newGeneric();
2530 reference = bookOrPartOf;
2531 }
2532
2533 // TODO type
2534 TeamOrPersonBase author = createAuthor(authorStr);
2535 reference.setAuthorTeam(author);
2536
2537 reference.setTitle(titleStr);
2538 if (StringUtils.isNotBlank(titleCache)) {
2539 reference.setTitleCache(titleCache, true);
2540 }
2541 reference.setEdition(edition);
2542 reference.setEditor(editors);
2543
2544 if (pubName != null) {
2545 Reference inReference;
2546 if (reference.getType().equals(ReferenceType.Article)) {
2547 inReference = ReferenceFactory.newJournal();
2548 } else {
2549 inReference = ReferenceFactory.newGeneric();
2550 }
2551 inReference.setTitle(pubName);
2552 reference.setInReference(inReference);
2553 }
2554 }
2555 reference.setVolume(volume);
2556 reference.setDatePublished(TimePeriod.parseString(year));
2557
2558 // TODO
2559 String[] unhandledList = new String[]{ALTERNATEPUBTITLE, ISSUE, NOTES, STATUS};
2560 for (String unhandled : unhandledList){
2561 String value = getAndRemoveMapKey(refMap, unhandled);
2562 if (isNotBlank(value)){
2563 this.handleNotYetImplementedAttributeValue(parentEvent, CLASS, unhandled);
2564 }
2565 }
2566
2567 for (String key : refMap.keySet()) {
2568 if (!DETAILS.equalsIgnoreCase(key)) {
2569 this.fireUnexpectedAttributeValue(parentEvent, CLASS, key);
2570 }
2571 }
2572
2573 return reference;
2574 }
2575
2576 private TeamOrPersonBase createAuthor(String authorTitle) {
2577 // TODO atomize and also use by name creation
2578 TeamOrPersonBase result = Team.NewTitledInstance(authorTitle,
2579 authorTitle);
2580 return result;
2581 }
2582
2583 private String getAndRemoveMapKey(Map<String, String> map, String key) {
2584 String result = map.get(key);
2585 map.remove(key);
2586 if (result != null) {
2587 result = normalize(result);
2588 }
2589 return StringUtils.stripToNull(result);
2590 }
2591
2592 private void doCitation(MarkupImportState state, NonViralName name,
2593 String classValue, Reference reference, String microCitation,
2594 XMLEvent parentEvent) {
2595 if (PUBLICATION.equalsIgnoreCase(classValue)) {
2596 name.setNomenclaturalReference(reference);
2597 name.setNomenclaturalMicroReference(microCitation);
2598 } else if (USAGE.equalsIgnoreCase(classValue)) {
2599 Taxon taxon = state.getCurrentTaxon();
2600 TaxonDescription td = this.getTaxonDescription(taxon, state
2601 .getConfig().getSourceReference(), false, true);
2602 TextData citation = TextData.NewInstance(Feature.CITATION());
2603 // TODO name used in source
2604 citation.addSource(null, null, reference, microCitation);
2605 td.addElement(citation);
2606 } else if (TYPE.equalsIgnoreCase(classValue)) {
2607 handleNotYetImplementedAttributeValue(parentEvent, CLASS,
2608 classValue);
2609 } else {
2610 // TODO Not yet implemented
2611 handleNotYetImplementedAttributeValue(parentEvent, CLASS,
2612 classValue);
2613 }
2614 }
2615
2616 private void handleFeature(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
2617 String classValue = getClassOnlyAttribute(parentEvent);
2618 Feature feature = makeFeature(classValue, state, parentEvent);
2619 Taxon taxon = state.getCurrentTaxon();
2620 TaxonDescription taxonDescription = getTaxonDescription(taxon, state.getConfig().getSourceReference(), NO_IMAGE_GALLERY, CREATE_NEW);
2621 // TextData figureHolderTextData = null; //for use with one TextData for
2622 // all figure only
2623
2624 boolean isDescription = feature.equals(Feature.DESCRIPTION());
2625 DescriptionElementBase lastDescriptionElement = null;
2626
2627 while (reader.hasNext()) {
2628 XMLEvent next = readNoWhitespace(reader);
2629 if (isMyEndingElement(next, parentEvent)) {
2630 return;
2631 } else if (isEndingElement(next, DISTRIBUTION_LIST) || isEndingElement(next, HABITAT_LIST)) {
2632 // only handle list elements
2633 } else if (isStartingElement(next, HEADING)) {
2634 makeFeatureHeading(state, reader, classValue, feature, next);
2635 } else if (isStartingElement(next, WRITER)) {
2636 makeFeatureWriter(state, reader, feature, taxon, next);
2637 // } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
2638 // if (!feature.equals(Feature.DISTRIBUTION())) {
2639 // String message = "Distribution locality only allowed for feature of type 'distribution'";
2640 // fireWarningEvent(message, next, 4);
2641 // }
2642 // handleDistributionLocality(state, reader, next);
2643 } else if (isStartingElement(next, DISTRIBUTION_LIST) || isStartingElement(next, HABITAT_LIST)) {
2644 // only handle single list elements
2645 } else if (isStartingElement(next, HABITAT)) {
2646 if (!(feature.equals(Feature.HABITAT())
2647 || feature.equals(Feature.HABITAT_ECOLOGY())
2648 || feature.equals(Feature.ECOLOGY()))) {
2649 String message = "Habitat only allowed for feature of type 'habitat','habitat ecology' or 'ecology'";
2650 fireWarningEvent(message, next, 4);
2651 }
2652 handleHabitat(state, reader, next);
2653 } else if (isStartingElement(next, CHAR)) {
2654 TextData textData = handleChar(state, reader, next);
2655 taxonDescription.addElement(textData);
2656 } else if (isStartingElement(next, STRING)) {
2657 lastDescriptionElement = makeFeatureString(state, reader,feature, taxonDescription, lastDescriptionElement,next);
2658 } else if (isStartingElement(next, FIGURE_REF)) {
2659 lastDescriptionElement = makeFeatureFigureRef(state, reader, taxonDescription, isDescription, lastDescriptionElement, next);
2660 } else if (isStartingElement(next, REFERENCES)) {
2661 // TODO details/microcitation ??
2662
2663 List<Reference<?>> refs = handleReferences(state, reader, next);
2664 if (!refs.isEmpty()) {
2665 // TODO
2666 Reference<?> descriptionRef = state.getConfig().getSourceReference();
2667 TaxonDescription description = getTaxonDescription(taxon, descriptionRef, false, true);
2668 TextData featurePlaceholder = getFeaturePlaceholder(state, description, feature, true);
2669 for (Reference<?> citation : refs) {
2670 featurePlaceholder.addSource(null, null, citation, null);
2671 }
2672 } else {
2673 String message = "No reference found in references";
2674 fireWarningEvent(message, next, 6);
2675 }
2676 } else if (isStartingElement(next, NUM)) {
2677 //TODO
2678 handleNotYetImplementedElement(next);
2679 } else if (isEndingElement(next, NUM)) {
2680 //TODO
2681 popUnimplemented(next.asEndElement());
2682 } else {
2683 handleUnexpectedElement(next);
2684 }
2685 }
2686 throw new IllegalStateException("<Feature> has no closing tag");
2687 }
2688
2689 /**
2690 * @param state
2691 * @param reader
2692 * @param taxonDescription
2693 * @param isDescription
2694 * @param lastDescriptionElement
2695 * @param next
2696 * @return
2697 * @throws XMLStreamException
2698 */
2699 private DescriptionElementBase makeFeatureFigureRef(MarkupImportState state, XMLEventReader reader,TaxonDescription taxonDescription,
2700 boolean isDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next)throws XMLStreamException {
2701 FigureDataHolder figureHolder = handleFigureRef(state, reader, next);
2702 Feature figureFeature = getFeature(state,MarkupTransformer.uuidFigures, "Figures", "Figures", "Fig.",null);
2703 if (isDescription) {
2704 TextData figureHolderTextData = null;
2705 // if (figureHolderTextData == null){
2706 figureHolderTextData = TextData.NewInstance(figureFeature);
2707 if (StringUtils.isNotBlank(figureHolder.num)) {
2708 String annotationText = "<num>" + figureHolder.num.trim()
2709 + "</num>";
2710 Annotation annotation = Annotation.NewInstance(annotationText,
2711 AnnotationType.TECHNICAL(), Language.DEFAULT());
2712 figureHolderTextData.addAnnotation(annotation);
2713 }
2714 if (StringUtils.isNotBlank(figureHolder.figurePart)) {
2715 String annotationText = "<figurePart>"+ figureHolder.figurePart.trim() + "</figurePart>";
2716 Annotation annotation = Annotation.NewInstance(annotationText,AnnotationType.EDITORIAL(), Language.DEFAULT());
2717 figureHolderTextData.addAnnotation(annotation);
2718 }
2719 // if (StringUtils.isNotBlank(figureText)){
2720 // figureHolderTextData.putText(Language.DEFAULT(), figureText);
2721 // }
2722 taxonDescription.addElement(figureHolderTextData);
2723 // }
2724 registerFigureDemand(state, figureHolderTextData, figureHolder.ref);
2725 } else {
2726 if (lastDescriptionElement == null) {
2727 String message = "No description element created yet that can be referred by figure. Create new TextData instead";
2728 fireWarningEvent(message, next, 4);
2729 lastDescriptionElement = TextData.NewInstance(figureFeature);
2730 taxonDescription.addElement(lastDescriptionElement);
2731 }
2732 registerFigureDemand(state, lastDescriptionElement,
2733 figureHolder.ref);
2734 }
2735 return lastDescriptionElement;
2736 }
2737
2738 /**
2739 * @param state
2740 * @param reader
2741 * @param feature
2742 * @param taxonDescription
2743 * @param lastDescriptionElement
2744 * @param distributionList
2745 * @param next
2746 * @return
2747 * @throws XMLStreamException
2748 */
2749 private DescriptionElementBase makeFeatureString(MarkupImportState state,XMLEventReader reader, Feature feature,
2750 TaxonDescription taxonDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next) throws XMLStreamException {
2751 Map<String, String> subheadingMap = handleString(state, reader, next, feature);
2752 for (String subheading : subheadingMap.keySet()) {
2753 Feature subheadingFeature = feature;
2754 if (StringUtils.isNotBlank(subheading) && subheadingMap.size() > 1) {
2755 subheadingFeature = makeFeature(subheading, state, next);
2756 }
2757 TextData textData = TextData.NewInstance(subheadingFeature);
2758 textData.putText(Language.DEFAULT(), subheadingMap.get(subheading));
2759 taxonDescription.addElement(textData);
2760 // TODO how to handle figures when these data are split in
2761 // subheadings
2762 lastDescriptionElement = textData;
2763 }
2764 return lastDescriptionElement;
2765 }
2766
2767 /**
2768 * @param state
2769 * @param reader
2770 * @param feature
2771 * @param taxon
2772 * @param next
2773 * @throws XMLStreamException
2774 */
2775 private void makeFeatureWriter(MarkupImportState state,XMLEventReader reader, Feature feature, Taxon taxon, XMLEvent next) throws XMLStreamException {
2776 WriterDataHolder writer = handleWriter(state, reader, next);
2777 if (isNotBlank(writer.writer)) {
2778 // TODO
2779 Reference<?> ref = state.getConfig().getSourceReference();
2780 TaxonDescription description = getTaxonDescription(taxon, ref,
2781 false, true);
2782 TextData featurePlaceholder = getFeaturePlaceholder(state,
2783 description, feature, true);
2784 featurePlaceholder.addAnnotation(writer.annotation);
2785 registerFootnotes(state, featurePlaceholder, writer.footnotes);
2786 } else {
2787 String message = "Writer element is empty";
2788 fireWarningEvent(message, next, 4);
2789 }
2790 }
2791
2792 /**
2793 * @param state
2794 * @param reader
2795 * @param classValue
2796 * @param feature
2797 * @param next
2798 * @throws XMLStreamException
2799 */
2800 private void makeFeatureHeading(MarkupImportState state, XMLEventReader reader, String classValue, Feature feature, XMLEvent next) throws XMLStreamException {
2801 String heading = handleHeading(state, reader, next);
2802 if (StringUtils.isNotBlank(heading)) {
2803 if (!heading.equalsIgnoreCase(classValue)) {
2804 try {
2805 if (!feature.equals(state.getTransformer().getFeatureByKey(
2806 heading))) {
2807 UUID headerFeatureUuid = state.getTransformer()
2808 .getFeatureUuid(heading);
2809 if (!feature.getUuid().equals(headerFeatureUuid)) {
2810 String message = "Feature heading '%s' differs from feature class '%s' and can not be transformed to feature";
2811 message = String.format(message, heading,
2812 classValue);
2813 fireWarningEvent(message, next, 1);
2814 }
2815 }
2816 } catch (UndefinedTransformerMethodException e) {
2817 throw new RuntimeException(e);
2818 }
2819 } else {
2820 // do nothing
2821 }
2822 }
2823 }
2824
2825 private List<Reference<?>> handleReferences(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
2826 // attributes
2827 Map<String, Attribute> attributes = getAttributes(parentEvent);
2828 String bibliography = getAndRemoveAttributeValue(attributes,
2829 BIBLIOGRAPHY);
2830 String serialsAbbreviations = getAndRemoveAttributeValue(attributes,
2831 SERIALS_ABBREVIATIONS);
2832 if (isNotBlank(bibliography) || isNotBlank(serialsAbbreviations)) {
2833 String message = "Attributes not yet implemented for <references>";
2834 fireWarningEvent(message, parentEvent, 4);
2835 }
2836
2837 List<Reference<?>> result = new ArrayList<Reference<?>>();
2838
2839 // elements
2840 while (reader.hasNext()) {
2841 XMLEvent next = readNoWhitespace(reader);
2842 if (next.isEndElement()) {
2843 if (isMyEndingElement(next, parentEvent)) {
2844 return result;
2845 } else {
2846 if (isEndingElement(next, HEADING)) {
2847 // NOT YET IMPLEMENTED
2848 popUnimplemented(next.asEndElement());
2849 } else if (isEndingElement(next, WRITER)) {
2850 // NOT YET IMPLEMENTED
2851 popUnimplemented(next.asEndElement());
2852 } else if (isEndingElement(next, FOOTNOTE)) {
2853 // NOT YET IMPLEMENTED
2854 popUnimplemented(next.asEndElement());
2855 } else if (isEndingElement(next, STRING)) {
2856 // NOT YET IMPLEMENTED
2857 popUnimplemented(next.asEndElement());
2858 } else if (isEndingElement(next, REF_NUM)) {
2859 // NOT YET IMPLEMENTED
2860 popUnimplemented(next.asEndElement());
2861 } else {
2862 handleUnexpectedEndElement(next.asEndElement());
2863 }
2864 }
2865 } else if (next.isStartElement()) {
2866 if (isStartingElement(next, HEADING)) {
2867 handleNotYetImplementedElement(next);
2868 } else if (isStartingElement(next, SUB_HEADING)) {
2869 String subheading = getCData(state, reader, next).trim();
2870 String excludePattern = "(i?)(References?|Literature):?";
2871 if (!subheading.matches(excludePattern)) {
2872 fireNotYetImplementedElement(next.getLocation(), next.asStartElement().getName(), 0);
2873 }
2874 } else if (isStartingElement(next, WRITER)) {
2875 handleNotYetImplementedElement(next);
2876 } else if (isStartingElement(next, FOOTNOTE)) {
2877 handleNotYetImplementedElement(next);
2878 } else if (isStartingElement(next, STRING)) {
2879 handleNotYetImplementedElement(next);
2880 } else if (isStartingElement(next, REF_NUM)) {
2881 handleNotYetImplementedElement(next);
2882 } else if (isStartingElement(next, REFERENCE)) {
2883 Reference<?> ref = handleReference(state, reader, next);
2884 result.add(ref);
2885 } else {
2886 handleUnexpectedStartElement(next);
2887 }
2888 } else {
2889 handleUnexpectedElement(next);
2890 }
2891 }
2892 throw new IllegalStateException("<References> has no closing tag");
2893 }
2894
2895 private void handleHabitat(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
2896 checkNoAttributes(parentEvent);
2897 Taxon taxon = state.getCurrentTaxon();
2898 // TODO which ref to take?
2899 Reference<?> ref = state.getConfig().getSourceReference();
2900
2901 String text = "";
2902 while (reader.hasNext()) {
2903 XMLEvent next = readNoWhitespace(reader);
2904 if (isMyEndingElement(next, parentEvent)) {
2905 TaxonDescription description = getTaxonDescription(taxon, ref,
2906 false, true);
2907 UUID uuidExtractedHabitat = MarkupTransformer.uuidExtractedHabitat;
2908 Feature feature = getFeature(
2909 state,
2910 uuidExtractedHabitat,
2911 "Extracted Habitat",
2912 "An structured habitat that was extracted from a habitat text",
2913 "extr. habit.", null);
2914 TextData habitat = TextData.NewInstance(feature);
2915 habitat.putText(Language.DEFAULT(), text);
2916 description.addElement(habitat);
2917
2918 return;
2919 } else if (next.isStartElement()) {
2920 if (isStartingElement(next, ALTITUDE)) {
2921 text = text.trim() + getTaggedCData(state, reader, next);
2922 } else if (isStartingElement(next, LIFE_CYCLE_PERIODS)) {
2923 handleNotYetImplementedElement(next);
2924 } else {
2925 handleUnexpectedStartElement(next.asStartElement());
2926 }
2927 } else if (next.isCharacters()) {
2928 text += next.asCharacters().getData();
2929 } else {
2930 handleUnexpectedElement(next);
2931 }
2932 }
2933 throw new IllegalStateException("<Habitat> has no closing tag");
2934 }
2935
2936 private String getTaggedCData(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
2937 checkNoAttributes(parentEvent);
2938
2939 String text = getXmlTag(parentEvent);
2940 while (reader.hasNext()) {
2941 XMLEvent next = readNoWhitespace(reader);
2942 if (isMyEndingElement(next, parentEvent)) {
2943 text += getXmlTag(next);
2944 return text;
2945 } else if (next.isStartElement()) {
2946 text += getTaggedCData(state, reader, next);
2947 } else if (next.isEndElement()) {
2948 text += getTaggedCData(state, reader, next);
2949 } else if (next.isCharacters()) {
2950 text += next.asCharacters().getData();
2951 } else {
2952 handleUnexpectedEndElement(next.asEndElement());
2953 }
2954 }
2955 throw new IllegalStateException("Some tag has no closing tag");
2956 }
2957
2958 private String handleDistributionLocality(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
2959 Map<String, Attribute> attributes = getAttributes(parentEvent);
2960 String classValue = getAndRemoveRequiredAttributeValue(parentEvent, attributes, CLASS);
2961 String statusValue =getAndRemoveAttributeValue(attributes, STATUS);
2962 String frequencyValue =getAndRemoveAttributeValue(attributes, FREQUENCY);
2963
2964
2965 Taxon taxon = state.getCurrentTaxon();
2966 // TODO which ref to take?
2967 Reference<?> ref = state.getConfig().getSourceReference();
2968
2969 String text = "";
2970 while (reader.hasNext()) {
2971 XMLEvent next = readNoWhitespace(reader);
2972 if (isMyEndingElement(next, parentEvent)) {
2973 if (StringUtils.isNotBlank(text)) {
2974 String label = CdmUtils.removeTrailingDot(normalize(text));
2975 TaxonDescription description = getTaxonDescription(taxon, ref, false, true);
2976 NamedAreaLevel level = makeNamedAreaLevel(state,classValue, next);
2977
2978 //status
2979 PresenceAbsenceTermBase<?> status = null;
2980 if (isNotBlank(statusValue)){
2981 try {
2982 status = state.getTransformer().getPresenceTermByKey(statusValue);
2983 if (status == null){
2984 //TODO
2985 String message = "The status '%s' could not be transformed to an CDM status";
2986 fireWarningEvent(message, next, 4);
2987 }
2988 } catch (UndefinedTransformerMethodException e) {
2989 throw new RuntimeException(e);
2990 }
2991 }else{
2992 status = PresenceTerm.PRESENT();
2993 }
2994 //frequency
2995 if (isNotBlank(frequencyValue)){
2996 String message = "The frequency attribute is currently not yet available in CDM";
2997 fireWarningEvent(message, parentEvent, 6);
2998 }
2999
3000 NamedArea higherArea = null;
3001 List<NamedArea> areas = new ArrayList<NamedArea>();
3002
3003 String patSingleArea = "([^,\\(]{3,})";
3004 String patSeparator = "(,|\\sand\\s)";
3005 String hierarchiePattern = String.format("%s\\((%s(%s%s)*)\\)",patSingleArea, patSingleArea, patSeparator, patSingleArea);
3006 Pattern patHierarchie = Pattern.compile(hierarchiePattern, Pattern.CASE_INSENSITIVE);
3007 Matcher matcher = patHierarchie.matcher(label);
3008 if (matcher.matches()){
3009 String higherAreaStr = matcher.group(1).trim();
3010 higherArea = makeArea(state, higherAreaStr, level);
3011 String[] innerAreas = matcher.group(2).split(patSeparator);
3012 for (String innerArea : innerAreas){
3013 if (isNotBlank(innerArea)){
3014 NamedArea singleArea = makeArea(state, innerArea.trim(), level);
3015 areas.add(singleArea);
3016 NamedArea partOf = singleArea.getPartOf();
3017 // if (partOf == null){
3018 // singleArea.setPartOf(higherArea);
3019 // }
3020 }
3021 }
3022 }else{
3023 NamedArea singleArea = makeArea(state, label, level);
3024 areas.add(singleArea);
3025 }
3026
3027 for (NamedArea area : areas){
3028 //create distribution
3029 Distribution distribution = Distribution.NewInstance(area,status);
3030 description.addElement(distribution);
3031 }
3032 } else {
3033 String message = "Empty distribution locality";
3034 fireWarningEvent(message, next, 4);
3035 }
3036 return text;
3037 } else if (isStartingElement(next, COORDINATES)) {
3038 //TODO
3039 handleNotYetImplementedElement(next);
3040 } else if (isEndingElement(next, COORDINATES)) {
3041 //TODO
3042 popUnimplemented(next.asEndElement());
3043 } else if (next.isCharacters()) {
3044 text += next.asCharacters().getData();
3045 } else {
3046 handleUnexpectedEndElement(next.asEndElement());
3047 }
3048 }
3049 throw new IllegalStateException("<DistributionLocality> has no closing tag");
3050 }
3051
3052 /**
3053 * @param state
3054 * @param areaName
3055 * @param level
3056 * @return
3057 */
3058 private NamedArea makeArea(MarkupImportState state, String areaName, NamedAreaLevel level) {
3059
3060
3061 //TODO FM vocabulary
3062 TermVocabulary<NamedArea> voc = null;
3063 NamedAreaType areaType = null;
3064
3065 NamedArea area = null;
3066 try {
3067 area = state.getTransformer().getNamedAreaByKey(areaName);
3068 } catch (UndefinedTransformerMethodException e) {
3069 throw new RuntimeException(e);
3070 }
3071 if (area == null){
3072 boolean isNewInState = false;
3073 UUID uuid = state.getAreaUuid(areaName);
3074 if (uuid == null){
3075 isNewInState = true;
3076 //TODO just for testing -> make generic and move to better place
3077 if ("Bangka".equals(areaName)){
3078 String geoServiceLayer="vmap0_as_bnd_political_boundary_a";
3079 String layerFieldName ="nam";
3080 //TODO replace #
3081 String areaValue = "PULAU BANGKA#SUMATERA SELATAN";
3082 GeoServiceArea geoServiceArea = new GeoServiceArea();
3083 geoServiceArea.add(geoServiceLayer, layerFieldName, areaValue);
3084 try {
3085 String a = geoServiceArea.toXml();
3086 System.out.println(a);
3087 } catch (XMLStreamException e) {
3088 // TODO Auto-generated catch block
3089 e.printStackTrace();
3090 }
3091 // area = createNewArea(areaName,geoServiceLayer, layerFieldName, areaValue);
3092
3093 }
3094
3095 try {
3096 uuid = state.getTransformer().getNamedAreaUuid(areaName);
3097 } catch (UndefinedTransformerMethodException e) {
3098 throw new RuntimeException(e);
3099 }
3100 }
3101 TermMatchMode matchMode = TermMatchMode.UUID_LABEL;
3102 area = getNamedArea(state, uuid, areaName, areaName, areaName, areaType, level, voc, matchMode);
3103 if (isNewInState){
3104 state.putAreaUuid(areaName, area.getUuid());
3105 }
3106 }
3107 return area;
3108 }
3109
3110
3111 /**
3112 * @param state
3113 * @param levelString
3114 * @param next
3115 * @return
3116 */
3117 private NamedAreaLevel makeNamedAreaLevel(MarkupImportState state,
3118 String levelString, XMLEvent next) {
3119 NamedAreaLevel level;
3120 try {
3121 level = state.getTransformer().getNamedAreaLevelByKey(levelString);
3122 if (level == null) {
3123 UUID levelUuid = state.getTransformer().getNamedAreaLevelUuid(levelString);
3124 if (levelUuid == null) {
3125 String message = "Unknown distribution locality class (named area level): %s. Create new level instead.";
3126 message = String.format(message, levelString);
3127 fireWarningEvent(message, next, 6);
3128 }
3129 level = getNamedAreaLevel(state, levelUuid, levelString,
3130 levelString, levelString, null);
3131 }
3132 } catch (UndefinedTransformerMethodException e) {
3133 throw new RuntimeException(e);
3134 }
3135 return level;
3136 }
3137
3138 private String handleHeading(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
3139 checkNoAttributes(parentEvent);
3140
3141 String text = "";
3142 while (reader.hasNext()) {
3143 XMLEvent next = readNoWhitespace(reader);
3144 if (isMyEndingElement(next, parentEvent)) {
3145 return text;
3146 } else if (next.isStartElement()) {
3147 if (isStartingElement(next, FOOTNOTE)) {
3148 handleNotYetImplementedElement(next);
3149 } else {
3150 handleUnexpectedStartElement(next.asStartElement());
3151 }
3152 } else if (next.isCharacters()) {
3153 text += next.asCharacters().getData();
3154 } else {
3155 handleUnexpectedEndElement(next.asEndElement());
3156 }
3157 }
3158 throw new IllegalStateException("<String> has no closing tag");
3159
3160 }
3161
3162 /**
3163 * Handle string
3164 * @param state
3165 * @param reader
3166 * @param parentEvent
3167 * @param feature only needed for distributionLocalities
3168 * @return
3169 * @throws XMLStreamException
3170 */
3171 private Map<String, String> handleString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature feature)throws XMLStreamException {
3172 // attributes
3173 String classValue = getClassOnlyAttribute(parentEvent, false);
3174 if (StringUtils.isNotBlank(classValue)) {
3175 String message = "class attribute for <string> not yet implemented";
3176 fireWarningEvent(message, parentEvent, 2);
3177 }
3178
3179 // subheadings
3180 Map<String, String> subHeadingMap = new HashMap<String, String>();
3181 String currentSubheading = null;
3182
3183 boolean isTextMode = true;
3184 String text = "";
3185 while (reader.hasNext()) {
3186 XMLEvent next = readNoWhitespace(reader);
3187 if (isMyEndingElement(next, parentEvent)) {
3188 putCurrentSubheading(subHeadingMap, currentSubheading, text);
3189 return subHeadingMap;
3190 } else if (isStartingElement(next, BR)) {
3191 text += "<br/>";
3192 isTextMode = false;
3193 } else if (isEndingElement(next, BR)) {
3194 isTextMode = true;
3195 } else if (isHtml(next)) {
3196 text += getXmlTag(next);
3197 } else if (isStartingElement(next, SUB_HEADING)) {
3198 text = putCurrentSubheading(subHeadingMap,currentSubheading, text);
3199 // TODO footnotes
3200 currentSubheading = getCData(state, reader, next).trim();
3201 } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
3202 if (feature != null && !feature.equals(Feature.DISTRIBUTION())) {
3203 String message = "Distribution locality only allowed for feature of type 'distribution'";
3204 fireWarningEvent(message, next, 4);
3205 }
3206 text += handleDistributionLocality(state, reader, next);
3207 } else if (next.isCharacters()) {
3208 if (!isTextMode) {
3209 String message = "String is not in text mode";
3210 fireWarningEvent(message, next, 6);
3211 } else {
3212 text += next.asCharacters().getData();
3213 }
3214 } else if (isStartingElement(next, HEADING)) {
3215 //TODO
3216 handleNotYetImplementedElement(next);
3217 } else if (isEndingElement(next, HEADING)) {
3218 //TODO
3219 popUnimplemented(next.asEndElement());
3220 } else if (isStartingElement(next, QUOTE)) {
3221 //TODO
3222 handleNotYetImplementedElement(next);
3223 } else if (isEndingElement(next, QUOTE)) {
3224 //TODO
3225 popUnimplemented(next.asEndElement());
3226 } else if (isStartingElement(next, DEDICATION)) {
3227 //TODO
3228 handleNotYetImplementedElement(next);
3229 } else if (isEndingElement(next, DEDICATION)) {
3230 //TODO
3231 popUnimplemented(next.asEndElement());
3232 } else if (isStartingElement(next, TAXONTYPE)) {
3233 //TODO
3234 handleNotYetImplementedElement(next);
3235 } else if (isEndingElement(next, TAXONTYPE)) {
3236 //TODO
3237 popUnimplemented(next.asEndElement());
3238 } else if (isStartingElement(next, FULL_NAME)) {
3239 //TODO
3240 handleNotYetImplementedElement(next);
3241 } else if (isEndingElement(next, FULL_NAME)) {
3242 //TODO
3243 popUnimplemented(next.asEndElement());
3244 }else if (isStartingElement(next, REFERENCES)) {
3245 //TODO
3246 handleNotYetImplementedElement(next);
3247 } else if (isEndingElement(next, REFERENCES)) {
3248 //TODO
3249 popUnimplemented(next.asEndElement());
3250 } else if (isStartingElement(next, GATHERING)) {
3251 //TODO
3252 handleNotYetImplementedElement(next);
3253 } else if (isEndingElement(next, GATHERING)) {
3254 //TODO
3255 popUnimplemented(next.asEndElement());
3256 } else if (isStartingElement(next, ANNOTATION)) {
3257 //TODO
3258 handleNotYetImplementedElement(next);
3259 } else if (isEndingElement(next, ANNOTATION)) {
3260 //TODO
3261 popUnimplemented(next.asEndElement());
3262 } else if (isStartingElement(next, HABITAT)) {
3263 //TODO
3264 handleNotYetImplementedElement(next);
3265 } else if (isEndingElement(next, HABITAT)) {
3266 //TODO
3267 popUnimplemented(next.asEndElement());
3268 } else if (isStartingElement(next, FIGURE_REF)) {
3269 //TODO
3270 handleNotYetImplementedElement(next);
3271 } else if (isEndingElement(next, FIGURE_REF)) {
3272 //TODO
3273 popUnimplemented(next.asEndElement());
3274 } else if (isStartingElement(next, FIGURE)) {
3275 //TODO
3276 handleNotYetImplementedElement(next);
3277 } else if (isEndingElement(next, FIGURE)) {
3278 //TODO
3279 popUnimplemented(next.asEndElement());
3280 } else if (isStartingElement(next, FOOTNOTE_REF)) {
3281 //TODO
3282 handleNotYetImplementedElement(next);
3283 } else if (isEndingElement(next, FOOTNOTE_REF)) {
3284 //TODO
3285 popUnimplemented(next.asEndElement());
3286 } else if (isStartingElement(next, FOOTNOTE)) {
3287 //TODO
3288 handleNotYetImplementedElement(next);
3289 } else if (isEndingElement(next, FOOTNOTE)) {
3290 //TODO
3291 popUnimplemented(next.asEndElement());
3292 } else if (isStartingElement(next, WRITER)) {
3293 //TODO
3294 handleNotYetImplementedElement(next);
3295 } else if (isEndingElement(next, WRITER)) {
3296 //TODO
3297 popUnimplemented(next.asEndElement());
3298 } else if (isStartingElement(next, DATES)) {
3299 //TODO
3300 handleNotYetImplementedElement(next);
3301 } else if (isEndingElement(next, DATES)) {
3302 //TODO
3303 popUnimplemented(next.asEndElement());
3304 } else {
3305 handleUnexpectedElement(next);
3306 }
3307 }
3308 throw new IllegalStateException("<String> has no closing tag");
3309 }
3310
3311 /**
3312 * @param subHeadingMap
3313 * @param currentSubheading
3314 * @param text
3315 * @return
3316 */
3317 private String putCurrentSubheading(Map<String, String> subHeadingMap, String currentSubheading, String text) {
3318 if (StringUtils.isNotBlank(text)) {
3319 text = removeStartingMinus(text);
3320 subHeadingMap.put(currentSubheading, text.trim());
3321 }
3322 return "";
3323 }
3324
3325 private String removeStartingMinus(String string) {
3326 string = replaceStart(string, "-");
3327 string = replaceStart(string, "\u002d");
3328 string = replaceStart(string, "\u2013");
3329 string = replaceStart(string, "\u2014");
3330 string = replaceStart(string, "--");
3331 return string;
3332 }
3333
3334 /**
3335 * @param value
3336 * @param replacementString
3337 */
3338 private String replaceStart(String value, String replacementString) {
3339 if (value.startsWith(replacementString) ){
3340 value = value.substring(replacementString.length()).trim();
3341 }
3342 while (value.startsWith("-") || value.startsWith("\u2014") ){
3343 value = value.substring("-".length()).trim();
3344 }
3345 return value;
3346 }
3347
3348 private String getXmlTag(XMLEvent event) {
3349 String result;
3350 if (event.isStartElement()) {
3351 result = "<" + event.asStartElement().getName().getLocalPart()
3352 + ">";
3353 } else if (event.isEndElement()) {
3354 result = "</" + event.asEndElement().getName().getLocalPart() + ">";
3355 } else {
3356 String message = "Only start or end elements are allowed as Html tags";
3357 throw new IllegalStateException(message);
3358 }
3359 return result;
3360 }
3361
3362 protected static final List<String> htmlList = Arrays.asList("sub", "sup",
3363 "ol", "ul", "li", "i", "b", "table", "br");
3364
3365 private boolean isHtml(XMLEvent event) {
3366 if (event.isStartElement()) {
3367 String tag = event.asStartElement().getName().getLocalPart();
3368 return htmlList.contains(tag);
3369 } else if (event.isEndElement()) {
3370 String tag = event.asEndElement().getName().getLocalPart();
3371 return htmlList.contains(tag);
3372 } else {
3373 return false;
3374 }
3375
3376 }
3377
3378 private TextData handleChar(MarkupImportState state, XMLEventReader reader,
3379 XMLEvent parentEvent) throws XMLStreamException {
3380 String classValue = getClassOnlyAttribute(parentEvent);
3381 Feature feature = makeFeature(classValue, state, parentEvent);
3382
3383 String text = "";
3384 while (reader.hasNext()) {
3385 XMLEvent next = readNoWhitespace(reader);
3386 if (isMyEndingElement(next, parentEvent)) {
3387 TextData textData = TextData.NewInstance(feature);
3388 textData.putText(Language.DEFAULT(), text);
3389 return textData;
3390 } else if (isStartingElement(next, FIGURE_REF)) {
3391 //TODO
3392 handleNotYetImplementedElement(next);
3393 } else if (isEndingElement(next, FIGURE_REF)) {
3394 //TODO
3395 popUnimplemented(next.asEndElement());
3396 } else if (next.isStartElement()) {
3397 if (isStartingElement(next, ANNOTATION)) {
3398 handleNotYetImplementedElement(next);
3399 } else if (isStartingElement(next, ITALICS)) {
3400 handleNotYetImplementedElement(next);
3401 } else if (isStartingElement(next, BOLD)) {
3402 handleNotYetImplementedElement(next);
3403 } else {
3404 handleUnexpectedStartElement(next.asStartElement());
3405 }
3406 } else if (next.isCharacters()) {
3407 text += next.asCharacters().getData();
3408 } else {
3409 handleUnexpectedEndElement(next.asEndElement());
3410 }
3411 }
3412 throw new IllegalStateException("RefPart has no closing tag");
3413 }
3414
3415 /**
3416 * @param classValue
3417 * @param state
3418 * @param parentEvent
3419 * @return
3420 * @throws UndefinedTransformerMethodException
3421 */
3422 private Feature makeFeature(String classValue, MarkupImportState state, XMLEvent parentEvent) {
3423 UUID uuid;
3424 try {
3425 Feature feature = state.getTransformer().getFeatureByKey(classValue);
3426 if (feature != null) {
3427 return feature;
3428 }
3429 uuid = state.getTransformer().getFeatureUuid(classValue);
3430 if (uuid == null) {
3431 // TODO
3432 String message = "Uuid is not defined for %s";
3433 message = String.format(message, classValue);
3434 fireWarningEvent(message, parentEvent, 8);
3435 }
3436 String featureText = StringUtils.capitalize(classValue);
3437
3438 // TODO eFlora vocabulary
3439 TermVocabulary<Feature> voc = null;
3440 feature = getFeature(state, uuid, featureText, featureText, classValue, voc);
3441 if (feature == null) {
3442 throw new NullPointerException(classValue + " not recognized as a feature");
3443 }
3444 return feature;
3445 } catch (Exception e) {
3446 String message = "Could not create feature for %s: %s";
3447 message = String.format(message, classValue, e.getMessage());
3448 fireWarningEvent(message, parentEvent, 4);
3449 return Feature.UNKNOWN();
3450 }
3451 }
3452
3453 /**
3454 * This comes from the old version, needs to be checked on need
3455 *
3456 * @param state
3457 */
3458 private void doAllTheOldOtherStuff(MarkupImportState state) {
3459 state.putTree(null, null);
3460 if (unmatchedLeads == null) {
3461 unmatchedLeads = UnmatchedLeads.NewInstance();
3462 }
3463 state.setUnmatchedLeads(unmatchedLeads);
3464
3465 // TransactionStatus tx = startTransaction();
3466 unmatchedLeads.saveToSession(getPolytomousKeyNodeService());
3467
3468 // TODO generally do not store the reference object in the config
3469 Reference sourceReference = state.getConfig().getSourceReference();
3470 getReferenceService().saveOrUpdate(sourceReference);
3471 }
3472
3473 /*
3474 * (non-Javadoc)
3475 *
3476 * @see
3477 * eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common
3478 * .IImportConfigurator)
3479 */
3480 protected boolean isIgnore(MarkupImportState state) {
3481 return !state.getConfig().isDoTaxa();
3482 }
3483
3484 }