fix characters problem in KeyImport
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupDocumentImportNoComponent.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.net.MalformedURLException;
13 import java.net.URL;
14 import java.util.ArrayList;
15 import java.util.Arrays;
16 import java.util.HashMap;
17 import java.util.HashSet;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.UUID;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import javax.xml.stream.Location;
26 import javax.xml.stream.XMLEventReader;
27 import javax.xml.stream.XMLStreamException;
28 import javax.xml.stream.events.Attribute;
29 import javax.xml.stream.events.StartElement;
30 import javax.xml.stream.events.XMLEvent;
31
32 import org.apache.commons.lang.StringUtils;
33 import org.apache.log4j.Logger;
34
35 import eu.etaxonomy.cdm.common.CdmUtils;
36 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
37 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
38 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
39 import eu.etaxonomy.cdm.model.common.Annotation;
40 import eu.etaxonomy.cdm.model.common.AnnotationType;
41 import eu.etaxonomy.cdm.model.common.CdmBase;
42 import eu.etaxonomy.cdm.model.common.Extension;
43 import eu.etaxonomy.cdm.model.common.ExtensionType;
44 import eu.etaxonomy.cdm.model.common.Language;
45 import eu.etaxonomy.cdm.model.common.TermVocabulary;
46 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
47 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
48 import eu.etaxonomy.cdm.model.description.Distribution;
49 import eu.etaxonomy.cdm.model.description.Feature;
50 import eu.etaxonomy.cdm.model.description.PolytomousKey;
51 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
52 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
53 import eu.etaxonomy.cdm.model.description.PresenceTerm;
54 import eu.etaxonomy.cdm.model.description.TaxonDescription;
55 import eu.etaxonomy.cdm.model.description.TextData;
56 import eu.etaxonomy.cdm.model.location.NamedArea;
57 import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
58 import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;
59 import eu.etaxonomy.cdm.model.media.Media;
60 import eu.etaxonomy.cdm.model.name.CultivarPlantName;
61 import eu.etaxonomy.cdm.model.name.NonViralName;
62 import eu.etaxonomy.cdm.model.name.Rank;
63 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
64 import eu.etaxonomy.cdm.model.reference.Reference;
65 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
66 import eu.etaxonomy.cdm.model.taxon.Classification;
67 import eu.etaxonomy.cdm.model.taxon.Taxon;
68 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
69 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
70
71
72 /**
73 * @author a.mueller
74 *
75 */
76 public class MarkupDocumentImportNoComponent extends MarkupImportBase {
77 private static final Logger logger = Logger.getLogger(MarkupDocumentImportNoComponent.class);
78
79 private MarkupKeyImport keyImport;
80 private MarkupSpecimenImport specimenImport;
81
82 private MarkupNomenclatureImport nomenclatureImport;
83
84 public MarkupDocumentImportNoComponent(MarkupDocumentImport docImport) {
85 super(docImport);
86 this.keyImport = new MarkupKeyImport(docImport);
87 this.specimenImport = new MarkupSpecimenImport(docImport);
88 nomenclatureImport = new MarkupNomenclatureImport(docImport, keyImport, specimenImport);
89 }
90
91 public void doInvoke(MarkupImportState state) throws XMLStreamException {
92 XMLEventReader reader = state.getReader();
93
94 // publication (= root element)
95 String elName = PUBLICATION;
96 boolean hasPublication = false;
97
98 while (reader.hasNext()) {
99 XMLEvent nextEvent = reader.nextEvent();
100 if (isStartingElement(nextEvent, elName)) {
101 handlePublication(state, reader, nextEvent, elName);
102 hasPublication = true;
103 } else if (nextEvent.isEndDocument()) {
104 if (!hasPublication) {
105 String message = "No publication root element found";
106 fireWarningEvent(message, nextEvent, 8);
107 }
108 // done
109 } else {
110 fireSchemaConflictEventExpectedStartTag(elName, reader);
111 }
112 }
113
114
115 return;
116
117 }
118
119 private void handlePublication(MarkupImportState state, XMLEventReader reader, XMLEvent currentEvent, String elName) throws XMLStreamException {
120
121 // attributes
122 StartElement element = currentEvent.asStartElement();
123 Map<String, Attribute> attributes = getAttributes(element);
124 String lang = getAndRemoveAttributeValue(attributes, "lang");
125 if (lang != null){
126 Language language = getTermService().getLanguageByIso(lang);
127 state.setDefaultLanguage(language);
128 }
129
130 handleUnexpectedAttributes(element.getLocation(), attributes, "noNamespaceSchemaLocation");
131
132 while (reader.hasNext()) {
133 XMLEvent event = readNoWhitespace(reader);
134 // TODO cardinality of alternative
135 if (event.isEndElement()) {
136 if (isEndingElement(event, elName)) {
137 return;
138 } else {
139 if (isEndingElement(event, BIOGRAPHIES)) {
140 // NOT YET IMPLEMENTED
141 popUnimplemented(event.asEndElement());
142 } else if (isEndingElement(event, REFERENCES)) {
143 // NOT YET IMPLEMENTED
144 popUnimplemented(event.asEndElement());
145 } else if (isEndingElement(event, TEXT_SECTION)) {
146 // NOT YET IMPLEMENTED
147 popUnimplemented(event.asEndElement());
148 } else if (isEndingElement(event, ADDENDA)) {
149 // NOT YET IMPLEMENTED
150 popUnimplemented(event.asEndElement());
151 } else {
152 handleUnexpectedElement(event);
153 }
154 }
155 } else if (event.isStartElement()) {
156 if (isStartingElement(event, META_DATA)) {
157 handleMetaData(state, reader, event);
158 } else if (isStartingElement(event, TREATMENT)) {
159 handleTreatment(state, reader, event);
160 } else if (isStartingElement(event, BIOGRAPHIES)) {
161 handleNotYetImplementedElement(event);
162 } else if (isStartingElement(event, REFERENCES)) {
163 handleNotYetImplementedElement(event);
164 } else if (isStartingElement(event, TEXT_SECTION)) {
165 handleNotYetImplementedElement(event);
166 } else if (isStartingElement(event, ADDENDA)) {
167 handleNotYetImplementedElement(event);
168 } else {
169 handleUnexpectedStartElement(event);
170 }
171 } else {
172 handleUnexpectedElement(event);
173 }
174 }
175 throw new IllegalStateException("Publication has no ending element");
176 }
177
178 private void handleMetaData(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
179 checkNoAttributes(parentEvent);
180
181 while (reader.hasNext()) {
182 XMLEvent next = readNoWhitespace(reader);
183 if (isMyEndingElement(next, parentEvent)) {
184 return;
185 } else if (isStartingElement(next, DEFAULT_MEDIA_URL)) {
186 String baseUrl = getCData(state, reader, next);
187 try {
188 new URL(baseUrl);
189 state.setBaseMediaUrl(baseUrl);
190 } catch (MalformedURLException e) {
191 String message = "defaultMediaUrl '%s' is not a valid URL";
192 message = String.format(message, baseUrl);
193 fireWarningEvent(message, next, 8);
194 }
195 } else if (isStartingElement(next, MODS)){
196 handleNotYetImplementedElement(next);
197 } else {
198 handleUnexpectedElement(next);
199 }
200 }
201 throw new IllegalStateException("MetaData has no ending element");
202
203 }
204
205 private void handleTreatment(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
206 checkNoAttributes(parentEvent);
207 Taxon lastTaxon = null;
208 while (reader.hasNext()) {
209 XMLEvent next = readNoWhitespace(reader);
210 if (isMyEndingElement(next, parentEvent)) {
211 Set<PolytomousKeyNode> keyNodesToSave = state.getPolytomousKeyNodesToSave();
212 //better save the key then the nodes
213 Set<PolytomousKey> keySet = new HashSet<PolytomousKey>();
214 for (PolytomousKeyNode node : keyNodesToSave){
215 PolytomousKey key = node.getKey();
216 keySet.add(key);
217 }
218 save(keySet, state);
219 //unmatched key leads
220 UnmatchedLeads unmatched = state.getUnmatchedLeads();
221 if (unmatched.size() > 0){
222 String message = "The following key leads are unmatched: %s";
223 message = String.format(message, state.getUnmatchedLeads().toString());
224 fireWarningEvent(message, next, 6);
225 }
226 // save(keyNodesToSave, state);
227
228 return;
229 } else if (isStartingElement(next, TAXON)) {
230 Taxon thisTaxon = handleTaxon(state, reader, next.asStartElement());
231 doTaxonRelation(state, thisTaxon, lastTaxon, parentEvent.getLocation());
232 if (state.isTaxonInClassification() == true){
233 lastTaxon = thisTaxon;
234 // TODO for imports spanning multiple documents ?? Still needed?
235 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
236 }
237 } else if (isStartingElement(next, ADDENDA)) {
238 handleNotYetImplementedElement(next);
239 } else {
240 handleUnexpectedElement(next);
241 }
242 }
243 return;
244 }
245
246 /**
247 * @param taxon
248 * @param lastTaxon
249 */
250 private void doTaxonRelation(MarkupImportState state, Taxon taxon, Taxon lastTaxon, Location dataLocation) {
251
252 if (state.isTaxonInClassification() == false){
253 return;
254 }
255
256 Classification tree = makeTree(state, dataLocation);
257 if (lastTaxon == null) {
258 tree.addChildTaxon(taxon, null, null, null);
259 return;
260 }
261 Rank thisRank = taxon.getName().getRank();
262 Rank lastRank = lastTaxon.getName().getRank();
263 if (lastRank == null){
264 String message = "Last rank was null. Can't create tree correctly";
265 fireWarningEvent(message, makeLocationStr(dataLocation), 12);
266 }
267 if (lastTaxon.getTaxonNodes().size() > 0) {
268 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
269 if (thisRank == null){
270 String message = "Rank is undefined for taxon '%s'. Can't create classification without rank.";
271 message = String.format(message, taxon.getName().getTitleCache());
272 fireWarningEvent(message, makeLocationStr(dataLocation), 6);
273 }else if (thisRank.isLower(lastRank)) {
274 lastNode.addChildTaxon(taxon, null, null, null);
275 fillMissingEpithetsForTaxa(lastTaxon, taxon);
276 } else if (thisRank.equals(lastRank)) {
277 TaxonNode parent = lastNode.getParent();
278 if (parent != null) {
279 parent.addChildTaxon(taxon, null, null, null);
280 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
281 } else {
282 tree.addChildTaxon(taxon, null, null, null);
283 }
284 } else if (thisRank.isHigher(lastRank)) {
285 doTaxonRelation(state, taxon, lastNode.getParent().getTaxon(), dataLocation);
286 // TaxonNode parentNode = handleTaxonRelation(state, taxon,
287 // lastNode.getParent().getTaxon());
288 // parentNode.addChildTaxon(taxon, null, null, null);
289 }
290 } else {
291 String message = "Last taxon has no node";
292 fireWarningEvent(message, makeLocationStr(dataLocation), 6);
293 }
294 }
295
296
297
298 /**
299 * @param state
300 * @param dataLocation
301 * @return
302 */
303 private Classification makeTree(MarkupImportState state, Location dataLocation) {
304 Classification result = state.getTree(null);
305 if (result == null) {
306 UUID uuid = state.getConfig().getClassificationUuid();
307 if (uuid == null) {
308 String message = "No classification uuid is defined";
309 fireWarningEvent(message, makeLocationStr(dataLocation), 6);
310 result = createNewClassification(state);
311 } else {
312 result = getClassificationService().find(uuid);
313 if (result == null) {
314 result = createNewClassification(state);
315 result.setUuid(uuid);
316 }
317 }
318 state.putTree(null, result);
319 }
320 save(result, state);
321 return result;
322 }
323
324 private Classification createNewClassification(MarkupImportState state) {
325 Classification result = Classification.NewInstance(state.getConfig().getClassificationName(), getDefaultLanguage(state));
326 state.putTree(null, result);
327 return result;
328 }
329
330 private Taxon handleTaxon(MarkupImportState state, XMLEventReader reader, StartElement parentEvent) throws XMLStreamException {
331 // TODO progress monitoring
332 Map<String, Attribute> attributes = getAttributes(parentEvent);
333 Taxon taxon = createTaxonAndName(state, attributes);
334 state.setCurrentTaxon(taxon);
335 state.addNewFeatureSorterLists(taxon.getUuid().toString());
336
337 boolean hasTitle = false;
338 boolean hasNomenclature = false;
339 String taxonTitle = null;
340
341 Reference<?> descriptionReference = state.getConfig().getSourceReference();
342 while (reader.hasNext()) {
343 XMLEvent next = readNoWhitespace(reader);
344 if (next.isEndElement()) {
345 if (isMyEndingElement(next, parentEvent)) {
346 // checkMandatoryElement(hasTitle, parentEvent, TAXONTITLE);
347 checkMandatoryElement(hasNomenclature, parentEvent, NOMENCLATURE);
348 boolean inClassification = getAndRemoveBooleanAttributeValue(next, attributes, "inClassification", true);
349 state.setTaxonInClassification(inClassification);
350 handleUnexpectedAttributes(parentEvent.getLocation(),attributes);
351 if (taxon.getName().getRank() == null){
352 String warning = "No rank exists for taxon " + taxon.getTitleCache();
353 fireWarningEvent(warning, next, 12);
354 taxon.getName().setRank(Rank.UNKNOWN_RANK());
355 }
356
357 keyImport.makeKeyNodes(state, parentEvent, taxonTitle);
358 state.setCurrentTaxon(null);
359 state.setCurrentTaxonNum(null);
360 if (taxon.getName().getRank().isHigher(Rank.GENUS())){
361 state.setLatestGenusEpithet(null);
362 }else{
363 state.setLatestGenusEpithet(((NonViralName<?>)taxon.getName()).getGenusOrUninomial());
364 }
365 save(taxon, state);
366 return taxon;
367 } else {
368 if (isEndingElement(next, HEADING)) {
369 // NOT YET IMPLEMENTED
370 popUnimplemented(next.asEndElement());
371 } else if (isEndingElement(next, TEXT_SECTION)) {
372 // NOT YET IMPLEMENTED
373 popUnimplemented(next.asEndElement());
374 } else if (isEndingElement(next, REFERENCES)) {
375 // NOT YET IMPLEMENTED
376 popUnimplemented(next.asEndElement());
377 } else if (isEndingElement(next, FIGURE_REF)) {
378 // NOT YET IMPLEMENTED
379 popUnimplemented(next.asEndElement());
380 } else {
381 handleUnexpectedEndElement(next.asEndElement());
382 }
383 }
384 } else if (next.isStartElement()) {
385 if (isStartingElement(next, HEADING)) {
386 handleNotYetImplementedElement(next);
387 } else if (isStartingElement(next, TAXONTITLE)) {
388 taxonTitle = handleTaxonTitle(state, reader, next);
389 hasTitle = true;
390 } else if (isStartingElement(next, WRITER)) {
391 makeKeyWriter(state, reader, taxon, taxonTitle, next);
392 } else if (isStartingElement(next, TEXT_SECTION)) {
393 handleNotYetImplementedElement(next);
394 } else if (isStartingElement(next, KEY)) {
395 keyImport.handleKey(state, reader, next);
396 } else if (isStartingElement(next, NOMENCLATURE)) {
397 nomenclatureImport.handleNomenclature(state, reader, next);
398 hasNomenclature = true;
399 } else if (isStartingElement(next, FEATURE)) {
400 handleFeature(state, reader, next);
401 } else if (isStartingElement(next, NOTES)) {
402 // TODO is this the correct way to handle notes?
403 String note = handleNotes(state, reader, next);
404
405 UUID notesUuid;
406 try {
407 notesUuid = state.getTransformer().getFeatureUuid("notes");
408 Feature feature = getFeature(state, notesUuid, "Notes", "Notes", "note", null);
409 TextData textData = TextData.NewInstance(feature);
410 textData.putText(getDefaultLanguage(state), note);
411 TaxonDescription description = getTaxonDescription(taxon, descriptionReference, false, true);
412 description.addElement(textData);
413 } catch (UndefinedTransformerMethodException e) {
414 String message = "getFeatureUuid method not yet implemented";
415 fireWarningEvent(message, next, 8);
416 }
417 } else if (isStartingElement(next, REFERENCES)) {
418 handleNotYetImplementedElement(next);
419 } else if (isStartingElement(next, FIGURE_REF)) {
420 TaxonDescription desc = getTaxonDescription(taxon, state.getConfig().getSourceReference(), IMAGE_GALLERY, CREATE_NEW);
421 TextData textData;
422 if (desc.getElements().isEmpty()){
423 textData = TextData.NewInstance(Feature.IMAGE());
424 desc.addElement(textData);
425 }
426 textData = (TextData)desc.getElements().iterator().next();
427 makeFeatureFigureRef(state, reader, desc, false, textData, next);
428 } else if (isStartingElement(next, FIGURE)) {
429 handleFigure(state, reader, next);
430 } else if (isStartingElement(next, FOOTNOTE)) {
431 FootnoteDataHolder footnote = handleFootnote(state, reader, next);
432 if (footnote.isRef()) {
433 String message = "Ref footnote not implemented here";
434 fireWarningEvent(message, next, 4);
435 } else {
436 registerGivenFootnote(state, footnote);
437 }
438 } else {
439 handleUnexpectedStartElement(next);
440 }
441 } else {
442 handleUnexpectedElement(next);
443 }
444 }
445 throw new IllegalStateException("<Taxon> has no closing tag");
446 }
447
448 /**
449 * @param state
450 * @param reader
451 * @param taxon
452 * @param taxonTitle
453 * @param next
454 * @throws XMLStreamException
455 */
456 private void makeKeyWriter(MarkupImportState state, XMLEventReader reader, Taxon taxon, String taxonTitle, XMLEvent next) throws XMLStreamException {
457 WriterDataHolder writer = handleWriter(state, reader, next);
458 taxon.addExtension(writer.extension);
459 // TODO what if taxonTitle comes later
460 if (StringUtils.isNotBlank(taxonTitle)
461 && writer.extension != null) {
462 Reference<?> sec = ReferenceFactory.newBookSection();
463 sec.setTitle(taxonTitle);
464 TeamOrPersonBase<?> author = createAuthor(writer.writer);
465 sec.setAuthorTeam(author);
466 sec.setInReference(state.getConfig()
467 .getSourceReference());
468 taxon.setSec(sec);
469 registerFootnotes(state, sec, writer.footnotes);
470 } else {
471 String message = "No taxontitle exists for writer";
472 fireWarningEvent(message, next, 6);
473 }
474 }
475
476 private String handleNotes(MarkupImportState state, XMLEventReader reader,
477 XMLEvent parentEvent) throws XMLStreamException {
478 checkNoAttributes(parentEvent);
479
480 String text = "";
481 while (reader.hasNext()) {
482 XMLEvent next = readNoWhitespace(reader);
483 if (isMyEndingElement(next, parentEvent)) {
484 return text;
485 } else if (next.isEndElement()) {
486 if (isEndingElement(next, HEADING)) {
487 popUnimplemented(next.asEndElement());
488 } else if (isEndingElement(next, WRITER)) {
489 popUnimplemented(next.asEndElement());
490 } else if (isEndingElement(next, NUM)) {
491 popUnimplemented(next.asEndElement());
492 } else {
493 handleUnexpectedEndElement(next.asEndElement());
494 }
495 } else if (next.isStartElement()) {
496 if (isStartingElement(next, HEADING)) {
497 handleNotYetImplementedElement(next);
498 } else if (isStartingElement(next, SUB_HEADING)) {
499 String subheading = getCData(state, reader, next).trim();
500 if (! isNoteHeading(subheading)) {
501 fireNotYetImplementedElement(next.getLocation(), next.asStartElement().getName(), 0);
502 }
503 } else if (isStartingElement(next, WRITER)) {
504 handleNotYetImplementedElement(next);
505 } else if (isStartingElement(next, NUM)) {
506 handleNotYetImplementedElement(next);
507 } else if (isStartingElement(next, STRING)) {
508 // TODO why multiple strings in schema?
509 text = makeNotesString(state, reader, text, next);
510 } else {
511 handleUnexpectedStartElement(next.asStartElement());
512 }
513 } else {
514 handleUnexpectedElement(next);
515 }
516 }
517 throw new IllegalStateException("<Notes> has no closing tag");
518 }
519
520 /**
521 * @param state
522 * @param reader
523 * @param text
524 * @param next
525 * @return
526 * @throws XMLStreamException
527 */
528 private String makeNotesString(MarkupImportState state, XMLEventReader reader, String text, XMLEvent next) throws XMLStreamException {
529 Map<String, String> stringMap = handleString(state, reader, next, null);
530 if (stringMap.size() == 0){
531 String message = "No text available in <notes>";
532 fireWarningEvent(message, next, 4);
533 }else if (stringMap.size() > 1){
534 String message = "Subheadings not yet supported in <notes>";
535 fireWarningEvent(message, next, 4);
536 }else{
537 String firstSubheading = stringMap.keySet().iterator().next();
538 if ( firstSubheading != null && ! isNoteHeading (firstSubheading) ) {
539 String message = "Subheadings not yet supported in <notes>";
540 fireWarningEvent(message, next, 4);
541 }
542 }
543 for (String subheading : stringMap.keySet()){
544 text += subheading;
545 text += stringMap.get(subheading);
546 }
547 return text;
548 }
549
550 private boolean isNoteHeading(String heading) {
551 String excludePattern = "(i?)(Notes?):?";
552 return heading.matches(excludePattern);
553 }
554
555 /**
556 * @param state
557 * @param attributes
558 */
559 private Taxon createTaxonAndName(MarkupImportState state,
560 Map<String, Attribute> attributes) {
561 NonViralName<?> name;
562 Rank rank = null; //Rank.SPECIES(); // default
563 boolean isCultivar = checkAndRemoveAttributeValue(attributes, CLASS, "cultivated");
564 if (isCultivar) {
565 name = CultivarPlantName.NewInstance(rank);
566 } else {
567 name = createNameByCode(state, rank);
568 }
569 Taxon taxon = Taxon.NewInstance(name, state.getConfig().getSourceReference());
570 if (checkAndRemoveAttributeValue(attributes, CLASS, "dubious")) {
571 taxon.setDoubtful(true);
572 } else if (checkAndRemoveAttributeValue(attributes, CLASS, "excluded")) {
573 taxon.setExcluded(true);
574 }
575 // TODO insufficient, new, expected
576 handleNotYetImplementedAttribute(attributes, CLASS);
577 // From old version
578 // MarkerType markerType = getMarkerType(state, attrValue);
579 // if (markerType == null){
580 // logger.warn("Class attribute value for taxon not yet supported: " +
581 // attrValue);
582 // }else{
583 // taxon.addMarker(Marker.NewInstance(markerType, true));
584 // }
585
586 // save(name, state);
587 // save(taxon, state);
588 return taxon;
589 }
590
591 private String handleTaxonTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
592 //attributes
593 String text = "";
594 Map<String, Attribute> attributes = getAttributes(parentEvent);
595 String rankAttr = getAndRemoveAttributeValue(attributes, RANK);
596 Rank rank = makeRank(state, rankAttr, false);
597 String num = getAndRemoveAttributeValue(attributes, NUM);
598 state.setCurrentTaxonNum(num);
599 checkNoAttributes(attributes, parentEvent);
600
601 // TODO handle attributes
602 while (reader.hasNext()) {
603 XMLEvent next = readNoWhitespace(reader);
604 if (next.isEndElement()) {
605 if (isMyEndingElement(next, parentEvent)) {
606 Taxon taxon = state.getCurrentTaxon();
607 String titleText = null;
608 if (checkMandatoryText(text, parentEvent)) {
609 titleText = normalize(text);
610 UUID uuidTitle = MarkupTransformer.uuidTaxonTitle;
611 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "Taxon Title ","taxon title", "title");
612 taxon.addExtension(titleText, titleExtension);
613 }
614 taxon.getName().setRank(rank);
615 // TODO check title exists
616 return titleText;
617 } else {
618 if (isEndingElement(next, FOOTNOTE)) {
619 // NOT YET IMPLEMENTED
620 popUnimplemented(next.asEndElement());
621 } else {
622 handleUnexpectedEndElement(next.asEndElement());
623 state.setUnsuccessfull();
624 }
625 }
626 } else if (next.isStartElement()) {
627 if (isStartingElement(next, FOOTNOTE)) {
628 handleNotYetImplementedElement(next);
629 }else if (isStartingElement(next, FOOTNOTE_REF)) {
630 handleNotYetImplementedElement(next);
631 } else {
632 handleUnexpectedStartElement(next);
633 state.setUnsuccessfull();
634 }
635 } else if (next.isCharacters()) {
636 text += next.asCharacters().getData();
637
638 } else {
639 handleUnexpectedElement(next);
640 state.setUnsuccessfull();
641 }
642 }
643 return null;
644
645 }
646
647 private WriterDataHolder handleWriter(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
648 String text = "";
649 checkNoAttributes(parentEvent);
650 WriterDataHolder dataHolder = new WriterDataHolder();
651 List<FootnoteDataHolder> footnotes = new ArrayList<FootnoteDataHolder>();
652
653 // TODO handle attributes
654 while (reader.hasNext()) {
655 XMLEvent next = readNoWhitespace(reader);
656 if (isMyEndingElement(next, parentEvent)) {
657 text = CdmUtils.removeBrackets(text);
658 if (checkMandatoryText(text, parentEvent)) {
659 text = normalize(text);
660 dataHolder.writer = text;
661 dataHolder.footnotes = footnotes;
662
663 // Extension
664 UUID uuidWriterExtension = MarkupTransformer.uuidWriterExtension;
665 ExtensionType writerExtensionType = this
666 .getExtensionType(state, uuidWriterExtension,
667 "Writer", "writer", "writer");
668 Extension extension = Extension.NewInstance();
669 extension.setType(writerExtensionType);
670 extension.setValue(text);
671 dataHolder.extension = extension;
672
673 // Annotation
674 UUID uuidWriterAnnotation = MarkupTransformer.uuidWriterAnnotation;
675 AnnotationType writerAnnotationType = this.getAnnotationType(state, uuidWriterAnnotation, "Writer", "writer", "writer", null);
676 Annotation annotation = Annotation.NewInstance(text, writerAnnotationType, getDefaultLanguage(state));
677 dataHolder.annotation = annotation;
678
679 return dataHolder;
680 } else {
681 return null;
682 }
683 } else if (isStartingElement(next, FOOTNOTE_REF)) {
684 FootnoteDataHolder footNote = handleFootnoteRef(state, reader, next);
685 if (footNote.isRef()) {
686 footnotes.add(footNote);
687 } else {
688 logger.warn("Non ref footnotes not yet impelemnted");
689 }
690 } else if (next.isCharacters()) {
691 text += next.asCharacters().getData();
692
693 } else {
694 handleUnexpectedElement(next);
695 state.setUnsuccessfull();
696 }
697 }
698 throw new IllegalStateException("<writer> has no end tag");
699 }
700
701 private void registerFootnotes(MarkupImportState state, AnnotatableEntity entity, List<FootnoteDataHolder> footnotes) {
702 for (FootnoteDataHolder footNote : footnotes) {
703 registerFootnoteDemand(state, entity, footNote);
704 }
705 }
706
707 private void registerGivenFootnote(MarkupImportState state, FootnoteDataHolder footnote) {
708 state.registerFootnote(footnote);
709 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.id);
710 if (demands != null) {
711 for (AnnotatableEntity entity : demands) {
712 attachFootnote(state, entity, footnote);
713 }
714 }
715 }
716
717 private void registerGivenFigure(MarkupImportState state, XMLEvent next, String id, Media figure) {
718 state.registerFigure(id, figure);
719 Set<AnnotatableEntity> demands = state.getFigureDemands(id);
720 if (demands != null) {
721 for (AnnotatableEntity entity : demands) {
722 attachFigure(state, next, entity, figure);
723 }
724 }
725 save(figure, state);
726 }
727
728 private void registerFootnoteDemand(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
729 FootnoteDataHolder existingFootnote = state.getFootnote(footnote.ref);
730 if (existingFootnote != null) {
731 attachFootnote(state, entity, existingFootnote);
732 } else {
733 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.ref);
734 if (demands == null) {
735 demands = new HashSet<AnnotatableEntity>();
736 state.putFootnoteDemands(footnote.ref, demands);
737 }
738 demands.add(entity);
739 }
740 }
741
742 private void registerFigureDemand(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, String figureRef) {
743 Media existingFigure = state.getFigure(figureRef);
744 if (existingFigure != null) {
745 attachFigure(state, next, entity, existingFigure);
746 } else {
747 Set<AnnotatableEntity> demands = state.getFigureDemands(figureRef);
748 if (demands == null) {
749 demands = new HashSet<AnnotatableEntity>();
750 state.putFigureDemands(figureRef, demands);
751 }
752 demands.add(entity);
753 }
754 }
755
756 private void attachFootnote(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
757 AnnotationType annotationType = this.getAnnotationType(state, MarkupTransformer.uuidFootnote, "Footnote", "An e-flora footnote", "fn", null);
758 Annotation annotation = Annotation.NewInstance(footnote.string, annotationType, getDefaultLanguage(state));
759 // TODO transient objects
760 entity.addAnnotation(annotation);
761 save(entity, state);
762 }
763
764 private void attachFigure(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, Media figure) {
765 // IdentifiableEntity<?> toSave;
766 if (entity.isInstanceOf(TextData.class)) {
767 TextData deb = CdmBase.deproxy(entity, TextData.class);
768 deb.addMedia(figure);
769 // toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
770 } else if (entity.isInstanceOf(SpecimenOrObservationBase.class)) {
771 String message = "figures for specimen should be handled as Textdata";
772 fireWarningEvent(message, next, 4);
773 // toSave = ime;
774 } else if (entity.isInstanceOf(IdentifiableMediaEntity.class)) {
775 IdentifiableMediaEntity<?> ime = CdmBase.deproxy(entity, IdentifiableMediaEntity.class);
776 ime.addMedia(figure);
777 // toSave = ime;
778 } else {
779 String message = "Unsupported entity to attach media: %s";
780 message = String.format(message, entity.getClass().getName());
781 // toSave = null;
782 }
783 save(entity, state);
784 }
785
786 private Media handleFigure(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
787 // FigureDataHolder result = new FigureDataHolder();
788
789 Map<String, Attribute> attributes = getAttributes(parentEvent);
790 String id = getAndRemoveAttributeValue(attributes, ID);
791 String type = getAndRemoveAttributeValue(attributes, TYPE);
792 String urlAttr = getAndRemoveAttributeValue(attributes, URL);
793 checkNoAttributes(attributes, parentEvent);
794
795 String urlString = null;
796 String legendString = null;
797 String titleString = null;
798 String numString = null;
799 String text = null;
800 if (isNotBlank(urlAttr)){
801 urlString = CdmUtils.Nz(state.getBaseMediaUrl()) + urlAttr;
802 }
803 while (reader.hasNext()) {
804 XMLEvent next = readNoWhitespace(reader);
805 if (isMyEndingElement(next, parentEvent)) {
806 if (isNotBlank(text)){
807 fireWarningEvent("Text not yet handled for figures: " + text, next, 4);
808 }
809 Media media = makeFigure(state, id, type, urlString, legendString, titleString, numString, next);
810 return media;
811 } else if (isStartingElement(next, FIGURE_LEGEND)) {
812 // TODO same as figure string ?
813 legendString = handleFootnoteString(state, reader, next);
814 } else if (isStartingElement(next, FIGURE_TITLE)) {
815 titleString = getCData(state, reader, next);
816 } else if (isStartingElement(next, URL)) {
817 String localUrl = getCData(state, reader, next);
818 String url = CdmUtils.Nz(state.getBaseMediaUrl()) + localUrl;
819 if (isBlank(urlString)){
820 urlString = url;
821 }
822 if (! url.equals(urlString)){
823 String message = "URL attribute and URL element differ. Attribute: %s, Element: %s";
824 fireWarningEvent(String.format(message, urlString, url), next, 2);
825 }
826 } else if (isStartingElement(next, NUM)) {
827 numString = getCData(state, reader, next);
828 } else if (next.isCharacters()) {
829 text += CdmUtils.concat("", text, next.asCharacters().getData());
830 } else {
831 fireUnexpectedEvent(next, 0);
832 }
833 }
834 throw new IllegalStateException("<figure> has no end tag");
835 }
836
837 /**
838 * @param state
839 * @param id
840 * @param type
841 * @param urlString
842 * @param legendString
843 * @param titleString
844 * @param numString
845 * @param next
846 */
847 private Media makeFigure(MarkupImportState state, String id, String type, String urlString,
848 String legendString, String titleString, String numString, XMLEvent next) {
849 Media media = null;
850 boolean isFigure = false;
851 try {
852 //TODO maybe everything is a figure as it is all taken from a book
853 if ("lineart".equals(type)) {
854 isFigure = true;
855 // media = Figure.NewInstance(url.toURI(), null, null, null);
856 } else if (type == null || "photo".equals(type)
857 || "signature".equals(type)
858 || "others".equals(type)) {
859 //TODO
860 } else {
861 String message = "Unknown figure type '%s'";
862 message = String.format(message, type);
863 fireWarningEvent(message, next, 2);
864 }
865 media = docImport.getImageMedia(urlString, docImport.getReadMediaData(), isFigure);
866
867 if (media != null){
868 // title
869 if (StringUtils.isNotBlank(titleString)) {
870 media.putTitle(getDefaultLanguage(state), titleString);
871 }
872 // legend
873 if (StringUtils.isNotBlank(legendString)) {
874 media.addDescription(legendString, getDefaultLanguage(state));
875 }
876 if (StringUtils.isNotBlank(numString)) {
877 // TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
878 // vol.13)
879 Reference<?> citation = state.getConfig().getSourceReference();
880 media.addSource(numString, "num", citation, null);
881 // TODO name used in source if available
882 }
883 // TODO which citation
884 if (StringUtils.isNotBlank(id)) {
885 media.addSource(id, null, state.getConfig().getSourceReference(), null);
886 } else {
887 String message = "Figure id should never be empty or null";
888 fireWarningEvent(message, next, 6);
889 }
890
891 // text
892 // do nothing
893 registerGivenFigure(state, next, id, media);
894
895 }else{
896 String message = "No media found: ";
897 fireWarningEvent(message, next, 4);
898 }
899 } catch (MalformedURLException e) {
900 String message = "Media uri has incorrect syntax: %s";
901 message = String.format(message, urlString);
902 fireWarningEvent(message, next, 4);
903 // } catch (URISyntaxException e) {
904 // String message = "Media uri has incorrect syntax: %s";
905 // message = String.format(message, urlString);
906 // fireWarningEvent(message, next, 4);
907 }
908
909 return media;
910 }
911
912 private FigureDataHolder handleFigureRef(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent)
913 throws XMLStreamException {
914 FigureDataHolder result = new FigureDataHolder();
915 Map<String, Attribute> attributes = getAttributes(parentEvent);
916 result.ref = getAndRemoveAttributeValue(attributes, REF);
917 checkNoAttributes(attributes, parentEvent);
918
919 // text is not handled, needed only for debugging purposes
920 String text = "";
921 while (reader.hasNext()) {
922 XMLEvent next = readNoWhitespace(reader);
923 if (isMyEndingElement(next, parentEvent)) {
924 return result;
925 } else if (isStartingElement(next, NUM)) {
926 String num = getCData(state, reader, next);
927 result.num = num; // num is not handled during import
928 } else if (isStartingElement(next, FIGURE_PART)) {
929 result.figurePart = getCData(state, reader, next);
930 } else if (next.isCharacters()) {
931 text += next.asCharacters().getData();
932 } else {
933 fireUnexpectedEvent(next, 0);
934 }
935 }
936 throw new IllegalStateException("<figureRef> has no end tag");
937 }
938
939 private FootnoteDataHolder handleFootnote(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
940 FootnoteDataHolder result = new FootnoteDataHolder();
941 Map<String, Attribute> attributes = getAttributes(parentEvent);
942 result.id = getAndRemoveAttributeValue(attributes, ID);
943 // result.ref = getAndRemoveAttributeValue(attributes, REF);
944 checkNoAttributes(attributes, parentEvent);
945
946 while (reader.hasNext()) {
947 XMLEvent next = readNoWhitespace(reader);
948 if (isStartingElement(next, FOOTNOTE_STRING)) {
949 String string = handleFootnoteString(state, reader, next);
950 result.string = string;
951 } else if (isMyEndingElement(next, parentEvent)) {
952 return result;
953 } else {
954 fireUnexpectedEvent(next, 0);
955 }
956 }
957 return result;
958 }
959
960 private FootnoteDataHolder handleFootnoteRef(MarkupImportState state,
961 XMLEventReader reader, XMLEvent parentEvent)
962 throws XMLStreamException {
963 FootnoteDataHolder result = new FootnoteDataHolder();
964 Map<String, Attribute> attributes = getAttributes(parentEvent);
965 result.ref = getAndRemoveAttributeValue(attributes, REF);
966 checkNoAttributes(attributes, parentEvent);
967
968 // text is not handled, needed only for debugging purposes
969 String text = "";
970 while (reader.hasNext()) {
971 XMLEvent next = readNoWhitespace(reader);
972 // if (isStartingElement(next, FOOTNOTE_STRING)){
973 // String string = handleFootnoteString(state, reader, next);
974 // result.string = string;
975 // }else
976 if (isMyEndingElement(next, parentEvent)) {
977 return result;
978 } else if (next.isCharacters()) {
979 text += next.asCharacters().getData();
980
981 } else {
982 fireUnexpectedEvent(next, 0);
983 }
984 }
985 return result;
986 }
987
988
989
990 private String handleFootnoteString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
991 boolean isTextMode = true;
992 String text = "";
993 while (reader.hasNext()) {
994 XMLEvent next = readNoWhitespace(reader);
995 if (isMyEndingElement(next, parentEvent)) {
996 return text;
997 } else if (next.isEndElement()) {
998 if (isEndingElement(next, FULL_NAME)) {
999 popUnimplemented(next.asEndElement());
1000 } else if (isEndingElement(next, BR)) {
1001 isTextMode = true;
1002 } else if (isHtml(next)) {
1003 text += getXmlTag(next);
1004 } else {
1005 handleUnexpectedEndElement(next.asEndElement());
1006 }
1007 } else if (next.isStartElement()) {
1008 if (isStartingElement(next, FULL_NAME)) {
1009 handleNotYetImplementedElement(next);
1010 } else if (isStartingElement(next, GATHERING)) {
1011 text += specimenImport.handleInLineGathering(state, reader, next);
1012 } else if (isStartingElement(next, REFERENCES)) {
1013 text += " " + handleInLineReferences(state, reader, next)+ " ";
1014 } else if (isStartingElement(next, BR)) {
1015 text += "<br/>";
1016 isTextMode = false;
1017 } else if (isStartingElement(next, NOMENCLATURE)) {
1018 handleNotYetImplementedElement(next);
1019 } else if (isHtml(next)) {
1020 text += getXmlTag(next);
1021 } else {
1022 handleUnexpectedStartElement(next.asStartElement());
1023 }
1024 } else if (next.isCharacters()) {
1025 if (!isTextMode) {
1026 String message = "footnoteString is not in text mode";
1027 fireWarningEvent(message, next, 6);
1028 } else {
1029 text += next.asCharacters().getData().trim();
1030 // getCData(state, reader, next); does not work as we have inner tags like <references>
1031 }
1032 } else {
1033 handleUnexpectedEndElement(next.asEndElement());
1034 }
1035 }
1036 throw new IllegalStateException("<footnoteString> has no closing tag");
1037
1038 }
1039
1040 private String handleInLineReferences(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1041 checkNoAttributes(parentEvent);
1042
1043 boolean hasReference = false;
1044 String text = "";
1045 while (reader.hasNext()) {
1046 XMLEvent next = readNoWhitespace(reader);
1047 if (isMyEndingElement(next, parentEvent)) {
1048 checkMandatoryElement(hasReference, parentEvent.asStartElement(), REFERENCE);
1049 return text;
1050 } else if (isStartingElement(next, REFERENCE)) {
1051 text += handleInLineReference(state, reader, next);
1052 hasReference = true;
1053 } else {
1054 handleUnexpectedElement(next);
1055 }
1056 }
1057 throw new IllegalStateException("<References> has no closing tag");
1058 }
1059
1060 private String handleInLineReference(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1061 Reference<?> reference = nomenclatureImport.handleReference(state, reader, parentEvent);
1062 String result = "<cdm:ref uuid='%s'>%s</ref>";
1063 result = String.format(result, reference.getUuid(), reference.getTitleCache());
1064 save(reference, state);
1065 return result;
1066 }
1067
1068 private void handleFeature(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1069 Map<String, Attribute> attrs = getAttributes(parentEvent);
1070 Boolean isFreetext = getAndRemoveBooleanAttributeValue(parentEvent, attrs, IS_FREETEXT, false);
1071 String classValue =getAndRemoveRequiredAttributeValue(parentEvent, attrs, CLASS);
1072 checkNoAttributes(attrs, parentEvent);
1073
1074
1075 Feature feature = makeFeature(classValue, state, parentEvent, null);
1076 Taxon taxon = state.getCurrentTaxon();
1077 TaxonDescription taxonDescription = getTaxonDescription(taxon, state.getConfig().getSourceReference(), NO_IMAGE_GALLERY, CREATE_NEW);
1078 // TextData figureHolderTextData = null; //for use with one TextData for
1079 // all figure only
1080
1081 boolean isDescription = feature.equals(Feature.DESCRIPTION());
1082 DescriptionElementBase lastDescriptionElement = null;
1083
1084 while (reader.hasNext()) {
1085 XMLEvent next = readNoWhitespace(reader);
1086 if (isMyEndingElement(next, parentEvent)) {
1087 state.putFeatureToGeneralSorterList(feature);
1088 return;
1089 } else if (isEndingElement(next, DISTRIBUTION_LIST) || isEndingElement(next, HABITAT_LIST)) {
1090 // only handle list elements
1091 } else if (isStartingElement(next, HEADING)) {
1092 makeFeatureHeading(state, reader, classValue, feature, next);
1093 } else if (isStartingElement(next, WRITER)) {
1094 makeFeatureWriter(state, reader, feature, taxon, next);
1095 // } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
1096 // if (!feature.equals(Feature.DISTRIBUTION())) {
1097 // String message = "Distribution locality only allowed for feature of type 'distribution'";
1098 // fireWarningEvent(message, next, 4);
1099 // }
1100 // handleDistributionLocality(state, reader, next);
1101 } else if (isStartingElement(next, DISTRIBUTION_LIST) || isStartingElement(next, HABITAT_LIST)) {
1102 // only handle single list elements
1103 } else if (isStartingElement(next, HABITAT)) {
1104 if (!(feature.equals(Feature.HABITAT())
1105 || feature.equals(Feature.HABITAT_ECOLOGY())
1106 || feature.equals(Feature.ECOLOGY()))) {
1107 String message = "Habitat only allowed for feature of type 'habitat','habitat ecology' or 'ecology'";
1108 fireWarningEvent(message, next, 4);
1109 }
1110 handleHabitat(state, reader, next);
1111 } else if (isStartingElement(next, CHAR)) {
1112 List<TextData> textDataList = handleChar(state, reader, next, null);
1113 for (TextData textData : textDataList){
1114 taxonDescription.addElement(textData);
1115 }
1116 } else if (isStartingElement(next, STRING)) {
1117 lastDescriptionElement = makeFeatureString(state, reader,feature, taxonDescription, lastDescriptionElement,next, isFreetext);
1118 } else if (isStartingElement(next, FIGURE_REF)) {
1119 lastDescriptionElement = makeFeatureFigureRef(state, reader, taxonDescription, isDescription, lastDescriptionElement, next);
1120 } else if (isStartingElement(next, REFERENCES)) {
1121 // TODO details/microcitation ??
1122
1123 List<Reference<?>> refs = handleReferences(state, reader, next);
1124 if (!refs.isEmpty()) {
1125 // TODO
1126 Reference<?> descriptionRef = state.getConfig().getSourceReference();
1127 TaxonDescription description = getTaxonDescription(taxon, descriptionRef, false, true);
1128 TextData featurePlaceholder = docImport.getFeaturePlaceholder(state, description, feature, true);
1129 for (Reference<?> citation : refs) {
1130 featurePlaceholder.addSource(null, null, citation, null);
1131 }
1132 } else {
1133 String message = "No reference found in references";
1134 fireWarningEvent(message, next, 6);
1135 }
1136 } else if (isStartingElement(next, NUM)) {
1137 //TODO
1138 handleNotYetImplementedElement(next);
1139 } else if (isEndingElement(next, NUM)) {
1140 //TODO
1141 popUnimplemented(next.asEndElement());
1142 } else {
1143 handleUnexpectedElement(next);
1144 }
1145 }
1146 throw new IllegalStateException("<Feature> has no closing tag");
1147 }
1148
1149 /**
1150 * @param state
1151 * @param reader
1152 * @param taxonDescription
1153 * @param isDescription
1154 * @param lastDescriptionElement
1155 * @param next
1156 * @return
1157 * @throws XMLStreamException
1158 */
1159 private DescriptionElementBase makeFeatureFigureRef(MarkupImportState state, XMLEventReader reader,TaxonDescription taxonDescription,
1160 boolean isDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next) throws XMLStreamException {
1161 FigureDataHolder figureHolder = handleFigureRef(state, reader, next);
1162 Feature figureFeature = getFeature(state, MarkupTransformer.uuidFigures, "Figures", "Figures", "Fig.",null);
1163 if (isDescription) {
1164 TextData figureHolderTextData = null;
1165 // if (figureHolderTextData == null){
1166 figureHolderTextData = TextData.NewInstance(figureFeature);
1167 if (StringUtils.isNotBlank(figureHolder.num)) {
1168 String annotationText = "<num>" + figureHolder.num.trim() + "</num>";
1169 Annotation annotation = Annotation.NewInstance(annotationText, AnnotationType.TECHNICAL(), getDefaultLanguage(state));
1170 figureHolderTextData.addAnnotation(annotation);
1171 }
1172 if (StringUtils.isNotBlank(figureHolder.figurePart)) {
1173 String annotationText = "<figurePart>"+ figureHolder.figurePart.trim() + "</figurePart>";
1174 Annotation annotation = Annotation.NewInstance(annotationText,AnnotationType.EDITORIAL(), getDefaultLanguage(state));
1175 figureHolderTextData.addAnnotation(annotation);
1176 }
1177 // if (StringUtils.isNotBlank(figureText)){
1178 // figureHolderTextData.putText(language, figureText);
1179 // }
1180 taxonDescription.addElement(figureHolderTextData);
1181 // }
1182 registerFigureDemand(state, next, figureHolderTextData, figureHolder.ref);
1183 } else {
1184 if (lastDescriptionElement == null) {
1185 String message = "No description element created yet that can be referred by figure. Create new TextData instead";
1186 fireWarningEvent(message, next, 4);
1187 lastDescriptionElement = TextData.NewInstance(figureFeature);
1188 taxonDescription.addElement(lastDescriptionElement);
1189 }
1190 registerFigureDemand(state, next, lastDescriptionElement, figureHolder.ref);
1191 }
1192 return lastDescriptionElement;
1193 }
1194
1195 /**
1196 * @param state
1197 * @param reader
1198 * @param feature
1199 * @param taxonDescription
1200 * @param lastDescriptionElement
1201 * @param distributionList
1202 * @param next
1203 * @return
1204 * @throws XMLStreamException
1205 */
1206 private DescriptionElementBase makeFeatureString(MarkupImportState state,XMLEventReader reader, Feature feature,
1207 TaxonDescription taxonDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next, Boolean isFreetext) throws XMLStreamException {
1208
1209 //for specimen only
1210 if (feature.equals(Feature.SPECIMEN()) || feature.equals(Feature.MATERIALS_EXAMINED())){
1211
1212 List<DescriptionElementBase> specimens = specimenImport.handleMaterialsExamined(state, reader, next, feature);
1213 for (DescriptionElementBase specimen : specimens){
1214 taxonDescription.addElement(specimen);
1215 lastDescriptionElement = specimen;
1216 }
1217 state.setCurrentCollector(null);
1218
1219 return lastDescriptionElement;
1220 }else{
1221
1222 //others
1223 Map<String, String> subheadingMap = handleString(state, reader, next, feature);
1224 for (String subheading : subheadingMap.keySet()) {
1225 Feature subheadingFeature = feature;
1226 if (StringUtils.isNotBlank(subheading) && subheadingMap.size() > 1) {
1227 subheadingFeature = makeFeature(subheading, state, next, null);
1228 }
1229 if (feature.equals(Feature.COMMON_NAME()) && (isFreetext == null || !isFreetext)){
1230 List<DescriptionElementBase> commonNames = makeVernacular(state, subheading, subheadingMap.get(subheading));
1231 for (DescriptionElementBase commonName : commonNames){
1232 taxonDescription.addElement(commonName);
1233 lastDescriptionElement = commonName;
1234 }
1235 }else {
1236 TextData textData = TextData.NewInstance(subheadingFeature);
1237 textData.putText(getDefaultLanguage(state), subheadingMap.get(subheading));
1238 taxonDescription.addElement(textData);
1239 lastDescriptionElement = textData;
1240 // TODO how to handle figures when these data are split in
1241 // subheadings
1242 }
1243 }
1244 return lastDescriptionElement;
1245 }
1246 }
1247
1248 private List<DescriptionElementBase> makeVernacular(MarkupImportState state, String subheading, String commonNameString) throws XMLStreamException {
1249 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
1250 String[] splits = commonNameString.split(",");
1251 for (String split : splits){
1252 split = split.trim();
1253 if (! split.matches(".*\\(.*\\)\\.?")){
1254 fireWarningEvent("Common name string '"+split+"' does not match given pattern", state.getReader().peek(), 4);
1255 }
1256
1257 String name = split.replaceAll("\\(.*\\)", "").replace(".", "").trim();
1258 String languageStr = split.replaceFirst(".*\\(", "").replaceAll("\\)\\.?", "").trim();
1259
1260 Language language = null;
1261 if (StringUtils.isNotBlank(languageStr)){
1262 try {
1263 UUID langUuid = state.getTransformer().getLanguageUuid(languageStr);
1264 TermVocabulary<?> voc = null;
1265 language = getLanguage(state, langUuid, languageStr, languageStr, null, voc);
1266 if (language == null){
1267 logger.warn("Language " + languageStr + " not recognized by transformer");
1268 }
1269 } catch (UndefinedTransformerMethodException e) {
1270 throw new RuntimeException(e);
1271 }
1272 }
1273 NamedArea area = null;
1274 CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(name, language, area);
1275 result.add(commonTaxonName);
1276 }
1277
1278 return result;
1279 }
1280
1281 /**
1282 * @param state
1283 * @param reader
1284 * @param feature
1285 * @param taxon
1286 * @param next
1287 * @throws XMLStreamException
1288 */
1289 private void makeFeatureWriter(MarkupImportState state,XMLEventReader reader, Feature feature, Taxon taxon, XMLEvent next) throws XMLStreamException {
1290 WriterDataHolder writer = handleWriter(state, reader, next);
1291 if (isNotBlank(writer.writer)) {
1292 // TODO
1293 Reference<?> ref = state.getConfig().getSourceReference();
1294 TaxonDescription description = getTaxonDescription(taxon, ref,
1295 false, true);
1296 TextData featurePlaceholder = docImport.getFeaturePlaceholder(state,
1297 description, feature, true);
1298 featurePlaceholder.addAnnotation(writer.annotation);
1299 registerFootnotes(state, featurePlaceholder, writer.footnotes);
1300 } else {
1301 String message = "Writer element is empty";
1302 fireWarningEvent(message, next, 4);
1303 }
1304 }
1305
1306 /**
1307 * @param state
1308 * @param reader
1309 * @param classValue
1310 * @param feature
1311 * @param next
1312 * @throws XMLStreamException
1313 */
1314 private void makeFeatureHeading(MarkupImportState state, XMLEventReader reader, String classValue, Feature feature, XMLEvent next) throws XMLStreamException {
1315 String heading = handleHeading(state, reader, next);
1316 if (StringUtils.isNotBlank(heading)) {
1317 if (!heading.equalsIgnoreCase(classValue)) {
1318 try {
1319 if (!feature.equals(state.getTransformer().getFeatureByKey(
1320 heading))) {
1321 UUID headerFeatureUuid = state.getTransformer()
1322 .getFeatureUuid(heading);
1323 if (!feature.getUuid().equals(headerFeatureUuid)) {
1324 String message = "Feature heading '%s' differs from feature class '%s' and can not be transformed to feature";
1325 message = String.format(message, heading,
1326 classValue);
1327 fireWarningEvent(message, next, 1);
1328 }
1329 }
1330 } catch (UndefinedTransformerMethodException e) {
1331 throw new RuntimeException(e);
1332 }
1333 } else {
1334 // do nothing
1335 }
1336 }
1337 }
1338
1339 private List<Reference<?>> handleReferences(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1340 // attributes
1341 Map<String, Attribute> attributes = getAttributes(parentEvent);
1342 String bibliography = getAndRemoveAttributeValue(attributes,
1343 BIBLIOGRAPHY);
1344 String serialsAbbreviations = getAndRemoveAttributeValue(attributes,
1345 SERIALS_ABBREVIATIONS);
1346 if (isNotBlank(bibliography) || isNotBlank(serialsAbbreviations)) {
1347 String message = "Attributes not yet implemented for <references>";
1348 fireWarningEvent(message, parentEvent, 4);
1349 }
1350
1351 List<Reference<?>> result = new ArrayList<Reference<?>>();
1352
1353 // elements
1354 while (reader.hasNext()) {
1355 XMLEvent next = readNoWhitespace(reader);
1356 if (next.isEndElement()) {
1357 if (isMyEndingElement(next, parentEvent)) {
1358 return result;
1359 } else {
1360 if (isEndingElement(next, HEADING)) {
1361 // NOT YET IMPLEMENTED
1362 popUnimplemented(next.asEndElement());
1363 } else if (isEndingElement(next, WRITER)) {
1364 // NOT YET IMPLEMENTED
1365 popUnimplemented(next.asEndElement());
1366 } else if (isEndingElement(next, FOOTNOTE)) {
1367 // NOT YET IMPLEMENTED
1368 popUnimplemented(next.asEndElement());
1369 } else if (isEndingElement(next, STRING)) {
1370 // NOT YET IMPLEMENTED
1371 popUnimplemented(next.asEndElement());
1372 } else if (isEndingElement(next, REF_NUM)) {
1373 // NOT YET IMPLEMENTED
1374 popUnimplemented(next.asEndElement());
1375 } else {
1376 handleUnexpectedEndElement(next.asEndElement());
1377 }
1378 }
1379 } else if (next.isStartElement()) {
1380 if (isStartingElement(next, HEADING)) {
1381 handleNotYetImplementedElement(next);
1382 } else if (isStartingElement(next, SUB_HEADING)) {
1383 String subheading = getCData(state, reader, next).trim();
1384 String excludePattern = "(i?)(References?|Literature):?";
1385 if (!subheading.matches(excludePattern)) {
1386 fireNotYetImplementedElement(next.getLocation(), next.asStartElement().getName(), 0);
1387 }
1388 } else if (isStartingElement(next, WRITER)) {
1389 handleNotYetImplementedElement(next);
1390 } else if (isStartingElement(next, FOOTNOTE)) {
1391 handleNotYetImplementedElement(next);
1392 } else if (isStartingElement(next, STRING)) {
1393 handleNotYetImplementedElement(next);
1394 } else if (isStartingElement(next, REF_NUM)) {
1395 handleNotYetImplementedElement(next);
1396 } else if (isStartingElement(next, REFERENCE)) {
1397 Reference<?> ref = nomenclatureImport.handleReference(state, reader, next);
1398 result.add(ref);
1399 } else {
1400 handleUnexpectedStartElement(next);
1401 }
1402 } else {
1403 handleUnexpectedElement(next);
1404 }
1405 }
1406 throw new IllegalStateException("<References> has no closing tag");
1407 }
1408
1409 private void handleHabitat(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1410 checkNoAttributes(parentEvent);
1411 Taxon taxon = state.getCurrentTaxon();
1412 // TODO which ref to take?
1413 Reference<?> ref = state.getConfig().getSourceReference();
1414
1415 String text = "";
1416 while (reader.hasNext()) {
1417 XMLEvent next = readNoWhitespace(reader);
1418 if (isMyEndingElement(next, parentEvent)) {
1419 TaxonDescription description = getTaxonDescription(taxon, ref,
1420 false, true);
1421 UUID uuidExtractedHabitat = MarkupTransformer.uuidExtractedHabitat;
1422 Feature feature = getFeature(
1423 state,
1424 uuidExtractedHabitat,
1425 "Extracted Habitat",
1426 "An structured habitat that was extracted from a habitat text",
1427 "extr. habit.", null);
1428 TextData habitat = TextData.NewInstance(feature);
1429 habitat.putText(getDefaultLanguage(state), text);
1430 description.addElement(habitat);
1431
1432 return;
1433 } else if (next.isStartElement()) {
1434 if (isStartingElement(next, ALTITUDE)) {
1435 text = text.trim() + getTaggedCData(state, reader, next);
1436 } else if (isStartingElement(next, LIFE_CYCLE_PERIODS)) {
1437 handleNotYetImplementedElement(next);
1438 } else {
1439 handleUnexpectedStartElement(next.asStartElement());
1440 }
1441 } else if (next.isCharacters()) {
1442 text += next.asCharacters().getData();
1443 } else {
1444 handleUnexpectedElement(next);
1445 }
1446 }
1447 throw new IllegalStateException("<Habitat> has no closing tag");
1448 }
1449
1450 private String getTaggedCData(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1451 checkNoAttributes(parentEvent);
1452
1453 String text = getXmlTag(parentEvent);
1454 while (reader.hasNext()) {
1455 XMLEvent next = readNoWhitespace(reader);
1456 if (isMyEndingElement(next, parentEvent)) {
1457 text += getXmlTag(next);
1458 return text;
1459 } else if (next.isStartElement()) {
1460 text += getTaggedCData(state, reader, next);
1461 } else if (next.isEndElement()) {
1462 text += getTaggedCData(state, reader, next);
1463 } else if (next.isCharacters()) {
1464 text += next.asCharacters().getData();
1465 } else {
1466 handleUnexpectedEndElement(next.asEndElement());
1467 }
1468 }
1469 throw new IllegalStateException("Some tag has no closing tag");
1470 }
1471
1472 private String handleDistributionLocality(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1473 Map<String, Attribute> attributes = getAttributes(parentEvent);
1474 String classValue = getAndRemoveRequiredAttributeValue(parentEvent, attributes, CLASS);
1475 String statusValue =getAndRemoveAttributeValue(attributes, STATUS);
1476 String frequencyValue =getAndRemoveAttributeValue(attributes, FREQUENCY);
1477
1478
1479 Taxon taxon = state.getCurrentTaxon();
1480 // TODO which ref to take?
1481 Reference<?> ref = state.getConfig().getSourceReference();
1482
1483 String text = "";
1484 while (reader.hasNext()) {
1485 XMLEvent next = readNoWhitespace(reader);
1486 if (isMyEndingElement(next, parentEvent)) {
1487 if (StringUtils.isNotBlank(text)) {
1488 String label = CdmUtils.removeTrailingDot(normalize(text));
1489 TaxonDescription description = getTaxonDescription(taxon, ref, false, true);
1490 NamedAreaLevel level = makeNamedAreaLevel(state,classValue, next);
1491
1492 //status
1493 PresenceAbsenceTermBase<?> status = null;
1494 if (isNotBlank(statusValue)){
1495 try {
1496 status = state.getTransformer().getPresenceTermByKey(statusValue);
1497 if (status == null){
1498 //TODO
1499 String message = "The presence/absence status '%s' could not be transformed to an CDM status";
1500 fireWarningEvent(String.format(message, statusValue), next, 4);
1501 }
1502 } catch (UndefinedTransformerMethodException e) {
1503 throw new RuntimeException(e);
1504 }
1505 }else{
1506 status = PresenceTerm.PRESENT();
1507 }
1508 //frequency
1509 if (isNotBlank(frequencyValue)){
1510 String message = "The frequency attribute is currently not yet available in CDM";
1511 fireWarningEvent(message, parentEvent, 6);
1512 }
1513
1514 NamedArea higherArea = null;
1515 List<NamedArea> areas = new ArrayList<NamedArea>();
1516
1517 String patSingleArea = "([^,\\(]{3,})";
1518 String patSeparator = "(,|\\sand\\s)";
1519 String hierarchiePattern = String.format("%s\\((%s(%s%s)*)\\)",patSingleArea, patSingleArea, patSeparator, patSingleArea);
1520 Pattern patHierarchie = Pattern.compile(hierarchiePattern, Pattern.CASE_INSENSITIVE);
1521 Matcher matcher = patHierarchie.matcher(label);
1522 if (matcher.matches()){
1523 String higherAreaStr = matcher.group(1).trim();
1524 higherArea = makeArea(state, higherAreaStr, level);
1525 String[] innerAreas = matcher.group(2).split(patSeparator);
1526 for (String innerArea : innerAreas){
1527 if (isNotBlank(innerArea)){
1528 NamedArea singleArea = makeArea(state, innerArea.trim(), level);
1529 areas.add(singleArea);
1530 NamedArea partOf = singleArea.getPartOf();
1531 // if (partOf == null){
1532 // singleArea.setPartOf(higherArea);
1533 // }
1534 }
1535 }
1536 }else{
1537 NamedArea singleArea = makeArea(state, label, level);
1538 areas.add(singleArea);
1539 }
1540
1541 for (NamedArea area : areas){
1542 //create distribution
1543 Distribution distribution = Distribution.NewInstance(area,status);
1544 description.addElement(distribution);
1545 }
1546 } else {
1547 String message = "Empty distribution locality";
1548 fireWarningEvent(message, next, 4);
1549 }
1550 return text;
1551 } else if (isStartingElement(next, COORDINATES)) {
1552 //TODO
1553 handleNotYetImplementedElement(next);
1554 } else if (isEndingElement(next, COORDINATES)) {
1555 //TODO
1556 popUnimplemented(next.asEndElement());
1557 } else if (next.isCharacters()) {
1558 text += next.asCharacters().getData();
1559 } else {
1560 handleUnexpectedElement(next);
1561 }
1562 }
1563 throw new IllegalStateException("<DistributionLocality> has no closing tag");
1564 }
1565
1566 private String handleHeading(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1567 checkNoAttributes(parentEvent);
1568
1569 String text = "";
1570 while (reader.hasNext()) {
1571 XMLEvent next = readNoWhitespace(reader);
1572 if (isMyEndingElement(next, parentEvent)) {
1573 return text;
1574 } else if (next.isStartElement()) {
1575 if (isStartingElement(next, FOOTNOTE)) {
1576 handleNotYetImplementedElement(next);
1577 } else {
1578 handleUnexpectedStartElement(next.asStartElement());
1579 }
1580 } else if (next.isCharacters()) {
1581 text += next.asCharacters().getData();
1582 } else {
1583 handleUnexpectedEndElement(next.asEndElement());
1584 }
1585 }
1586 throw new IllegalStateException("<String> has no closing tag");
1587
1588 }
1589
1590 /**
1591 * Handle string
1592 * @param state
1593 * @param reader
1594 * @param parentEvent
1595 * @param feature only needed for distributionLocalities
1596 * @return
1597 * @throws XMLStreamException
1598 */
1599 private Map<String, String> handleString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature feature)throws XMLStreamException {
1600 // attributes
1601 String classValue = getClassOnlyAttribute(parentEvent, false);
1602 if (StringUtils.isNotBlank(classValue)) {
1603 String message = "class attribute for <string> not yet implemented";
1604 fireWarningEvent(message, parentEvent, 2);
1605 }
1606
1607 // subheadings
1608 Map<String, String> subHeadingMap = new HashMap<String, String>();
1609 String currentSubheading = null;
1610
1611 boolean isTextMode = true;
1612 String text = "";
1613 while (reader.hasNext()) {
1614 XMLEvent next = readNoWhitespace(reader);
1615 if (isMyEndingElement(next, parentEvent)) {
1616 putCurrentSubheading(subHeadingMap, currentSubheading, text);
1617 return subHeadingMap;
1618 } else if (isStartingElement(next, BR)) {
1619 text += "<br/>";
1620 isTextMode = false;
1621 } else if (isEndingElement(next, BR)) {
1622 isTextMode = true;
1623 } else if (isHtml(next)) {
1624 text += getXmlTag(next);
1625 } else if (isStartingElement(next, SUB_HEADING)) {
1626 text = putCurrentSubheading(subHeadingMap,currentSubheading, text);
1627 // TODO footnotes
1628 currentSubheading = getCData(state, reader, next).trim();
1629 } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
1630 if (feature != null && !feature.equals(Feature.DISTRIBUTION())) {
1631 String message = "Distribution locality only allowed for feature of type 'distribution'";
1632 fireWarningEvent(message, next, 4);
1633 }
1634 text += handleDistributionLocality(state, reader, next);
1635 } else if (next.isCharacters()) {
1636 if (! isTextMode) {
1637 String message = "String is not in text mode";
1638 fireWarningEvent(message, next, 6);
1639 } else {
1640 text += next.asCharacters().getData();
1641 }
1642 } else if (isStartingElement(next, HEADING)) {
1643 //TODO
1644 handleNotYetImplementedElement(next);
1645 } else if (isStartingElement(next, VERNACULAR_NAMES)) {
1646 //TODO
1647 handleNotYetImplementedElement(next);
1648 } else if (isEndingElement(next, HEADING)) {
1649 //TODO
1650 popUnimplemented(next.asEndElement());
1651 } else if (isStartingElement(next, QUOTE)) {
1652 //TODO
1653 handleNotYetImplementedElement(next);
1654 } else if (isEndingElement(next, QUOTE)) {
1655 //TODO
1656 popUnimplemented(next.asEndElement());
1657 } else if (isStartingElement(next, DEDICATION)) {
1658 //TODO
1659 handleNotYetImplementedElement(next);
1660 } else if (isEndingElement(next, DEDICATION)) {
1661 //TODO
1662 popUnimplemented(next.asEndElement());
1663 } else if (isStartingElement(next, TAXONTYPE)) {
1664 //TODO
1665 handleNotYetImplementedElement(next);
1666 } else if (isEndingElement(next, TAXONTYPE)) {
1667 //TODO
1668 popUnimplemented(next.asEndElement());
1669 } else if (isStartingElement(next, FULL_NAME)) {
1670 //TODO
1671 handleNotYetImplementedElement(next);
1672 } else if (isEndingElement(next, FULL_NAME)) {
1673 //TODO
1674 popUnimplemented(next.asEndElement());
1675 }else if (isStartingElement(next, REFERENCES)) {
1676 //TODO
1677 handleNotYetImplementedElement(next);
1678 } else if (isEndingElement(next, REFERENCES)) {
1679 //TODO
1680 popUnimplemented(next.asEndElement());
1681 } else if (isStartingElement(next, GATHERING)) {
1682 //TODO
1683 handleNotYetImplementedElement(next);
1684 } else if (isEndingElement(next, GATHERING)) {
1685 //TODO
1686 popUnimplemented(next.asEndElement());
1687 } else if (isStartingElement(next, ANNOTATION)) {
1688 //TODO //TODO test handleSimpleAnnotation
1689 handleNotYetImplementedElement(next);
1690 } else if (isEndingElement(next, ANNOTATION)) {
1691 //TODO
1692 popUnimplemented(next.asEndElement());
1693 } else if (isStartingElement(next, HABITAT)) {
1694 //TODO
1695 handleNotYetImplementedElement(next);
1696 } else if (isEndingElement(next, HABITAT)) {
1697 //TODO
1698 popUnimplemented(next.asEndElement());
1699 } else if (isStartingElement(next, FIGURE_REF)) {
1700 //TODO
1701 handleNotYetImplementedElement(next);
1702 } else if (isEndingElement(next, FIGURE_REF)) {
1703 //TODO
1704 popUnimplemented(next.asEndElement());
1705 } else if (isStartingElement(next, FIGURE)) {
1706 //TODO
1707 handleNotYetImplementedElement(next);
1708 } else if (isEndingElement(next, FIGURE)) {
1709 //TODO
1710 popUnimplemented(next.asEndElement());
1711 } else if (isStartingElement(next, FOOTNOTE_REF)) {
1712 //TODO
1713 handleNotYetImplementedElement(next);
1714 } else if (isEndingElement(next, FOOTNOTE_REF)) {
1715 //TODO
1716 popUnimplemented(next.asEndElement());
1717 } else if (isStartingElement(next, FOOTNOTE)) {
1718 //TODO
1719 handleNotYetImplementedElement(next);
1720 } else if (isEndingElement(next, FOOTNOTE)) {
1721 //TODO
1722 popUnimplemented(next.asEndElement());
1723 } else if (isStartingElement(next, WRITER)) {
1724 //TODO
1725 handleNotYetImplementedElement(next);
1726 } else if (isEndingElement(next, WRITER)) {
1727 //TODO
1728 popUnimplemented(next.asEndElement());
1729 } else if (isStartingElement(next, DATES)) {
1730 //TODO
1731 handleNotYetImplementedElement(next);
1732 } else if (isEndingElement(next, DATES)) {
1733 //TODO
1734 popUnimplemented(next.asEndElement());
1735 } else {
1736 handleUnexpectedElement(next);
1737 }
1738 }
1739 throw new IllegalStateException("<String> has no closing tag");
1740 }
1741
1742 /**
1743 * @param subHeadingMap
1744 * @param currentSubheading
1745 * @param text
1746 * @return
1747 */
1748 private String putCurrentSubheading(Map<String, String> subHeadingMap, String currentSubheading, String text) {
1749 if (StringUtils.isNotBlank(text)) {
1750 text = removeStartingMinus(text);
1751 subHeadingMap.put(currentSubheading, text.trim());
1752 }
1753 return "";
1754 }
1755
1756 private String removeStartingMinus(String string) {
1757 string = replaceStart(string, "-");
1758 string = replaceStart(string, "\u002d");
1759 string = replaceStart(string, "\u2013");
1760 string = replaceStart(string, "\u2014");
1761 string = replaceStart(string, "--");
1762 return string;
1763 }
1764
1765 /**
1766 * @param value
1767 * @param replacementString
1768 */
1769 private String replaceStart(String value, String replacementString) {
1770 if (value.startsWith(replacementString) ){
1771 value = value.substring(replacementString.length()).trim();
1772 }
1773 while (value.startsWith("-") || value.startsWith("\u2014") ){
1774 value = value.substring("-".length()).trim();
1775 }
1776 return value;
1777 }
1778
1779 private String getXmlTag(XMLEvent event) {
1780 String result;
1781 if (event.isStartElement()) {
1782 result = "<" + event.asStartElement().getName().getLocalPart()
1783 + ">";
1784 } else if (event.isEndElement()) {
1785 result = "</" + event.asEndElement().getName().getLocalPart() + ">";
1786 } else {
1787 String message = "Only start or end elements are allowed as Html tags";
1788 throw new IllegalStateException(message);
1789 }
1790 return result;
1791 }
1792
1793 protected static final List<String> htmlList = Arrays.asList("sub", "sup",
1794 "ol", "ul", "li", "i", "b", "table", "br","tr","td");
1795
1796 private boolean isHtml(XMLEvent event) {
1797 if (event.isStartElement()) {
1798 String tag = event.asStartElement().getName().getLocalPart();
1799 return htmlList.contains(tag);
1800 } else if (event.isEndElement()) {
1801 String tag = event.asEndElement().getName().getLocalPart();
1802 return htmlList.contains(tag);
1803 } else {
1804 return false;
1805 }
1806
1807 }
1808
1809 /**
1810 * Handle the char or subchar element. As
1811 * @param state the import state
1812 * @param reader
1813 * @param parentEvent
1814 * @param parentFeature in case of subchars we need to attache the newly created feature to a parent feature, should be <code>null</code>
1815 * for top level chars.
1816 * @return List of TextData. Not a single one as the recursive TextData will also be returned
1817 * @throws XMLStreamException
1818 */
1819 private List<TextData> handleChar(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature parentFeature) throws XMLStreamException {
1820 List<TextData> result = new ArrayList<TextData>();
1821 String classValue = getClassOnlyAttribute(parentEvent);
1822 Feature feature = makeFeature(classValue, state, parentEvent, parentFeature);
1823
1824 boolean isTextMode = true;
1825 String text = "";
1826 while (reader.hasNext()) {
1827 XMLEvent next = readNoWhitespace(reader);
1828 if (isMyEndingElement(next, parentEvent)) {
1829 state.putFeatureToCharSorterList(feature);
1830 TextData textData = TextData.NewInstance(feature);
1831 textData.putText(getDefaultLanguage(state), text);
1832 result.add(textData);
1833 return result;
1834 } else if (isStartingElement(next, FIGURE_REF)) {
1835 //TODO
1836 handleNotYetImplementedElement(next);
1837 } else if (isStartingElement(next, FOOTNOTE_REF)) {
1838 //TODO
1839 handleNotYetImplementedElement(next);
1840 } else if (isStartingElement(next, BR)) {
1841 text += "<br/>";
1842 isTextMode = false;
1843 } else if (isEndingElement(next, BR)) {
1844 isTextMode = true;
1845 } else if (isHtml(next)) {
1846 text += getXmlTag(next);
1847 } else if (next.isStartElement()) {
1848 if (isStartingElement(next, ANNOTATION)) {
1849 handleNotYetImplementedElement(next); //TODO test handleSimpleAnnotation
1850 } else if (isStartingElement(next, ITALICS)) {
1851 handleNotYetImplementedElement(next);
1852 } else if (isStartingElement(next, BOLD)) {
1853 handleNotYetImplementedElement(next);
1854 } else if (isStartingElement(next, FIGURE)) {
1855 handleFigure(state, reader, next);
1856 } else if (isStartingElement(next, SUB_CHAR)) {
1857 List<TextData> textData = handleChar(state, reader, next, feature);
1858 result.addAll(textData);
1859 } else if (isStartingElement(next, FOOTNOTE)) {
1860 FootnoteDataHolder footnote = handleFootnote(state, reader, next);
1861 if (footnote.isRef()) {
1862 String message = "Ref footnote not implemented here";
1863 fireWarningEvent(message, next, 4);
1864 } else {
1865 registerGivenFootnote(state, footnote);
1866 }
1867 } else {
1868 handleUnexpectedStartElement(next.asStartElement());
1869 }
1870 } else if (next.isCharacters()) {
1871 if (!isTextMode) {
1872 String message = "String is not in text mode";
1873 fireWarningEvent(message, next, 6);
1874 } else {
1875 text += next.asCharacters().getData();
1876 }
1877 } else {
1878 handleUnexpectedEndElement(next.asEndElement());
1879 }
1880 }
1881 throw new IllegalStateException("RefPart has no closing tag");
1882 }
1883
1884 /**
1885 * @param classValue
1886 * @param state
1887 * @param parentEvent
1888 * @param parentFeature
1889 * @return
1890 * @throws UndefinedTransformerMethodException
1891 */
1892 private Feature makeFeature(String classValue, MarkupImportState state, XMLEvent parentEvent, Feature parentFeature) {
1893 UUID uuid;
1894 try {
1895 String featureText = StringUtils.capitalize(classValue);
1896 if (parentFeature != null){
1897 featureText = "<%s>" + featureText;
1898 featureText = String.format(featureText, parentFeature.getTitleCache());
1899 classValue = "<%s>" + classValue;
1900 classValue = String.format(classValue, parentFeature.getTitleCache());
1901 }
1902
1903
1904 Feature feature = state.getTransformer().getFeatureByKey(classValue);
1905 if (feature != null) {
1906 return feature;
1907 }
1908 uuid = state.getTransformer().getFeatureUuid(classValue);
1909
1910 if (uuid == null){
1911 uuid = state.getUnknownFeatureUuid(classValue);
1912 }
1913
1914 if (uuid == null) {
1915 // TODO
1916 String message = "Uuid is not defined for '%s'";
1917 message = String.format(message, classValue);
1918 fireWarningEvent(message, parentEvent, 8);
1919 uuid = UUID.randomUUID();
1920 state.putUnknownFeatureUuid(classValue, uuid);
1921 }
1922
1923 // TODO eFlora vocabulary
1924 TermVocabulary<Feature> voc = null;
1925 feature = getFeature(state, uuid, featureText, featureText, classValue, voc);
1926 if (parentFeature != null){
1927 parentFeature.addIncludes(feature);
1928 save(parentFeature, state);
1929 }
1930 save(feature, state);
1931
1932 if (feature == null) {
1933 throw new NullPointerException(classValue + " not recognized as a feature");
1934 }
1935 // state.putFeatureToCurrentList(feature);
1936 return feature;
1937 } catch (Exception e) {
1938 String message = "Could not create feature for %s: %s";
1939 message = String.format(message, classValue, e.getMessage());
1940 fireWarningEvent(message, parentEvent, 4);
1941 state.putUnknownFeatureUuid(classValue, null);
1942 // e.printStackTrace();
1943 return Feature.UNKNOWN();
1944 }
1945 }
1946
1947 }