d0b83b22d9581a672de83bae1541ac7733d2be55
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupDocumentImportNoComponent.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.net.MalformedURLException;
13 import java.net.URL;
14 import java.util.ArrayList;
15 import java.util.Arrays;
16 import java.util.HashMap;
17 import java.util.HashSet;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21 import java.util.UUID;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import javax.xml.stream.Location;
26 import javax.xml.stream.XMLEventReader;
27 import javax.xml.stream.XMLStreamException;
28 import javax.xml.stream.events.Attribute;
29 import javax.xml.stream.events.StartElement;
30 import javax.xml.stream.events.XMLEvent;
31
32 import org.apache.commons.lang.StringUtils;
33 import org.apache.log4j.Logger;
34
35 import eu.etaxonomy.cdm.common.CdmUtils;
36 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
37 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
38 import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
39 import eu.etaxonomy.cdm.model.common.Annotation;
40 import eu.etaxonomy.cdm.model.common.AnnotationType;
41 import eu.etaxonomy.cdm.model.common.CdmBase;
42 import eu.etaxonomy.cdm.model.common.Extension;
43 import eu.etaxonomy.cdm.model.common.ExtensionType;
44 import eu.etaxonomy.cdm.model.common.Language;
45 import eu.etaxonomy.cdm.model.common.TermVocabulary;
46 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
47 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
48 import eu.etaxonomy.cdm.model.description.Distribution;
49 import eu.etaxonomy.cdm.model.description.Feature;
50 import eu.etaxonomy.cdm.model.description.PolytomousKey;
51 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
52 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
53 import eu.etaxonomy.cdm.model.description.PresenceTerm;
54 import eu.etaxonomy.cdm.model.description.TaxonDescription;
55 import eu.etaxonomy.cdm.model.description.TextData;
56 import eu.etaxonomy.cdm.model.location.NamedArea;
57 import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
58 import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;
59 import eu.etaxonomy.cdm.model.media.Media;
60 import eu.etaxonomy.cdm.model.name.CultivarPlantName;
61 import eu.etaxonomy.cdm.model.name.NonViralName;
62 import eu.etaxonomy.cdm.model.name.Rank;
63 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
64 import eu.etaxonomy.cdm.model.reference.Reference;
65 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
66 import eu.etaxonomy.cdm.model.taxon.Classification;
67 import eu.etaxonomy.cdm.model.taxon.Taxon;
68 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
69
70
71 /**
72 * @author a.mueller
73 *
74 */
75 public class MarkupDocumentImportNoComponent extends MarkupImportBase {
76 private static final Logger logger = Logger.getLogger(MarkupDocumentImportNoComponent.class);
77
78
79 private MarkupKeyImport keyImport;
80 private MarkupSpecimenImport specimenImport;
81
82 private MarkupNomenclatureImport nomenclatureImport;
83
/**
 * Creates the importer and instantiates the helper importers for keys,
 * specimens and nomenclature. All helpers receive the same document import;
 * the nomenclature helper additionally receives the key and specimen helpers.
 *
 * @param docImport the document import this component belongs to
 */
public MarkupDocumentImportNoComponent(MarkupDocumentImport docImport) {
    super(docImport);
    this.keyImport = new MarkupKeyImport(docImport);
    this.specimenImport = new MarkupSpecimenImport(docImport);
    this.nomenclatureImport = new MarkupNomenclatureImport(docImport, this.keyImport, this.specimenImport);
}
90
91 public void doInvoke(MarkupImportState state) throws XMLStreamException {
92 XMLEventReader reader = state.getReader();
93
94 // publication (= root element)
95 String elName = PUBLICATION;
96 boolean hasPublication = false;
97
98 while (reader.hasNext()) {
99 XMLEvent nextEvent = reader.nextEvent();
100 if (isStartingElement(nextEvent, elName)) {
101 handlePublication(state, reader, nextEvent, elName);
102 hasPublication = true;
103 } else if (nextEvent.isEndDocument()) {
104 if (!hasPublication) {
105 String message = "No publication root element found";
106 fireWarningEvent(message, nextEvent, 8);
107 }
108 // done
109 } else {
110 fireSchemaConflictEventExpectedStartTag(elName, reader);
111 }
112 }
113
114
115 return;
116
117 }
118
119 private void handlePublication(MarkupImportState state, XMLEventReader reader, XMLEvent currentEvent, String elName) throws XMLStreamException {
120
121 // attributes
122 StartElement element = currentEvent.asStartElement();
123 Map<String, Attribute> attributes = getAttributes(element);
124 String lang = getAndRemoveAttributeValue(attributes, "lang");
125 if (lang != null){
126 Language language = getTermService().getLanguageByIso(lang);
127 state.setDefaultLanguage(language);
128 }
129
130 handleUnexpectedAttributes(element.getLocation(), attributes, "noNamespaceSchemaLocation");
131
132 while (reader.hasNext()) {
133 XMLEvent event = readNoWhitespace(reader);
134 // TODO cardinality of alternative
135 if (event.isEndElement()) {
136 if (isEndingElement(event, elName)) {
137 return;
138 } else {
139 if (isEndingElement(event, BIOGRAPHIES)) {
140 // NOT YET IMPLEMENTED
141 popUnimplemented(event.asEndElement());
142 } else if (isEndingElement(event, REFERENCES)) {
143 // NOT YET IMPLEMENTED
144 popUnimplemented(event.asEndElement());
145 } else if (isEndingElement(event, TEXT_SECTION)) {
146 // NOT YET IMPLEMENTED
147 popUnimplemented(event.asEndElement());
148 } else if (isEndingElement(event, ADDENDA)) {
149 // NOT YET IMPLEMENTED
150 popUnimplemented(event.asEndElement());
151 } else {
152 handleUnexpectedElement(event);
153 }
154 }
155 } else if (event.isStartElement()) {
156 if (isStartingElement(event, META_DATA)) {
157 handleMetaData(state, reader, event);
158 } else if (isStartingElement(event, TREATMENT)) {
159 handleTreatment(state, reader, event);
160 } else if (isStartingElement(event, BIOGRAPHIES)) {
161 handleNotYetImplementedElement(event);
162 } else if (isStartingElement(event, REFERENCES)) {
163 handleNotYetImplementedElement(event);
164 } else if (isStartingElement(event, TEXT_SECTION)) {
165 handleNotYetImplementedElement(event);
166 } else if (isStartingElement(event, ADDENDA)) {
167 handleNotYetImplementedElement(event);
168 } else {
169 handleUnexpectedStartElement(event);
170 }
171 } else {
172 handleUnexpectedElement(event);
173 }
174 }
175 throw new IllegalStateException("Publication has no ending element");
176 }
177
178 private void handleMetaData(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
179 checkNoAttributes(parentEvent);
180
181 while (reader.hasNext()) {
182 XMLEvent next = readNoWhitespace(reader);
183 if (isMyEndingElement(next, parentEvent)) {
184 return;
185 } else if (isStartingElement(next, DEFAULT_MEDIA_URL)) {
186 String baseUrl = getCData(state, reader, next);
187 try {
188 new URL(baseUrl);
189 state.setBaseMediaUrl(baseUrl);
190 } catch (MalformedURLException e) {
191 String message = "defaultMediaUrl '%s' is not a valid URL";
192 message = String.format(message, baseUrl);
193 fireWarningEvent(message, next, 8);
194 }
195 } else if (isStartingElement(next, MODS)){
196 handleNotYetImplementedElement(next);
197 } else {
198 handleUnexpectedElement(next);
199 }
200 }
201 throw new IllegalStateException("MetaData has no ending element");
202
203 }
204
205 private void handleTreatment(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
206 checkNoAttributes(parentEvent);
207 Taxon lastTaxon = null;
208 while (reader.hasNext()) {
209 XMLEvent next = readNoWhitespace(reader);
210 if (isMyEndingElement(next, parentEvent)) {
211 Set<PolytomousKeyNode> keyNodesToSave = state.getPolytomousKeyNodesToSave();
212 //better save the key then the nodes
213 Set<PolytomousKey> keySet = new HashSet<PolytomousKey>();
214 for (PolytomousKeyNode node : keyNodesToSave){
215 PolytomousKey key = node.getKey();
216 keySet.add(key);
217 }
218 save(keySet, state);
219 //unmatched key leads
220 UnmatchedLeads unmatched = state.getUnmatchedLeads();
221 if (unmatched.size() > 0){
222 String message = "The following key leads are unmatched: %s";
223 message = String.format(message, state.getUnmatchedLeads().toString());
224 fireWarningEvent(message, next, 6);
225 }
226 // save(keyNodesToSave, state);
227
228 return;
229 } else if (isStartingElement(next, TAXON)) {
230 Taxon thisTaxon = handleTaxon(state, reader, next.asStartElement());
231 doTaxonRelation(state, thisTaxon, lastTaxon, parentEvent.getLocation());
232 if (state.isTaxonInClassification() == true){
233 lastTaxon = thisTaxon;
234 // TODO for imports spanning multiple documents ?? Still needed?
235 state.getConfig().setLastTaxonUuid(lastTaxon.getUuid());
236 }
237 } else if (isStartingElement(next, ADDENDA)) {
238 handleNotYetImplementedElement(next);
239 } else {
240 handleUnexpectedElement(next);
241 }
242 }
243 return;
244 }
245
246 /**
247 * @param taxon
248 * @param lastTaxon
249 */
250 private void doTaxonRelation(MarkupImportState state, Taxon taxon, Taxon lastTaxon, Location dataLocation) {
251
252 if (state.isTaxonInClassification() == false){
253 return;
254 }
255
256 Classification tree = makeTree(state, dataLocation);
257 if (lastTaxon == null) {
258 tree.addChildTaxon(taxon, null, null, null);
259 return;
260 }
261 Rank thisRank = taxon.getName().getRank();
262 Rank lastRank = lastTaxon.getName().getRank();
263 if (lastTaxon.getTaxonNodes().size() > 0) {
264 TaxonNode lastNode = lastTaxon.getTaxonNodes().iterator().next();
265 if (thisRank == null){
266 String message = "Rank is undefined for taxon '%s'. Can't create classification without rank.";
267 message = String.format(message, taxon.getName().getTitleCache());
268 fireWarningEvent(message, makeLocationStr(dataLocation), 6);
269 }else if (thisRank.isLower(lastRank)) {
270 lastNode.addChildTaxon(taxon, null, null, null);
271 fillMissingEpithetsForTaxa(lastTaxon, taxon);
272 } else if (thisRank.equals(lastRank)) {
273 TaxonNode parent = lastNode.getParent();
274 if (parent != null) {
275 parent.addChildTaxon(taxon, null, null, null);
276 fillMissingEpithetsForTaxa(parent.getTaxon(), taxon);
277 } else {
278 tree.addChildTaxon(taxon, null, null, null);
279 }
280 } else if (thisRank.isHigher(lastRank)) {
281 doTaxonRelation(state, taxon, lastNode.getParent().getTaxon(), dataLocation);
282 // TaxonNode parentNode = handleTaxonRelation(state, taxon,
283 // lastNode.getParent().getTaxon());
284 // parentNode.addChildTaxon(taxon, null, null, null);
285 }
286 } else {
287 String message = "Last taxon has no node";
288 fireWarningEvent(message, makeLocationStr(dataLocation), 6);
289 }
290 }
291
292
293
294 /**
295 * @param state
296 * @param dataLocation
297 * @return
298 */
299 private Classification makeTree(MarkupImportState state, Location dataLocation) {
300 Classification result = state.getTree(null);
301 if (result == null) {
302 UUID uuid = state.getConfig().getClassificationUuid();
303 if (uuid == null) {
304 String message = "No classification uuid is defined";
305 fireWarningEvent(message, makeLocationStr(dataLocation), 6);
306 result = createNewClassification(state);
307 } else {
308 result = getClassificationService().find(uuid);
309 if (result == null) {
310 result = createNewClassification(state);
311 result.setUuid(uuid);
312 }
313 }
314 state.putTree(null, result);
315 }
316 save(result, state);
317 return result;
318 }
319
320 private Classification createNewClassification(MarkupImportState state) {
321 Classification result = Classification.NewInstance(state.getConfig().getClassificationName(), getDefaultLanguage(state));
322 state.putTree(null, result);
323 return result;
324 }
325
326 private Taxon handleTaxon(MarkupImportState state, XMLEventReader reader, StartElement parentEvent) throws XMLStreamException {
327 // TODO progress monitoring
328 Map<String, Attribute> attributes = getAttributes(parentEvent);
329 Taxon taxon = createTaxonAndName(state, attributes);
330 state.setCurrentTaxon(taxon);
331 state.addNewFeatureSorterLists(taxon.getUuid().toString());
332
333 boolean hasTitle = false;
334 boolean hasNomenclature = false;
335 String taxonTitle = null;
336
337 Reference<?> descriptionReference = state.getConfig().getSourceReference();
338 while (reader.hasNext()) {
339 XMLEvent next = readNoWhitespace(reader);
340 if (next.isEndElement()) {
341 if (isMyEndingElement(next, parentEvent)) {
342 // checkMandatoryElement(hasTitle, parentEvent, TAXONTITLE);
343 checkMandatoryElement(hasNomenclature, parentEvent, NOMENCLATURE);
344 boolean inClassification = getAndRemoveBooleanAttributeValue(next, attributes, "inClassification", true);
345 state.setTaxonInClassification(inClassification);
346 handleUnexpectedAttributes(parentEvent.getLocation(),attributes);
347 if (taxon.getName().getRank() == null){
348 String warning = "No rank exists for taxon " + taxon.getTitleCache();
349 fireWarningEvent(warning, next, 12);
350 taxon.getName().setRank(Rank.UNKNOWN_RANK());
351 }
352
353 keyImport.makeKeyNodes(state, parentEvent, taxonTitle);
354 state.setCurrentTaxon(null);
355 state.setCurrentTaxonNum(null);
356 if (taxon.getName().getRank().isHigher(Rank.GENUS())){
357 state.setLatestGenusEpithet(null);
358 }else{
359 state.setLatestGenusEpithet(((NonViralName<?>)taxon.getName()).getGenusOrUninomial());
360 }
361 save(taxon, state);
362 return taxon;
363 } else {
364 if (isEndingElement(next, HEADING)) {
365 // NOT YET IMPLEMENTED
366 popUnimplemented(next.asEndElement());
367 } else if (isEndingElement(next, TEXT_SECTION)) {
368 // NOT YET IMPLEMENTED
369 popUnimplemented(next.asEndElement());
370 } else if (isEndingElement(next, REFERENCES)) {
371 // NOT YET IMPLEMENTED
372 popUnimplemented(next.asEndElement());
373 } else if (isEndingElement(next, FIGURE_REF)) {
374 // NOT YET IMPLEMENTED
375 popUnimplemented(next.asEndElement());
376 } else {
377 handleUnexpectedEndElement(next.asEndElement());
378 }
379 }
380 } else if (next.isStartElement()) {
381 if (isStartingElement(next, HEADING)) {
382 handleNotYetImplementedElement(next);
383 } else if (isStartingElement(next, TAXONTITLE)) {
384 taxonTitle = handleTaxonTitle(state, reader, next);
385 hasTitle = true;
386 } else if (isStartingElement(next, WRITER)) {
387 makeKeyWriter(state, reader, taxon, taxonTitle, next);
388 } else if (isStartingElement(next, TEXT_SECTION)) {
389 handleNotYetImplementedElement(next);
390 } else if (isStartingElement(next, KEY)) {
391 keyImport.handleKey(state, reader, next);
392 } else if (isStartingElement(next, NOMENCLATURE)) {
393 nomenclatureImport.handleNomenclature(state, reader, next);
394 hasNomenclature = true;
395 } else if (isStartingElement(next, FEATURE)) {
396 handleFeature(state, reader, next);
397 } else if (isStartingElement(next, NOTES)) {
398 // TODO is this the correct way to handle notes?
399 String note = handleNotes(state, reader, next);
400
401 UUID notesUuid;
402 try {
403 notesUuid = state.getTransformer().getFeatureUuid("notes");
404 Feature feature = getFeature(state, notesUuid, "Notes", "Notes", "note", null);
405 TextData textData = TextData.NewInstance(feature);
406 textData.putText(getDefaultLanguage(state), note);
407 TaxonDescription description = getTaxonDescription(taxon, descriptionReference, false, true);
408 description.addElement(textData);
409 } catch (UndefinedTransformerMethodException e) {
410 String message = "getFeatureUuid method not yet implemented";
411 fireWarningEvent(message, next, 8);
412 }
413 } else if (isStartingElement(next, REFERENCES)) {
414 handleNotYetImplementedElement(next);
415 } else if (isStartingElement(next, FIGURE_REF)) {
416 TaxonDescription desc = getTaxonDescription(taxon, state.getConfig().getSourceReference(), IMAGE_GALLERY, CREATE_NEW);
417 TextData textData;
418 if (desc.getElements().isEmpty()){
419 textData = TextData.NewInstance(Feature.IMAGE());
420 desc.addElement(textData);
421 }
422 textData = (TextData)desc.getElements().iterator().next();
423 makeFeatureFigureRef(state, reader, desc, false, textData, next);
424 } else if (isStartingElement(next, FIGURE)) {
425 handleFigure(state, reader, next);
426 } else if (isStartingElement(next, FOOTNOTE)) {
427 FootnoteDataHolder footnote = handleFootnote(state, reader, next);
428 if (footnote.isRef()) {
429 String message = "Ref footnote not implemented here";
430 fireWarningEvent(message, next, 4);
431 } else {
432 registerGivenFootnote(state, footnote);
433 }
434 } else {
435 handleUnexpectedStartElement(next);
436 }
437 } else {
438 handleUnexpectedElement(next);
439 }
440 }
441 throw new IllegalStateException("<Taxon> has no closing tag");
442 }
443
444 /**
445 * @param state
446 * @param reader
447 * @param taxon
448 * @param taxonTitle
449 * @param next
450 * @throws XMLStreamException
451 */
452 private void makeKeyWriter(MarkupImportState state, XMLEventReader reader, Taxon taxon, String taxonTitle, XMLEvent next) throws XMLStreamException {
453 WriterDataHolder writer = handleWriter(state, reader, next);
454 taxon.addExtension(writer.extension);
455 // TODO what if taxonTitle comes later
456 if (StringUtils.isNotBlank(taxonTitle)
457 && writer.extension != null) {
458 Reference<?> sec = ReferenceFactory.newBookSection();
459 sec.setTitle(taxonTitle);
460 TeamOrPersonBase<?> author = createAuthor(writer.writer);
461 sec.setAuthorTeam(author);
462 sec.setInReference(state.getConfig()
463 .getSourceReference());
464 taxon.setSec(sec);
465 registerFootnotes(state, sec, writer.footnotes);
466 } else {
467 String message = "No taxontitle exists for writer";
468 fireWarningEvent(message, next, 6);
469 }
470 }
471
472 private String handleNotes(MarkupImportState state, XMLEventReader reader,
473 XMLEvent parentEvent) throws XMLStreamException {
474 checkNoAttributes(parentEvent);
475
476 String text = "";
477 while (reader.hasNext()) {
478 XMLEvent next = readNoWhitespace(reader);
479 if (isMyEndingElement(next, parentEvent)) {
480 return text;
481 } else if (next.isEndElement()) {
482 if (isEndingElement(next, HEADING)) {
483 popUnimplemented(next.asEndElement());
484 } else if (isEndingElement(next, WRITER)) {
485 popUnimplemented(next.asEndElement());
486 } else if (isEndingElement(next, NUM)) {
487 popUnimplemented(next.asEndElement());
488 } else {
489 handleUnexpectedEndElement(next.asEndElement());
490 }
491 } else if (next.isStartElement()) {
492 if (isStartingElement(next, HEADING)) {
493 handleNotYetImplementedElement(next);
494 } else if (isStartingElement(next, SUB_HEADING)) {
495 String subheading = getCData(state, reader, next).trim();
496 if (! isNoteHeading(subheading)) {
497 fireNotYetImplementedElement(next.getLocation(), next.asStartElement().getName(), 0);
498 }
499 } else if (isStartingElement(next, WRITER)) {
500 handleNotYetImplementedElement(next);
501 } else if (isStartingElement(next, NUM)) {
502 handleNotYetImplementedElement(next);
503 } else if (isStartingElement(next, STRING)) {
504 // TODO why multiple strings in schema?
505 text = makeNotesString(state, reader, text, next);
506 } else {
507 handleUnexpectedStartElement(next.asStartElement());
508 }
509 } else {
510 handleUnexpectedElement(next);
511 }
512 }
513 throw new IllegalStateException("<Notes> has no closing tag");
514 }
515
516 /**
517 * @param state
518 * @param reader
519 * @param text
520 * @param next
521 * @return
522 * @throws XMLStreamException
523 */
524 private String makeNotesString(MarkupImportState state, XMLEventReader reader, String text, XMLEvent next) throws XMLStreamException {
525 Map<String, String> stringMap = handleString(state, reader, next, null);
526 if (stringMap.size() == 0){
527 String message = "No text available in <notes>";
528 fireWarningEvent(message, next, 4);
529 }else if (stringMap.size() > 1){
530 String message = "Subheadings not yet supported in <notes>";
531 fireWarningEvent(message, next, 4);
532 }else{
533 String firstSubheading = stringMap.keySet().iterator().next();
534 if ( firstSubheading != null && ! isNoteHeading (firstSubheading) ) {
535 String message = "Subheadings not yet supported in <notes>";
536 fireWarningEvent(message, next, 4);
537 }
538 }
539 for (String subheading : stringMap.keySet()){
540 text += subheading;
541 text += stringMap.get(subheading);
542 }
543 return text;
544 }
545
546 private boolean isNoteHeading(String heading) {
547 String excludePattern = "(i?)(Notes?):?";
548 return heading.matches(excludePattern);
549 }
550
551 /**
552 * @param state
553 * @param attributes
554 */
555 private Taxon createTaxonAndName(MarkupImportState state,
556 Map<String, Attribute> attributes) {
557 NonViralName<?> name;
558 Rank rank = null; //Rank.SPECIES(); // default
559 boolean isCultivar = checkAndRemoveAttributeValue(attributes, CLASS, "cultivated");
560 if (isCultivar) {
561 name = CultivarPlantName.NewInstance(rank);
562 } else {
563 name = createNameByCode(state, rank);
564 }
565 Taxon taxon = Taxon.NewInstance(name, state.getConfig().getSourceReference());
566 if (checkAndRemoveAttributeValue(attributes, CLASS, "dubious")) {
567 taxon.setDoubtful(true);
568 } else if (checkAndRemoveAttributeValue(attributes, CLASS, "excluded")) {
569 taxon.setExcluded(true);
570 }
571 // TODO insufficient, new, expected
572 handleNotYetImplementedAttribute(attributes, CLASS);
573 // From old version
574 // MarkerType markerType = getMarkerType(state, attrValue);
575 // if (markerType == null){
576 // logger.warn("Class attribute value for taxon not yet supported: " +
577 // attrValue);
578 // }else{
579 // taxon.addMarker(Marker.NewInstance(markerType, true));
580 // }
581
582 // save(name, state);
583 // save(taxon, state);
584 return taxon;
585 }
586
587 private String handleTaxonTitle(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
588 //attributes
589 String text = "";
590 Map<String, Attribute> attributes = getAttributes(parentEvent);
591 String rankAttr = getAndRemoveAttributeValue(attributes, RANK);
592 Rank rank = makeRank(state, rankAttr, false);
593 String num = getAndRemoveAttributeValue(attributes, NUM);
594 state.setCurrentTaxonNum(num);
595 checkNoAttributes(attributes, parentEvent);
596
597 // TODO handle attributes
598 while (reader.hasNext()) {
599 XMLEvent next = readNoWhitespace(reader);
600 if (next.isEndElement()) {
601 if (isMyEndingElement(next, parentEvent)) {
602 Taxon taxon = state.getCurrentTaxon();
603 String titleText = null;
604 if (checkMandatoryText(text, parentEvent)) {
605 titleText = normalize(text);
606 UUID uuidTitle = MarkupTransformer.uuidTaxonTitle;
607 ExtensionType titleExtension = this.getExtensionType(state, uuidTitle, "Taxon Title ","taxon title", "title");
608 taxon.addExtension(titleText, titleExtension);
609 }
610 taxon.getName().setRank(rank);
611 // TODO check title exists
612 return titleText;
613 } else {
614 if (isEndingElement(next, FOOTNOTE)) {
615 // NOT YET IMPLEMENTED
616 popUnimplemented(next.asEndElement());
617 } else {
618 handleUnexpectedEndElement(next.asEndElement());
619 state.setUnsuccessfull();
620 }
621 }
622 } else if (next.isStartElement()) {
623 if (isStartingElement(next, FOOTNOTE)) {
624 handleNotYetImplementedElement(next);
625 }else if (isStartingElement(next, FOOTNOTE_REF)) {
626 handleNotYetImplementedElement(next);
627 } else {
628 handleUnexpectedStartElement(next);
629 state.setUnsuccessfull();
630 }
631 } else if (next.isCharacters()) {
632 text += next.asCharacters().getData();
633
634 } else {
635 handleUnexpectedElement(next);
636 state.setUnsuccessfull();
637 }
638 }
639 return null;
640
641 }
642
643 private WriterDataHolder handleWriter(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
644 String text = "";
645 checkNoAttributes(parentEvent);
646 WriterDataHolder dataHolder = new WriterDataHolder();
647 List<FootnoteDataHolder> footnotes = new ArrayList<FootnoteDataHolder>();
648
649 // TODO handle attributes
650 while (reader.hasNext()) {
651 XMLEvent next = readNoWhitespace(reader);
652 if (isMyEndingElement(next, parentEvent)) {
653 text = CdmUtils.removeBrackets(text);
654 if (checkMandatoryText(text, parentEvent)) {
655 text = normalize(text);
656 dataHolder.writer = text;
657 dataHolder.footnotes = footnotes;
658
659 // Extension
660 UUID uuidWriterExtension = MarkupTransformer.uuidWriterExtension;
661 ExtensionType writerExtensionType = this
662 .getExtensionType(state, uuidWriterExtension,
663 "Writer", "writer", "writer");
664 Extension extension = Extension.NewInstance();
665 extension.setType(writerExtensionType);
666 extension.setValue(text);
667 dataHolder.extension = extension;
668
669 // Annotation
670 UUID uuidWriterAnnotation = MarkupTransformer.uuidWriterAnnotation;
671 AnnotationType writerAnnotationType = this.getAnnotationType(state, uuidWriterAnnotation, "Writer", "writer", "writer", null);
672 Annotation annotation = Annotation.NewInstance(text, writerAnnotationType, getDefaultLanguage(state));
673 dataHolder.annotation = annotation;
674
675 return dataHolder;
676 } else {
677 return null;
678 }
679 } else if (isStartingElement(next, FOOTNOTE_REF)) {
680 FootnoteDataHolder footNote = handleFootnoteRef(state, reader, next);
681 if (footNote.isRef()) {
682 footnotes.add(footNote);
683 } else {
684 logger.warn("Non ref footnotes not yet impelemnted");
685 }
686 } else if (next.isCharacters()) {
687 text += next.asCharacters().getData();
688
689 } else {
690 handleUnexpectedElement(next);
691 state.setUnsuccessfull();
692 }
693 }
694 throw new IllegalStateException("<writer> has no end tag");
695 }
696
697 private void registerFootnotes(MarkupImportState state, AnnotatableEntity entity, List<FootnoteDataHolder> footnotes) {
698 for (FootnoteDataHolder footNote : footnotes) {
699 registerFootnoteDemand(state, entity, footNote);
700 }
701 }
702
703 private void registerGivenFootnote(MarkupImportState state, FootnoteDataHolder footnote) {
704 state.registerFootnote(footnote);
705 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.id);
706 if (demands != null) {
707 for (AnnotatableEntity entity : demands) {
708 attachFootnote(state, entity, footnote);
709 }
710 }
711 }
712
713 private void registerGivenFigure(MarkupImportState state, XMLEvent next, String id, Media figure) {
714 state.registerFigure(id, figure);
715 Set<AnnotatableEntity> demands = state.getFigureDemands(id);
716 if (demands != null) {
717 for (AnnotatableEntity entity : demands) {
718 attachFigure(state, next, entity, figure);
719 }
720 }
721 save(figure, state);
722 }
723
724 private void registerFootnoteDemand(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
725 FootnoteDataHolder existingFootnote = state.getFootnote(footnote.ref);
726 if (existingFootnote != null) {
727 attachFootnote(state, entity, existingFootnote);
728 } else {
729 Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.ref);
730 if (demands == null) {
731 demands = new HashSet<AnnotatableEntity>();
732 state.putFootnoteDemands(footnote.ref, demands);
733 }
734 demands.add(entity);
735 }
736 }
737
738 private void registerFigureDemand(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, String figureRef) {
739 Media existingFigure = state.getFigure(figureRef);
740 if (existingFigure != null) {
741 attachFigure(state, next, entity, existingFigure);
742 } else {
743 Set<AnnotatableEntity> demands = state.getFigureDemands(figureRef);
744 if (demands == null) {
745 demands = new HashSet<AnnotatableEntity>();
746 state.putFigureDemands(figureRef, demands);
747 }
748 demands.add(entity);
749 }
750 }
751
752 private void attachFootnote(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
753 AnnotationType annotationType = this.getAnnotationType(state, MarkupTransformer.uuidFootnote, "Footnote", "An e-flora footnote", "fn", null);
754 Annotation annotation = Annotation.NewInstance(footnote.string, annotationType, getDefaultLanguage(state));
755 // TODO transient objects
756 entity.addAnnotation(annotation);
757 save(entity, state);
758 }
759
760 private void attachFigure(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, Media figure) {
761 // IdentifiableEntity<?> toSave;
762 if (entity.isInstanceOf(TextData.class)) {
763 TextData deb = CdmBase.deproxy(entity, TextData.class);
764 deb.addMedia(figure);
765 // toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
766 } else if (entity.isInstanceOf(SpecimenOrObservationBase.class)) {
767 String message = "figures for specimen should be handled as Textdata";
768 fireWarningEvent(message, next, 4);
769 // toSave = ime;
770 } else if (entity.isInstanceOf(IdentifiableMediaEntity.class)) {
771 IdentifiableMediaEntity<?> ime = CdmBase.deproxy(entity, IdentifiableMediaEntity.class);
772 ime.addMedia(figure);
773 // toSave = ime;
774 } else {
775 String message = "Unsupported entity to attach media: %s";
776 message = String.format(message, entity.getClass().getName());
777 // toSave = null;
778 }
779 save(entity, state);
780 }
781
782 private Media handleFigure(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
783 // FigureDataHolder result = new FigureDataHolder();
784
785 Map<String, Attribute> attributes = getAttributes(parentEvent);
786 String id = getAndRemoveAttributeValue(attributes, ID);
787 String type = getAndRemoveAttributeValue(attributes, TYPE);
788 String urlAttr = getAndRemoveAttributeValue(attributes, URL);
789 checkNoAttributes(attributes, parentEvent);
790
791 String urlString = null;
792 String legendString = null;
793 String titleString = null;
794 String numString = null;
795 String text = null;
796 if (isNotBlank(urlAttr)){
797 urlString = CdmUtils.Nz(state.getBaseMediaUrl()) + urlAttr;
798 }
799 while (reader.hasNext()) {
800 XMLEvent next = readNoWhitespace(reader);
801 if (isMyEndingElement(next, parentEvent)) {
802 if (isNotBlank(text)){
803 fireWarningEvent("Text not yet handled for figures: " + text, next, 4);
804 }
805 Media media = makeFigure(state, id, type, urlString, legendString, titleString, numString, next);
806 return media;
807 } else if (isStartingElement(next, FIGURE_LEGEND)) {
808 // TODO same as figure string ?
809 legendString = handleFootnoteString(state, reader, next);
810 } else if (isStartingElement(next, FIGURE_TITLE)) {
811 titleString = getCData(state, reader, next);
812 } else if (isStartingElement(next, URL)) {
813 String localUrl = getCData(state, reader, next);
814 String url = CdmUtils.Nz(state.getBaseMediaUrl()) + localUrl;
815 if (isBlank(urlString)){
816 urlString = url;
817 }
818 if (! url.equals(urlString)){
819 String message = "URL attribute and URL element differ. Attribute: %s, Element: %s";
820 fireWarningEvent(String.format(message, urlString, url), next, 2);
821 }
822 } else if (isStartingElement(next, NUM)) {
823 numString = getCData(state, reader, next);
824 } else if (next.isCharacters()) {
825 text += CdmUtils.concat("", text, next.asCharacters().getData());
826 } else {
827 fireUnexpectedEvent(next, 0);
828 }
829 }
830 throw new IllegalStateException("<figure> has no end tag");
831 }
832
833 /**
834 * @param state
835 * @param id
836 * @param type
837 * @param urlString
838 * @param legendString
839 * @param titleString
840 * @param numString
841 * @param next
842 */
843 private Media makeFigure(MarkupImportState state, String id, String type, String urlString,
844 String legendString, String titleString, String numString, XMLEvent next) {
845 Media media = null;
846 boolean isFigure = false;
847 try {
848 //TODO maybe everything is a figure as it is all taken from a book
849 if ("lineart".equals(type)) {
850 isFigure = true;
851 // media = Figure.NewInstance(url.toURI(), null, null, null);
852 } else if (type == null || "photo".equals(type)
853 || "signature".equals(type)
854 || "others".equals(type)) {
855 //TODO
856 } else {
857 String message = "Unknown figure type '%s'";
858 message = String.format(message, type);
859 fireWarningEvent(message, next, 2);
860 }
861 media = docImport.getImageMedia(urlString, docImport.getReadMediaData(), isFigure);
862
863 if (media != null){
864 // title
865 if (StringUtils.isNotBlank(titleString)) {
866 media.putTitle(getDefaultLanguage(state), titleString);
867 }
868 // legend
869 if (StringUtils.isNotBlank(legendString)) {
870 media.addDescription(legendString, getDefaultLanguage(state));
871 }
872 if (StringUtils.isNotBlank(numString)) {
873 // TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
874 // vol.13)
875 Reference<?> citation = state.getConfig().getSourceReference();
876 media.addSource(numString, "num", citation, null);
877 // TODO name used in source if available
878 }
879 // TODO which citation
880 if (StringUtils.isNotBlank(id)) {
881 media.addSource(id, null, state.getConfig().getSourceReference(), null);
882 } else {
883 String message = "Figure id should never be empty or null";
884 fireWarningEvent(message, next, 6);
885 }
886
887 // text
888 // do nothing
889 registerGivenFigure(state, next, id, media);
890
891 }else{
892 String message = "No media found: ";
893 fireWarningEvent(message, next, 4);
894 }
895 } catch (MalformedURLException e) {
896 String message = "Media uri has incorrect syntax: %s";
897 message = String.format(message, urlString);
898 fireWarningEvent(message, next, 4);
899 // } catch (URISyntaxException e) {
900 // String message = "Media uri has incorrect syntax: %s";
901 // message = String.format(message, urlString);
902 // fireWarningEvent(message, next, 4);
903 }
904
905 return media;
906 }
907
908 private FigureDataHolder handleFigureRef(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent)
909 throws XMLStreamException {
910 FigureDataHolder result = new FigureDataHolder();
911 Map<String, Attribute> attributes = getAttributes(parentEvent);
912 result.ref = getAndRemoveAttributeValue(attributes, REF);
913 checkNoAttributes(attributes, parentEvent);
914
915 // text is not handled, needed only for debugging purposes
916 String text = "";
917 while (reader.hasNext()) {
918 XMLEvent next = readNoWhitespace(reader);
919 if (isMyEndingElement(next, parentEvent)) {
920 return result;
921 } else if (isStartingElement(next, NUM)) {
922 String num = getCData(state, reader, next);
923 result.num = num; // num is not handled during import
924 } else if (isStartingElement(next, FIGURE_PART)) {
925 result.figurePart = getCData(state, reader, next);
926 } else if (next.isCharacters()) {
927 text += next.asCharacters().getData();
928 } else {
929 fireUnexpectedEvent(next, 0);
930 }
931 }
932 throw new IllegalStateException("<figureRef> has no end tag");
933 }
934
935 private FootnoteDataHolder handleFootnote(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
936 FootnoteDataHolder result = new FootnoteDataHolder();
937 Map<String, Attribute> attributes = getAttributes(parentEvent);
938 result.id = getAndRemoveAttributeValue(attributes, ID);
939 // result.ref = getAndRemoveAttributeValue(attributes, REF);
940 checkNoAttributes(attributes, parentEvent);
941
942 while (reader.hasNext()) {
943 XMLEvent next = readNoWhitespace(reader);
944 if (isStartingElement(next, FOOTNOTE_STRING)) {
945 String string = handleFootnoteString(state, reader, next);
946 result.string = string;
947 } else if (isMyEndingElement(next, parentEvent)) {
948 return result;
949 } else {
950 fireUnexpectedEvent(next, 0);
951 }
952 }
953 return result;
954 }
955
956 private FootnoteDataHolder handleFootnoteRef(MarkupImportState state,
957 XMLEventReader reader, XMLEvent parentEvent)
958 throws XMLStreamException {
959 FootnoteDataHolder result = new FootnoteDataHolder();
960 Map<String, Attribute> attributes = getAttributes(parentEvent);
961 result.ref = getAndRemoveAttributeValue(attributes, REF);
962 checkNoAttributes(attributes, parentEvent);
963
964 // text is not handled, needed only for debugging purposes
965 String text = "";
966 while (reader.hasNext()) {
967 XMLEvent next = readNoWhitespace(reader);
968 // if (isStartingElement(next, FOOTNOTE_STRING)){
969 // String string = handleFootnoteString(state, reader, next);
970 // result.string = string;
971 // }else
972 if (isMyEndingElement(next, parentEvent)) {
973 return result;
974 } else if (next.isCharacters()) {
975 text += next.asCharacters().getData();
976
977 } else {
978 fireUnexpectedEvent(next, 0);
979 }
980 }
981 return result;
982 }
983
984
985
986 private String handleFootnoteString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
987 boolean isTextMode = true;
988 String text = "";
989 while (reader.hasNext()) {
990 XMLEvent next = readNoWhitespace(reader);
991 if (isMyEndingElement(next, parentEvent)) {
992 return text;
993 } else if (next.isEndElement()) {
994 if (isEndingElement(next, FULL_NAME)) {
995 popUnimplemented(next.asEndElement());
996 } else if (isEndingElement(next, BR)) {
997 isTextMode = true;
998 } else if (isHtml(next)) {
999 text += getXmlTag(next);
1000 } else {
1001 handleUnexpectedEndElement(next.asEndElement());
1002 }
1003 } else if (next.isStartElement()) {
1004 if (isStartingElement(next, FULL_NAME)) {
1005 handleNotYetImplementedElement(next);
1006 } else if (isStartingElement(next, GATHERING)) {
1007 text += specimenImport.handleInLineGathering(state, reader, next);
1008 } else if (isStartingElement(next, REFERENCES)) {
1009 text += " " + handleInLineReferences(state, reader, next)+ " ";
1010 } else if (isStartingElement(next, BR)) {
1011 text += "<br/>";
1012 isTextMode = false;
1013 } else if (isStartingElement(next, NOMENCLATURE)) {
1014 handleNotYetImplementedElement(next);
1015 } else if (isHtml(next)) {
1016 text += getXmlTag(next);
1017 } else {
1018 handleUnexpectedStartElement(next.asStartElement());
1019 }
1020 } else if (next.isCharacters()) {
1021 if (!isTextMode) {
1022 String message = "footnoteString is not in text mode";
1023 fireWarningEvent(message, next, 6);
1024 } else {
1025 text += next.asCharacters().getData().trim();
1026 // getCData(state, reader, next); does not work as we have inner tags like <references>
1027 }
1028 } else {
1029 handleUnexpectedEndElement(next.asEndElement());
1030 }
1031 }
1032 throw new IllegalStateException("<footnoteString> has no closing tag");
1033
1034 }
1035
1036 private String handleInLineReferences(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1037 checkNoAttributes(parentEvent);
1038
1039 boolean hasReference = false;
1040 String text = "";
1041 while (reader.hasNext()) {
1042 XMLEvent next = readNoWhitespace(reader);
1043 if (isMyEndingElement(next, parentEvent)) {
1044 checkMandatoryElement(hasReference, parentEvent.asStartElement(), REFERENCE);
1045 return text;
1046 } else if (isStartingElement(next, REFERENCE)) {
1047 text += handleInLineReference(state, reader, next);
1048 hasReference = true;
1049 } else {
1050 handleUnexpectedElement(next);
1051 }
1052 }
1053 throw new IllegalStateException("<References> has no closing tag");
1054 }
1055
1056 private String handleInLineReference(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1057 Reference<?> reference = nomenclatureImport.handleReference(state, reader, parentEvent);
1058 String result = "<cdm:ref uuid='%s'>%s</ref>";
1059 result = String.format(result, reference.getUuid(), reference.getTitleCache());
1060 save(reference, state);
1061 return result;
1062 }
1063
1064
1065 private void handleFeature(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1066 Map<String, Attribute> attrs = getAttributes(parentEvent);
1067 Boolean isFreetext = getAndRemoveBooleanAttributeValue(parentEvent, attrs, IS_FREETEXT, false);
1068 String classValue =getAndRemoveRequiredAttributeValue(parentEvent, attrs, CLASS);
1069 checkNoAttributes(attrs, parentEvent);
1070
1071
1072 Feature feature = makeFeature(classValue, state, parentEvent, null);
1073 Taxon taxon = state.getCurrentTaxon();
1074 TaxonDescription taxonDescription = getTaxonDescription(taxon, state.getConfig().getSourceReference(), NO_IMAGE_GALLERY, CREATE_NEW);
1075 // TextData figureHolderTextData = null; //for use with one TextData for
1076 // all figure only
1077
1078 boolean isDescription = feature.equals(Feature.DESCRIPTION());
1079 DescriptionElementBase lastDescriptionElement = null;
1080
1081 while (reader.hasNext()) {
1082 XMLEvent next = readNoWhitespace(reader);
1083 if (isMyEndingElement(next, parentEvent)) {
1084 state.putFeatureToGeneralSorterList(feature);
1085 return;
1086 } else if (isEndingElement(next, DISTRIBUTION_LIST) || isEndingElement(next, HABITAT_LIST)) {
1087 // only handle list elements
1088 } else if (isStartingElement(next, HEADING)) {
1089 makeFeatureHeading(state, reader, classValue, feature, next);
1090 } else if (isStartingElement(next, WRITER)) {
1091 makeFeatureWriter(state, reader, feature, taxon, next);
1092 // } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
1093 // if (!feature.equals(Feature.DISTRIBUTION())) {
1094 // String message = "Distribution locality only allowed for feature of type 'distribution'";
1095 // fireWarningEvent(message, next, 4);
1096 // }
1097 // handleDistributionLocality(state, reader, next);
1098 } else if (isStartingElement(next, DISTRIBUTION_LIST) || isStartingElement(next, HABITAT_LIST)) {
1099 // only handle single list elements
1100 } else if (isStartingElement(next, HABITAT)) {
1101 if (!(feature.equals(Feature.HABITAT())
1102 || feature.equals(Feature.HABITAT_ECOLOGY())
1103 || feature.equals(Feature.ECOLOGY()))) {
1104 String message = "Habitat only allowed for feature of type 'habitat','habitat ecology' or 'ecology'";
1105 fireWarningEvent(message, next, 4);
1106 }
1107 handleHabitat(state, reader, next);
1108 } else if (isStartingElement(next, CHAR)) {
1109 List<TextData> textDataList = handleChar(state, reader, next, null);
1110 for (TextData textData : textDataList){
1111 taxonDescription.addElement(textData);
1112 }
1113 } else if (isStartingElement(next, STRING)) {
1114 lastDescriptionElement = makeFeatureString(state, reader,feature, taxonDescription, lastDescriptionElement,next, isFreetext);
1115 } else if (isStartingElement(next, FIGURE_REF)) {
1116 lastDescriptionElement = makeFeatureFigureRef(state, reader, taxonDescription, isDescription, lastDescriptionElement, next);
1117 } else if (isStartingElement(next, REFERENCES)) {
1118 // TODO details/microcitation ??
1119
1120 List<Reference<?>> refs = handleReferences(state, reader, next);
1121 if (!refs.isEmpty()) {
1122 // TODO
1123 Reference<?> descriptionRef = state.getConfig().getSourceReference();
1124 TaxonDescription description = getTaxonDescription(taxon, descriptionRef, false, true);
1125 TextData featurePlaceholder = docImport.getFeaturePlaceholder(state, description, feature, true);
1126 for (Reference<?> citation : refs) {
1127 featurePlaceholder.addSource(null, null, citation, null);
1128 }
1129 } else {
1130 String message = "No reference found in references";
1131 fireWarningEvent(message, next, 6);
1132 }
1133 } else if (isStartingElement(next, NUM)) {
1134 //TODO
1135 handleNotYetImplementedElement(next);
1136 } else if (isEndingElement(next, NUM)) {
1137 //TODO
1138 popUnimplemented(next.asEndElement());
1139 } else {
1140 handleUnexpectedElement(next);
1141 }
1142 }
1143 throw new IllegalStateException("<Feature> has no closing tag");
1144 }
1145
1146 /**
1147 * @param state
1148 * @param reader
1149 * @param taxonDescription
1150 * @param isDescription
1151 * @param lastDescriptionElement
1152 * @param next
1153 * @return
1154 * @throws XMLStreamException
1155 */
1156 private DescriptionElementBase makeFeatureFigureRef(MarkupImportState state, XMLEventReader reader,TaxonDescription taxonDescription,
1157 boolean isDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next) throws XMLStreamException {
1158 FigureDataHolder figureHolder = handleFigureRef(state, reader, next);
1159 Feature figureFeature = getFeature(state, MarkupTransformer.uuidFigures, "Figures", "Figures", "Fig.",null);
1160 if (isDescription) {
1161 TextData figureHolderTextData = null;
1162 // if (figureHolderTextData == null){
1163 figureHolderTextData = TextData.NewInstance(figureFeature);
1164 if (StringUtils.isNotBlank(figureHolder.num)) {
1165 String annotationText = "<num>" + figureHolder.num.trim() + "</num>";
1166 Annotation annotation = Annotation.NewInstance(annotationText, AnnotationType.TECHNICAL(), getDefaultLanguage(state));
1167 figureHolderTextData.addAnnotation(annotation);
1168 }
1169 if (StringUtils.isNotBlank(figureHolder.figurePart)) {
1170 String annotationText = "<figurePart>"+ figureHolder.figurePart.trim() + "</figurePart>";
1171 Annotation annotation = Annotation.NewInstance(annotationText,AnnotationType.EDITORIAL(), getDefaultLanguage(state));
1172 figureHolderTextData.addAnnotation(annotation);
1173 }
1174 // if (StringUtils.isNotBlank(figureText)){
1175 // figureHolderTextData.putText(language, figureText);
1176 // }
1177 taxonDescription.addElement(figureHolderTextData);
1178 // }
1179 registerFigureDemand(state, next, figureHolderTextData, figureHolder.ref);
1180 } else {
1181 if (lastDescriptionElement == null) {
1182 String message = "No description element created yet that can be referred by figure. Create new TextData instead";
1183 fireWarningEvent(message, next, 4);
1184 lastDescriptionElement = TextData.NewInstance(figureFeature);
1185 taxonDescription.addElement(lastDescriptionElement);
1186 }
1187 registerFigureDemand(state, next, lastDescriptionElement, figureHolder.ref);
1188 }
1189 return lastDescriptionElement;
1190 }
1191
1192 /**
1193 * @param state
1194 * @param reader
1195 * @param feature
1196 * @param taxonDescription
1197 * @param lastDescriptionElement
1198 * @param distributionList
1199 * @param next
1200 * @return
1201 * @throws XMLStreamException
1202 */
1203 private DescriptionElementBase makeFeatureString(MarkupImportState state,XMLEventReader reader, Feature feature,
1204 TaxonDescription taxonDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next, Boolean isFreetext) throws XMLStreamException {
1205
1206 //for specimen only
1207 if (feature.equals(Feature.SPECIMEN()) || feature.equals(Feature.MATERIALS_EXAMINED())){
1208
1209
1210 List<DescriptionElementBase> specimens = specimenImport.handleMaterialsExamined(state, reader, next, feature);
1211 for (DescriptionElementBase specimen : specimens){
1212 taxonDescription.addElement(specimen);
1213 lastDescriptionElement = specimen;
1214 }
1215 state.setCurrentCollector(null);
1216
1217 return lastDescriptionElement;
1218 }else{
1219
1220 //others
1221 Map<String, String> subheadingMap = handleString(state, reader, next, feature);
1222 for (String subheading : subheadingMap.keySet()) {
1223 Feature subheadingFeature = feature;
1224 if (StringUtils.isNotBlank(subheading) && subheadingMap.size() > 1) {
1225 subheadingFeature = makeFeature(subheading, state, next, null);
1226 }
1227 if (feature.equals(Feature.COMMON_NAME()) && (isFreetext == null || !isFreetext)){
1228 List<DescriptionElementBase> commonNames = makeVernacular(state, subheading, subheadingMap.get(subheading));
1229 for (DescriptionElementBase commonName : commonNames){
1230 taxonDescription.addElement(commonName);
1231 lastDescriptionElement = commonName;
1232 }
1233 }else {
1234 TextData textData = TextData.NewInstance(subheadingFeature);
1235 textData.putText(getDefaultLanguage(state), subheadingMap.get(subheading));
1236 taxonDescription.addElement(textData);
1237 lastDescriptionElement = textData;
1238 // TODO how to handle figures when these data are split in
1239 // subheadings
1240 }
1241 }
1242 return lastDescriptionElement;
1243 }
1244 }
1245
1246 private List<DescriptionElementBase> makeVernacular(MarkupImportState state, String subheading, String commonNameString) throws XMLStreamException {
1247 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
1248 String[] splits = commonNameString.split(",");
1249 for (String split : splits){
1250 split = split.trim();
1251 if (! split.matches(".*\\(.*\\)\\.?")){
1252 fireWarningEvent("Common name string '"+split+"' does not match given pattern", state.getReader().peek(), 4);
1253 }
1254
1255 String name = split.replaceAll("\\(.*\\)", "").replace(".", "").trim();
1256 String languageStr = split.replaceFirst(".*\\(", "").replaceAll("\\)\\.?", "").trim();
1257
1258 Language language = null;
1259 if (StringUtils.isNotBlank(languageStr)){
1260 try {
1261 UUID langUuid = state.getTransformer().getLanguageUuid(languageStr);
1262 TermVocabulary<?> voc = null;
1263 language = getLanguage(state, langUuid, languageStr, languageStr, null, voc);
1264 if (language == null){
1265 logger.warn("Language " + languageStr + " not recognized by transformer");
1266 }
1267 } catch (UndefinedTransformerMethodException e) {
1268 throw new RuntimeException(e);
1269 }
1270 }
1271 NamedArea area = null;
1272 CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(name, language, area);
1273 result.add(commonTaxonName);
1274 }
1275
1276 return result;
1277 }
1278
1279 /**
1280 * @param state
1281 * @param reader
1282 * @param feature
1283 * @param taxon
1284 * @param next
1285 * @throws XMLStreamException
1286 */
1287 private void makeFeatureWriter(MarkupImportState state,XMLEventReader reader, Feature feature, Taxon taxon, XMLEvent next) throws XMLStreamException {
1288 WriterDataHolder writer = handleWriter(state, reader, next);
1289 if (isNotBlank(writer.writer)) {
1290 // TODO
1291 Reference<?> ref = state.getConfig().getSourceReference();
1292 TaxonDescription description = getTaxonDescription(taxon, ref,
1293 false, true);
1294 TextData featurePlaceholder = docImport.getFeaturePlaceholder(state,
1295 description, feature, true);
1296 featurePlaceholder.addAnnotation(writer.annotation);
1297 registerFootnotes(state, featurePlaceholder, writer.footnotes);
1298 } else {
1299 String message = "Writer element is empty";
1300 fireWarningEvent(message, next, 4);
1301 }
1302 }
1303
1304 /**
1305 * @param state
1306 * @param reader
1307 * @param classValue
1308 * @param feature
1309 * @param next
1310 * @throws XMLStreamException
1311 */
1312 private void makeFeatureHeading(MarkupImportState state, XMLEventReader reader, String classValue, Feature feature, XMLEvent next) throws XMLStreamException {
1313 String heading = handleHeading(state, reader, next);
1314 if (StringUtils.isNotBlank(heading)) {
1315 if (!heading.equalsIgnoreCase(classValue)) {
1316 try {
1317 if (!feature.equals(state.getTransformer().getFeatureByKey(
1318 heading))) {
1319 UUID headerFeatureUuid = state.getTransformer()
1320 .getFeatureUuid(heading);
1321 if (!feature.getUuid().equals(headerFeatureUuid)) {
1322 String message = "Feature heading '%s' differs from feature class '%s' and can not be transformed to feature";
1323 message = String.format(message, heading,
1324 classValue);
1325 fireWarningEvent(message, next, 1);
1326 }
1327 }
1328 } catch (UndefinedTransformerMethodException e) {
1329 throw new RuntimeException(e);
1330 }
1331 } else {
1332 // do nothing
1333 }
1334 }
1335 }
1336
1337 private List<Reference<?>> handleReferences(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1338 // attributes
1339 Map<String, Attribute> attributes = getAttributes(parentEvent);
1340 String bibliography = getAndRemoveAttributeValue(attributes,
1341 BIBLIOGRAPHY);
1342 String serialsAbbreviations = getAndRemoveAttributeValue(attributes,
1343 SERIALS_ABBREVIATIONS);
1344 if (isNotBlank(bibliography) || isNotBlank(serialsAbbreviations)) {
1345 String message = "Attributes not yet implemented for <references>";
1346 fireWarningEvent(message, parentEvent, 4);
1347 }
1348
1349 List<Reference<?>> result = new ArrayList<Reference<?>>();
1350
1351 // elements
1352 while (reader.hasNext()) {
1353 XMLEvent next = readNoWhitespace(reader);
1354 if (next.isEndElement()) {
1355 if (isMyEndingElement(next, parentEvent)) {
1356 return result;
1357 } else {
1358 if (isEndingElement(next, HEADING)) {
1359 // NOT YET IMPLEMENTED
1360 popUnimplemented(next.asEndElement());
1361 } else if (isEndingElement(next, WRITER)) {
1362 // NOT YET IMPLEMENTED
1363 popUnimplemented(next.asEndElement());
1364 } else if (isEndingElement(next, FOOTNOTE)) {
1365 // NOT YET IMPLEMENTED
1366 popUnimplemented(next.asEndElement());
1367 } else if (isEndingElement(next, STRING)) {
1368 // NOT YET IMPLEMENTED
1369 popUnimplemented(next.asEndElement());
1370 } else if (isEndingElement(next, REF_NUM)) {
1371 // NOT YET IMPLEMENTED
1372 popUnimplemented(next.asEndElement());
1373 } else {
1374 handleUnexpectedEndElement(next.asEndElement());
1375 }
1376 }
1377 } else if (next.isStartElement()) {
1378 if (isStartingElement(next, HEADING)) {
1379 handleNotYetImplementedElement(next);
1380 } else if (isStartingElement(next, SUB_HEADING)) {
1381 String subheading = getCData(state, reader, next).trim();
1382 String excludePattern = "(i?)(References?|Literature):?";
1383 if (!subheading.matches(excludePattern)) {
1384 fireNotYetImplementedElement(next.getLocation(), next.asStartElement().getName(), 0);
1385 }
1386 } else if (isStartingElement(next, WRITER)) {
1387 handleNotYetImplementedElement(next);
1388 } else if (isStartingElement(next, FOOTNOTE)) {
1389 handleNotYetImplementedElement(next);
1390 } else if (isStartingElement(next, STRING)) {
1391 handleNotYetImplementedElement(next);
1392 } else if (isStartingElement(next, REF_NUM)) {
1393 handleNotYetImplementedElement(next);
1394 } else if (isStartingElement(next, REFERENCE)) {
1395 Reference<?> ref = nomenclatureImport.handleReference(state, reader, next);
1396 result.add(ref);
1397 } else {
1398 handleUnexpectedStartElement(next);
1399 }
1400 } else {
1401 handleUnexpectedElement(next);
1402 }
1403 }
1404 throw new IllegalStateException("<References> has no closing tag");
1405 }
1406
1407 private void handleHabitat(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1408 checkNoAttributes(parentEvent);
1409 Taxon taxon = state.getCurrentTaxon();
1410 // TODO which ref to take?
1411 Reference<?> ref = state.getConfig().getSourceReference();
1412
1413 String text = "";
1414 while (reader.hasNext()) {
1415 XMLEvent next = readNoWhitespace(reader);
1416 if (isMyEndingElement(next, parentEvent)) {
1417 TaxonDescription description = getTaxonDescription(taxon, ref,
1418 false, true);
1419 UUID uuidExtractedHabitat = MarkupTransformer.uuidExtractedHabitat;
1420 Feature feature = getFeature(
1421 state,
1422 uuidExtractedHabitat,
1423 "Extracted Habitat",
1424 "An structured habitat that was extracted from a habitat text",
1425 "extr. habit.", null);
1426 TextData habitat = TextData.NewInstance(feature);
1427 habitat.putText(getDefaultLanguage(state), text);
1428 description.addElement(habitat);
1429
1430 return;
1431 } else if (next.isStartElement()) {
1432 if (isStartingElement(next, ALTITUDE)) {
1433 text = text.trim() + getTaggedCData(state, reader, next);
1434 } else if (isStartingElement(next, LIFE_CYCLE_PERIODS)) {
1435 handleNotYetImplementedElement(next);
1436 } else {
1437 handleUnexpectedStartElement(next.asStartElement());
1438 }
1439 } else if (next.isCharacters()) {
1440 text += next.asCharacters().getData();
1441 } else {
1442 handleUnexpectedElement(next);
1443 }
1444 }
1445 throw new IllegalStateException("<Habitat> has no closing tag");
1446 }
1447
1448 private String getTaggedCData(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1449 checkNoAttributes(parentEvent);
1450
1451 String text = getXmlTag(parentEvent);
1452 while (reader.hasNext()) {
1453 XMLEvent next = readNoWhitespace(reader);
1454 if (isMyEndingElement(next, parentEvent)) {
1455 text += getXmlTag(next);
1456 return text;
1457 } else if (next.isStartElement()) {
1458 text += getTaggedCData(state, reader, next);
1459 } else if (next.isEndElement()) {
1460 text += getTaggedCData(state, reader, next);
1461 } else if (next.isCharacters()) {
1462 text += next.asCharacters().getData();
1463 } else {
1464 handleUnexpectedEndElement(next.asEndElement());
1465 }
1466 }
1467 throw new IllegalStateException("Some tag has no closing tag");
1468 }
1469
1470 private String handleDistributionLocality(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1471 Map<String, Attribute> attributes = getAttributes(parentEvent);
1472 String classValue = getAndRemoveRequiredAttributeValue(parentEvent, attributes, CLASS);
1473 String statusValue =getAndRemoveAttributeValue(attributes, STATUS);
1474 String frequencyValue =getAndRemoveAttributeValue(attributes, FREQUENCY);
1475
1476
1477 Taxon taxon = state.getCurrentTaxon();
1478 // TODO which ref to take?
1479 Reference<?> ref = state.getConfig().getSourceReference();
1480
1481 String text = "";
1482 while (reader.hasNext()) {
1483 XMLEvent next = readNoWhitespace(reader);
1484 if (isMyEndingElement(next, parentEvent)) {
1485 if (StringUtils.isNotBlank(text)) {
1486 String label = CdmUtils.removeTrailingDot(normalize(text));
1487 TaxonDescription description = getTaxonDescription(taxon, ref, false, true);
1488 NamedAreaLevel level = makeNamedAreaLevel(state,classValue, next);
1489
1490 //status
1491 PresenceAbsenceTermBase<?> status = null;
1492 if (isNotBlank(statusValue)){
1493 try {
1494 status = state.getTransformer().getPresenceTermByKey(statusValue);
1495 if (status == null){
1496 //TODO
1497 String message = "The presence/absence status '%s' could not be transformed to an CDM status";
1498 fireWarningEvent(String.format(message, statusValue), next, 4);
1499 }
1500 } catch (UndefinedTransformerMethodException e) {
1501 throw new RuntimeException(e);
1502 }
1503 }else{
1504 status = PresenceTerm.PRESENT();
1505 }
1506 //frequency
1507 if (isNotBlank(frequencyValue)){
1508 String message = "The frequency attribute is currently not yet available in CDM";
1509 fireWarningEvent(message, parentEvent, 6);
1510 }
1511
1512 NamedArea higherArea = null;
1513 List<NamedArea> areas = new ArrayList<NamedArea>();
1514
1515 String patSingleArea = "([^,\\(]{3,})";
1516 String patSeparator = "(,|\\sand\\s)";
1517 String hierarchiePattern = String.format("%s\\((%s(%s%s)*)\\)",patSingleArea, patSingleArea, patSeparator, patSingleArea);
1518 Pattern patHierarchie = Pattern.compile(hierarchiePattern, Pattern.CASE_INSENSITIVE);
1519 Matcher matcher = patHierarchie.matcher(label);
1520 if (matcher.matches()){
1521 String higherAreaStr = matcher.group(1).trim();
1522 higherArea = makeArea(state, higherAreaStr, level);
1523 String[] innerAreas = matcher.group(2).split(patSeparator);
1524 for (String innerArea : innerAreas){
1525 if (isNotBlank(innerArea)){
1526 NamedArea singleArea = makeArea(state, innerArea.trim(), level);
1527 areas.add(singleArea);
1528 NamedArea partOf = singleArea.getPartOf();
1529 // if (partOf == null){
1530 // singleArea.setPartOf(higherArea);
1531 // }
1532 }
1533 }
1534 }else{
1535 NamedArea singleArea = makeArea(state, label, level);
1536 areas.add(singleArea);
1537 }
1538
1539 for (NamedArea area : areas){
1540 //create distribution
1541 Distribution distribution = Distribution.NewInstance(area,status);
1542 description.addElement(distribution);
1543 }
1544 } else {
1545 String message = "Empty distribution locality";
1546 fireWarningEvent(message, next, 4);
1547 }
1548 return text;
1549 } else if (isStartingElement(next, COORDINATES)) {
1550 //TODO
1551 handleNotYetImplementedElement(next);
1552 } else if (isEndingElement(next, COORDINATES)) {
1553 //TODO
1554 popUnimplemented(next.asEndElement());
1555 } else if (next.isCharacters()) {
1556 text += next.asCharacters().getData();
1557 } else {
1558 handleUnexpectedElement(next);
1559 }
1560 }
1561 throw new IllegalStateException("<DistributionLocality> has no closing tag");
1562 }
1563
1564 private String handleHeading(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1565 checkNoAttributes(parentEvent);
1566
1567 String text = "";
1568 while (reader.hasNext()) {
1569 XMLEvent next = readNoWhitespace(reader);
1570 if (isMyEndingElement(next, parentEvent)) {
1571 return text;
1572 } else if (next.isStartElement()) {
1573 if (isStartingElement(next, FOOTNOTE)) {
1574 handleNotYetImplementedElement(next);
1575 } else {
1576 handleUnexpectedStartElement(next.asStartElement());
1577 }
1578 } else if (next.isCharacters()) {
1579 text += next.asCharacters().getData();
1580 } else {
1581 handleUnexpectedEndElement(next.asEndElement());
1582 }
1583 }
1584 throw new IllegalStateException("<String> has no closing tag");
1585
1586 }
1587
1588 /**
1589 * Handle string
1590 * @param state
1591 * @param reader
1592 * @param parentEvent
1593 * @param feature only needed for distributionLocalities
1594 * @return
1595 * @throws XMLStreamException
1596 */
1597 private Map<String, String> handleString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature feature)throws XMLStreamException {
1598 // attributes
1599 String classValue = getClassOnlyAttribute(parentEvent, false);
1600 if (StringUtils.isNotBlank(classValue)) {
1601 String message = "class attribute for <string> not yet implemented";
1602 fireWarningEvent(message, parentEvent, 2);
1603 }
1604
1605 // subheadings
1606 Map<String, String> subHeadingMap = new HashMap<String, String>();
1607 String currentSubheading = null;
1608
1609 boolean isTextMode = true;
1610 String text = "";
1611 while (reader.hasNext()) {
1612 XMLEvent next = readNoWhitespace(reader);
1613 if (isMyEndingElement(next, parentEvent)) {
1614 putCurrentSubheading(subHeadingMap, currentSubheading, text);
1615 return subHeadingMap;
1616 } else if (isStartingElement(next, BR)) {
1617 text += "<br/>";
1618 isTextMode = false;
1619 } else if (isEndingElement(next, BR)) {
1620 isTextMode = true;
1621 } else if (isHtml(next)) {
1622 text += getXmlTag(next);
1623 } else if (isStartingElement(next, SUB_HEADING)) {
1624 text = putCurrentSubheading(subHeadingMap,currentSubheading, text);
1625 // TODO footnotes
1626 currentSubheading = getCData(state, reader, next).trim();
1627 } else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
1628 if (feature != null && !feature.equals(Feature.DISTRIBUTION())) {
1629 String message = "Distribution locality only allowed for feature of type 'distribution'";
1630 fireWarningEvent(message, next, 4);
1631 }
1632 text += handleDistributionLocality(state, reader, next);
1633 } else if (next.isCharacters()) {
1634 if (! isTextMode) {
1635 String message = "String is not in text mode";
1636 fireWarningEvent(message, next, 6);
1637 } else {
1638 text += next.asCharacters().getData();
1639 }
1640 } else if (isStartingElement(next, HEADING)) {
1641 //TODO
1642 handleNotYetImplementedElement(next);
1643 } else if (isStartingElement(next, VERNACULAR_NAMES)) {
1644 //TODO
1645 handleNotYetImplementedElement(next);
1646 } else if (isEndingElement(next, HEADING)) {
1647 //TODO
1648 popUnimplemented(next.asEndElement());
1649 } else if (isStartingElement(next, QUOTE)) {
1650 //TODO
1651 handleNotYetImplementedElement(next);
1652 } else if (isEndingElement(next, QUOTE)) {
1653 //TODO
1654 popUnimplemented(next.asEndElement());
1655 } else if (isStartingElement(next, DEDICATION)) {
1656 //TODO
1657 handleNotYetImplementedElement(next);
1658 } else if (isEndingElement(next, DEDICATION)) {
1659 //TODO
1660 popUnimplemented(next.asEndElement());
1661 } else if (isStartingElement(next, TAXONTYPE)) {
1662 //TODO
1663 handleNotYetImplementedElement(next);
1664 } else if (isEndingElement(next, TAXONTYPE)) {
1665 //TODO
1666 popUnimplemented(next.asEndElement());
1667 } else if (isStartingElement(next, FULL_NAME)) {
1668 //TODO
1669 handleNotYetImplementedElement(next);
1670 } else if (isEndingElement(next, FULL_NAME)) {
1671 //TODO
1672 popUnimplemented(next.asEndElement());
1673 }else if (isStartingElement(next, REFERENCES)) {
1674 //TODO
1675 handleNotYetImplementedElement(next);
1676 } else if (isEndingElement(next, REFERENCES)) {
1677 //TODO
1678 popUnimplemented(next.asEndElement());
1679 } else if (isStartingElement(next, GATHERING)) {
1680 //TODO
1681 handleNotYetImplementedElement(next);
1682 } else if (isEndingElement(next, GATHERING)) {
1683 //TODO
1684 popUnimplemented(next.asEndElement());
1685 } else if (isStartingElement(next, ANNOTATION)) {
1686 //TODO //TODO test handleSimpleAnnotation
1687 handleNotYetImplementedElement(next);
1688 } else if (isEndingElement(next, ANNOTATION)) {
1689 //TODO
1690 popUnimplemented(next.asEndElement());
1691 } else if (isStartingElement(next, HABITAT)) {
1692 //TODO
1693 handleNotYetImplementedElement(next);
1694 } else if (isEndingElement(next, HABITAT)) {
1695 //TODO
1696 popUnimplemented(next.asEndElement());
1697 } else if (isStartingElement(next, FIGURE_REF)) {
1698 //TODO
1699 handleNotYetImplementedElement(next);
1700 } else if (isEndingElement(next, FIGURE_REF)) {
1701 //TODO
1702 popUnimplemented(next.asEndElement());
1703 } else if (isStartingElement(next, FIGURE)) {
1704 //TODO
1705 handleNotYetImplementedElement(next);
1706 } else if (isEndingElement(next, FIGURE)) {
1707 //TODO
1708 popUnimplemented(next.asEndElement());
1709 } else if (isStartingElement(next, FOOTNOTE_REF)) {
1710 //TODO
1711 handleNotYetImplementedElement(next);
1712 } else if (isEndingElement(next, FOOTNOTE_REF)) {
1713 //TODO
1714 popUnimplemented(next.asEndElement());
1715 } else if (isStartingElement(next, FOOTNOTE)) {
1716 //TODO
1717 handleNotYetImplementedElement(next);
1718 } else if (isEndingElement(next, FOOTNOTE)) {
1719 //TODO
1720 popUnimplemented(next.asEndElement());
1721 } else if (isStartingElement(next, WRITER)) {
1722 //TODO
1723 handleNotYetImplementedElement(next);
1724 } else if (isEndingElement(next, WRITER)) {
1725 //TODO
1726 popUnimplemented(next.asEndElement());
1727 } else if (isStartingElement(next, DATES)) {
1728 //TODO
1729 handleNotYetImplementedElement(next);
1730 } else if (isEndingElement(next, DATES)) {
1731 //TODO
1732 popUnimplemented(next.asEndElement());
1733 } else {
1734 handleUnexpectedElement(next);
1735 }
1736 }
1737 throw new IllegalStateException("<String> has no closing tag");
1738 }
1739
1740 /**
1741 * @param subHeadingMap
1742 * @param currentSubheading
1743 * @param text
1744 * @return
1745 */
1746 private String putCurrentSubheading(Map<String, String> subHeadingMap, String currentSubheading, String text) {
1747 if (StringUtils.isNotBlank(text)) {
1748 text = removeStartingMinus(text);
1749 subHeadingMap.put(currentSubheading, text.trim());
1750 }
1751 return "";
1752 }
1753
1754 private String removeStartingMinus(String string) {
1755 string = replaceStart(string, "-");
1756 string = replaceStart(string, "\u002d");
1757 string = replaceStart(string, "\u2013");
1758 string = replaceStart(string, "\u2014");
1759 string = replaceStart(string, "--");
1760 return string;
1761 }
1762
1763 /**
1764 * @param value
1765 * @param replacementString
1766 */
1767 private String replaceStart(String value, String replacementString) {
1768 if (value.startsWith(replacementString) ){
1769 value = value.substring(replacementString.length()).trim();
1770 }
1771 while (value.startsWith("-") || value.startsWith("\u2014") ){
1772 value = value.substring("-".length()).trim();
1773 }
1774 return value;
1775 }
1776
1777 private String getXmlTag(XMLEvent event) {
1778 String result;
1779 if (event.isStartElement()) {
1780 result = "<" + event.asStartElement().getName().getLocalPart()
1781 + ">";
1782 } else if (event.isEndElement()) {
1783 result = "</" + event.asEndElement().getName().getLocalPart() + ">";
1784 } else {
1785 String message = "Only start or end elements are allowed as Html tags";
1786 throw new IllegalStateException(message);
1787 }
1788 return result;
1789 }
1790
1791 protected static final List<String> htmlList = Arrays.asList("sub", "sup",
1792 "ol", "ul", "li", "i", "b", "table", "br","tr","td");
1793
1794 private boolean isHtml(XMLEvent event) {
1795 if (event.isStartElement()) {
1796 String tag = event.asStartElement().getName().getLocalPart();
1797 return htmlList.contains(tag);
1798 } else if (event.isEndElement()) {
1799 String tag = event.asEndElement().getName().getLocalPart();
1800 return htmlList.contains(tag);
1801 } else {
1802 return false;
1803 }
1804
1805 }
1806
1807 /**
1808 * Handle the char or subchar element. As
1809 * @param state the import state
1810 * @param reader
1811 * @param parentEvent
1812 * @param parentFeature in case of subchars we need to attache the newly created feature to a parent feature, should be <code>null</code>
1813 * for top level chars.
1814 * @return List of TextData. Not a single one as the recursive TextData will also be returned
1815 * @throws XMLStreamException
1816 */
1817 private List<TextData> handleChar(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature parentFeature) throws XMLStreamException {
1818 List<TextData> result = new ArrayList<TextData>();
1819 String classValue = getClassOnlyAttribute(parentEvent);
1820 Feature feature = makeFeature(classValue, state, parentEvent, parentFeature);
1821
1822 boolean isTextMode = true;
1823 String text = "";
1824 while (reader.hasNext()) {
1825 XMLEvent next = readNoWhitespace(reader);
1826 if (isMyEndingElement(next, parentEvent)) {
1827 state.putFeatureToCharSorterList(feature);
1828 TextData textData = TextData.NewInstance(feature);
1829 textData.putText(getDefaultLanguage(state), text);
1830 result.add(textData);
1831 return result;
1832 } else if (isStartingElement(next, FIGURE_REF)) {
1833 //TODO
1834 handleNotYetImplementedElement(next);
1835 } else if (isStartingElement(next, FOOTNOTE_REF)) {
1836 //TODO
1837 handleNotYetImplementedElement(next);
1838 } else if (isStartingElement(next, BR)) {
1839 text += "<br/>";
1840 isTextMode = false;
1841 } else if (isEndingElement(next, BR)) {
1842 isTextMode = true;
1843 } else if (isHtml(next)) {
1844 text += getXmlTag(next);
1845 } else if (next.isStartElement()) {
1846 if (isStartingElement(next, ANNOTATION)) {
1847 handleNotYetImplementedElement(next); //TODO test handleSimpleAnnotation
1848 } else if (isStartingElement(next, ITALICS)) {
1849 handleNotYetImplementedElement(next);
1850 } else if (isStartingElement(next, BOLD)) {
1851 handleNotYetImplementedElement(next);
1852 } else if (isStartingElement(next, FIGURE)) {
1853 handleFigure(state, reader, next);
1854 } else if (isStartingElement(next, SUB_CHAR)) {
1855 List<TextData> textData = handleChar(state, reader, next, feature);
1856 result.addAll(textData);
1857 } else if (isStartingElement(next, FOOTNOTE)) {
1858 FootnoteDataHolder footnote = handleFootnote(state, reader, next);
1859 if (footnote.isRef()) {
1860 String message = "Ref footnote not implemented here";
1861 fireWarningEvent(message, next, 4);
1862 } else {
1863 registerGivenFootnote(state, footnote);
1864 }
1865 } else {
1866 handleUnexpectedStartElement(next.asStartElement());
1867 }
1868 } else if (next.isCharacters()) {
1869 if (!isTextMode) {
1870 String message = "String is not in text mode";
1871 fireWarningEvent(message, next, 6);
1872 } else {
1873 text += next.asCharacters().getData();
1874 }
1875 } else {
1876 handleUnexpectedEndElement(next.asEndElement());
1877 }
1878 }
1879 throw new IllegalStateException("RefPart has no closing tag");
1880 }
1881
1882 /**
1883 * @param classValue
1884 * @param state
1885 * @param parentEvent
1886 * @param parentFeature
1887 * @return
1888 * @throws UndefinedTransformerMethodException
1889 */
1890 private Feature makeFeature(String classValue, MarkupImportState state, XMLEvent parentEvent, Feature parentFeature) {
1891 UUID uuid;
1892 try {
1893 String featureText = StringUtils.capitalize(classValue);
1894 if (parentFeature != null){
1895 featureText = "<%s>" + featureText;
1896 featureText = String.format(featureText, parentFeature.getTitleCache());
1897 classValue = "<%s>" + classValue;
1898 classValue = String.format(classValue, parentFeature.getTitleCache());
1899 }
1900
1901
1902 Feature feature = state.getTransformer().getFeatureByKey(classValue);
1903 if (feature != null) {
1904 return feature;
1905 }
1906 uuid = state.getTransformer().getFeatureUuid(classValue);
1907 if (uuid == null) {
1908 uuid = state.getFeatureUuid(classValue);
1909 }
1910 if (uuid == null) {
1911 // TODO
1912 String message = "Uuid is not defined for '%s'";
1913 message = String.format(message, classValue);
1914 fireWarningEvent(message, parentEvent, 8);
1915 uuid = UUID.randomUUID();
1916 state.putFeatureUuid(classValue, uuid);
1917 }
1918
1919 // TODO eFlora vocabulary
1920 TermVocabulary<Feature> voc = null;
1921 feature = getFeature(state, uuid, featureText, featureText, classValue, voc);
1922 if (parentFeature != null){
1923 parentFeature.addIncludes(feature);
1924 save(parentFeature, state);
1925 }
1926 save(feature, state);
1927
1928 if (feature == null) {
1929 throw new NullPointerException(classValue + " not recognized as a feature");
1930 }
1931 // state.putFeatureToCurrentList(feature);
1932 return feature;
1933 } catch (Exception e) {
1934 String message = "Could not create feature for %s: %s";
1935 message = String.format(message, classValue, e.getMessage());
1936 fireWarningEvent(message, parentEvent, 4);
1937 e.printStackTrace();
1938 return Feature.UNKNOWN();
1939 }
1940 }
1941
1942
1943
1944 }