Project

General

Profile

Revision 5abe4a90

ID5abe4a901530322b4fbf2178c37a5b2ddb15f0f8
Parent 7fae61bc
Child 891e3d78

Added by Andreas Müller about 7 years ago

create Markup feature import

View differences:

.gitattributes
419 419
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/LookAheadEventReader.java -text
420 420
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupDocumentImport.java -text
421 421
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupDocumentImportNoComponent.java -text
422
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupFeatureImport.java -text
422 423
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportBase.java -text
423 424
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportConfigurator.java -text
424 425
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupImportState.java -text
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupDocumentImportNoComponent.java
11 11

  
12 12
import java.net.MalformedURLException;
13 13
import java.net.URL;
14
import java.util.ArrayList;
15
import java.util.Arrays;
16
import java.util.HashMap;
17 14
import java.util.HashSet;
18
import java.util.List;
19 15
import java.util.Map;
20 16
import java.util.Set;
21 17
import java.util.UUID;
22
import java.util.regex.Matcher;
23
import java.util.regex.Pattern;
24 18

  
25 19
import javax.xml.stream.Location;
26 20
import javax.xml.stream.XMLEventReader;
......
32 26
import org.apache.commons.lang.StringUtils;
33 27
import org.apache.log4j.Logger;
34 28

  
35
import eu.etaxonomy.cdm.common.CdmUtils;
36 29
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
37 30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
38
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
39
import eu.etaxonomy.cdm.model.common.Annotation;
40
import eu.etaxonomy.cdm.model.common.AnnotationType;
41
import eu.etaxonomy.cdm.model.common.CdmBase;
42
import eu.etaxonomy.cdm.model.common.Extension;
43 31
import eu.etaxonomy.cdm.model.common.ExtensionType;
44 32
import eu.etaxonomy.cdm.model.common.Language;
45
import eu.etaxonomy.cdm.model.common.TermVocabulary;
46
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
47
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
48
import eu.etaxonomy.cdm.model.description.Distribution;
49 33
import eu.etaxonomy.cdm.model.description.Feature;
50 34
import eu.etaxonomy.cdm.model.description.PolytomousKey;
51 35
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
52
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
53
import eu.etaxonomy.cdm.model.description.PresenceTerm;
54 36
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55 37
import eu.etaxonomy.cdm.model.description.TextData;
56
import eu.etaxonomy.cdm.model.location.NamedArea;
57
import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
58
import eu.etaxonomy.cdm.model.media.IdentifiableMediaEntity;
59
import eu.etaxonomy.cdm.model.media.Media;
60 38
import eu.etaxonomy.cdm.model.name.CultivarPlantName;
61 39
import eu.etaxonomy.cdm.model.name.NonViralName;
62 40
import eu.etaxonomy.cdm.model.name.Rank;
63
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
64 41
import eu.etaxonomy.cdm.model.reference.Reference;
65 42
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
66 43
import eu.etaxonomy.cdm.model.taxon.Classification;
......
73 50
 * 
74 51
 */
75 52
public class MarkupDocumentImportNoComponent extends MarkupImportBase {
53
	@SuppressWarnings("unused")
76 54
	private static final Logger logger = Logger.getLogger(MarkupDocumentImportNoComponent.class);
77 55
	
78 56
	private MarkupKeyImport keyImport;
79
	private MarkupSpecimenImport specimenImport;
80 57

  
81
	private MarkupNomenclatureImport nomenclatureImport;
82 58
	private MarkupModsImport modsImport;
59
	private MarkupFeatureImport featureImport;
60
	private MarkupSpecimenImport specimenImport;
61
	private MarkupNomenclatureImport nomenclatureImport;
83 62

  
84 63
	public MarkupDocumentImportNoComponent(MarkupDocumentImport docImport) {
85 64
		super(docImport);
......
87 66
		this.specimenImport = new MarkupSpecimenImport(docImport);
88 67
		this.nomenclatureImport = new MarkupNomenclatureImport(docImport, keyImport, specimenImport);
89 68
		this.modsImport = new MarkupModsImport(docImport);
69
		this.featureImport = new MarkupFeatureImport(docImport, specimenImport, nomenclatureImport);
90 70
	}
91 71

  
92 72
	public void doInvoke(MarkupImportState state) throws XMLStreamException { 
......
398 378
					nomenclatureImport.handleNomenclature(state, reader, next);
399 379
					hasNomenclature = true;
400 380
				} else if (isStartingElement(next, FEATURE)) {
401
					handleFeature(state, reader, next);
381
					featureImport.handleFeature(state, reader, next);
402 382
				} else if (isStartingElement(next, NOTES)) {
403 383
					// TODO is this the correct way to handle notes?
404 384
					String note = handleNotes(state, reader, next);
......
425 405
						desc.addElement(textData);
426 406
					}
427 407
					textData = (TextData)desc.getElements().iterator().next();
428
					makeFeatureFigureRef(state, reader, desc, false, textData, next);
408
					featureImport.makeFeatureFigureRef(state, reader, desc, false, textData, next);
429 409
				} else if (isStartingElement(next, FIGURE)) {
430
					handleFigure(state, reader, next);
410
					handleFigure(state, reader, next, specimenImport, nomenclatureImport);
431 411
				} else if (isStartingElement(next, FOOTNOTE)) {
432
					FootnoteDataHolder footnote = handleFootnote(state, reader,	next);
412
					FootnoteDataHolder footnote = handleFootnote(state, reader,	next, specimenImport, nomenclatureImport);
433 413
					if (footnote.isRef()) {
434 414
						String message = "Ref footnote not implemented here";
435 415
						fireWarningEvent(message, next, 4);
......
643 623

  
644 624
	}
645 625

  
646
	private WriterDataHolder handleWriter(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
647
		String text = "";
648
		checkNoAttributes(parentEvent);
649
		WriterDataHolder dataHolder = new WriterDataHolder();
650
		List<FootnoteDataHolder> footnotes = new ArrayList<FootnoteDataHolder>();
651

  
652
		// TODO handle attributes
653
		while (reader.hasNext()) {
654
			XMLEvent next = readNoWhitespace(reader);
655
			if (isMyEndingElement(next, parentEvent)) {
656
				text = CdmUtils.removeBrackets(text);
657
				if (checkMandatoryText(text, parentEvent)) {
658
					text = normalize(text);
659
					dataHolder.writer = text;
660
					dataHolder.footnotes = footnotes;
661

  
662
					// Extension
663
					UUID uuidWriterExtension = MarkupTransformer.uuidWriterExtension;
664
					ExtensionType writerExtensionType = this
665
							.getExtensionType(state, uuidWriterExtension,
666
									"Writer", "writer", "writer");
667
					Extension extension = Extension.NewInstance();
668
					extension.setType(writerExtensionType);
669
					extension.setValue(text);
670
					dataHolder.extension = extension;
671

  
672
					// Annotation
673
					UUID uuidWriterAnnotation = MarkupTransformer.uuidWriterAnnotation;
674
					AnnotationType writerAnnotationType = this.getAnnotationType(state, uuidWriterAnnotation, "Writer", "writer", "writer", null);
675
					Annotation annotation = Annotation.NewInstance(text, writerAnnotationType, getDefaultLanguage(state));
676
					dataHolder.annotation = annotation;
677

  
678
					return dataHolder;
679
				} else {
680
					return null;
681
				}
682
			} else if (isStartingElement(next, FOOTNOTE_REF)) {
683
				FootnoteDataHolder footNote = handleFootnoteRef(state, reader, next);
684
				if (footNote.isRef()) {
685
					footnotes.add(footNote);
686
				} else {
687
					logger.warn("Non ref footnotes not yet impelemnted");
688
				}
689
			} else if (next.isCharacters()) {
690
				text += next.asCharacters().getData();
691

  
692
			} else {
693
				handleUnexpectedElement(next);
694
				state.setUnsuccessfull();
695
			}
696
		}
697
		throw new IllegalStateException("<writer> has no end tag");
698
	}
699

  
700
	private void registerFootnotes(MarkupImportState state, AnnotatableEntity entity, List<FootnoteDataHolder> footnotes) {
701
		for (FootnoteDataHolder footNote : footnotes) {
702
			registerFootnoteDemand(state, entity, footNote);
703
		}
704
	}
705

  
706
	private void registerGivenFootnote(MarkupImportState state, FootnoteDataHolder footnote) {
707
		state.registerFootnote(footnote);
708
		Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.id);
709
		if (demands != null) {
710
			for (AnnotatableEntity entity : demands) {
711
				attachFootnote(state, entity, footnote);
712
			}
713
		}
714
	}
715

  
716
	private void registerGivenFigure(MarkupImportState state, XMLEvent next, String id, Media figure) {
717
		state.registerFigure(id, figure);
718
		Set<AnnotatableEntity> demands = state.getFigureDemands(id);
719
		if (demands != null) {
720
			for (AnnotatableEntity entity : demands) {
721
				attachFigure(state, next, entity, figure);
722
			}
723
		}
724
		save(figure, state);
725
	}
726

  
727
	private void registerFootnoteDemand(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
728
		FootnoteDataHolder existingFootnote = state.getFootnote(footnote.ref);
729
		if (existingFootnote != null) {
730
			attachFootnote(state, entity, existingFootnote);
731
		} else {
732
			Set<AnnotatableEntity> demands = state.getFootnoteDemands(footnote.ref);
733
			if (demands == null) {
734
				demands = new HashSet<AnnotatableEntity>();
735
				state.putFootnoteDemands(footnote.ref, demands);
736
			}
737
			demands.add(entity);
738
		}
739
	}
740

  
741
	private void registerFigureDemand(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, String figureRef) {
742
		Media existingFigure = state.getFigure(figureRef);
743
		if (existingFigure != null) {
744
			attachFigure(state, next, entity, existingFigure);
745
		} else {
746
			Set<AnnotatableEntity> demands = state.getFigureDemands(figureRef);
747
			if (demands == null) {
748
				demands = new HashSet<AnnotatableEntity>();
749
				state.putFigureDemands(figureRef, demands);
750
			}
751
			demands.add(entity);
752
		}
753
	}
754

  
755
	private void attachFootnote(MarkupImportState state, AnnotatableEntity entity, FootnoteDataHolder footnote) {
756
		AnnotationType annotationType = this.getAnnotationType(state, MarkupTransformer.uuidFootnote, "Footnote", "An e-flora footnote", "fn", null);
757
		Annotation annotation = Annotation.NewInstance(footnote.string, annotationType, getDefaultLanguage(state));
758
		// TODO transient objects
759
		entity.addAnnotation(annotation);
760
		save(entity, state);
761
	}
762

  
763
	private void attachFigure(MarkupImportState state, XMLEvent next, AnnotatableEntity entity, Media figure) {
764
		// IdentifiableEntity<?> toSave;
765
		if (entity.isInstanceOf(TextData.class)) {
766
			TextData deb = CdmBase.deproxy(entity, TextData.class);
767
			deb.addMedia(figure);
768
			// toSave = ((TaxonDescription)deb.getInDescription()).getTaxon();
769
		} else if (entity.isInstanceOf(SpecimenOrObservationBase.class)) {
770
			String message = "figures for specimen should be handled as Textdata";
771
			fireWarningEvent(message, next, 4);
772
			// toSave = ime;
773
		} else if (entity.isInstanceOf(IdentifiableMediaEntity.class)) {
774
			IdentifiableMediaEntity<?> ime = CdmBase.deproxy(entity, IdentifiableMediaEntity.class);
775
			ime.addMedia(figure);
776
			// toSave = ime;
777
		} else {
778
			String message = "Unsupported entity to attach media: %s";
779
			message = String.format(message, entity.getClass().getName());
780
			// toSave = null;
781
		}
782
		save(entity, state);
783
	}
784

  
785
	private Media handleFigure(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
786
		// FigureDataHolder result = new FigureDataHolder();
787

  
788
		Map<String, Attribute> attributes = getAttributes(parentEvent);
789
		String id = getAndRemoveAttributeValue(attributes, ID);
790
		String type = getAndRemoveAttributeValue(attributes, TYPE);
791
		String urlAttr = getAndRemoveAttributeValue(attributes, URL);
792
		checkNoAttributes(attributes, parentEvent);
793

  
794
		String urlString = null;
795
		String legendString = null;
796
		String titleString = null;
797
		String numString = null;
798
		String text = null;
799
		if (isNotBlank(urlAttr)){
800
			urlString = CdmUtils.Nz(state.getBaseMediaUrl()) + urlAttr;
801
		}
802
		while (reader.hasNext()) {
803
			XMLEvent next = readNoWhitespace(reader);
804
			if (isMyEndingElement(next, parentEvent)) {
805
				if (isNotBlank(text)){
806
					fireWarningEvent("Text not yet handled for figures: " + text, next, 4);
807
				}
808
				Media media = makeFigure(state, id, type, urlString, legendString, titleString, numString, next);
809
				return media;
810
			} else if (isStartingElement(next, FIGURE_LEGEND)) {
811
				// TODO same as figure string ?
812
				legendString = handleFootnoteString(state, reader, next);
813
			} else if (isStartingElement(next, FIGURE_TITLE)) {
814
				titleString = getCData(state, reader, next);
815
			} else if (isStartingElement(next, URL)) {
816
				String localUrl = getCData(state, reader, next);
817
				String url = CdmUtils.Nz(state.getBaseMediaUrl()) + localUrl;
818
				if (isBlank(urlString)){
819
					urlString = url;
820
				}
821
				if (! url.equals(urlString)){
822
					String message = "URL attribute and URL element differ. Attribute: %s, Element: %s";
823
					fireWarningEvent(String.format(message, urlString, url), next, 2);
824
				}
825
			} else if (isStartingElement(next, NUM)) {
826
				numString = getCData(state, reader, next);
827
			} else if (next.isCharacters()) {
828
				text += CdmUtils.concat("", text, next.asCharacters().getData());
829
			} else {
830
				fireUnexpectedEvent(next, 0);
831
			}
832
		}
833
		throw new IllegalStateException("<figure> has no end tag");
834
	}
835

  
836
	/**
837
	 * @param state
838
	 * @param id
839
	 * @param type
840
	 * @param urlString
841
	 * @param legendString
842
	 * @param titleString
843
	 * @param numString
844
	 * @param next
845
	 */
846
	private Media makeFigure(MarkupImportState state, String id, String type, String urlString, 
847
			String legendString, String titleString, String numString, XMLEvent next) {
848
		Media media = null;
849
		boolean isFigure = false;
850
		try {
851
			//TODO maybe everything is a figure as it is all taken from a book
852
			if ("lineart".equals(type)) {
853
				isFigure = true;
854
//				media = Figure.NewInstance(url.toURI(), null, null,	null);
855
			} else if (type == null || "photo".equals(type)
856
					|| "signature".equals(type)
857
					|| "others".equals(type)) {
858
				//TODO
859
			} else {
860
				String message = "Unknown figure type '%s'";
861
				message = String.format(message, type);
862
				fireWarningEvent(message, next, 2);
863
			}
864
			media = docImport.getImageMedia(urlString, docImport.getReadMediaData(), isFigure);
865
			
866
			if (media != null){
867
				// title
868
				if (StringUtils.isNotBlank(titleString)) {
869
					media.putTitle(getDefaultLanguage(state), titleString);
870
				}
871
				// legend
872
				if (StringUtils.isNotBlank(legendString)) {
873
					media.addDescription(legendString, getDefaultLanguage(state));
874
				}
875
				if (StringUtils.isNotBlank(numString)) {
876
					// TODO use concrete source (e.g. DAPHNIPHYLLACEAE in FM
877
					// vol.13)
878
					Reference<?> citation = state.getConfig().getSourceReference();
879
					media.addSource(numString, "num", citation, null);
880
					// TODO name used in source if available
881
				}
882
				// TODO which citation
883
				if (StringUtils.isNotBlank(id)) {
884
					media.addSource(id, null, state.getConfig().getSourceReference(), null);
885
				} else {
886
					String message = "Figure id should never be empty or null";
887
					fireWarningEvent(message, next, 6);
888
				}
889

  
890
				// text
891
				// do nothing
892
				registerGivenFigure(state, next, id, media);
893
				
894
			}else{
895
				String message = "No media found: ";
896
				fireWarningEvent(message, next, 4);
897
			}
898
		} catch (MalformedURLException e) {
899
			String message = "Media uri has incorrect syntax: %s";
900
			message = String.format(message, urlString);
901
			fireWarningEvent(message, next, 4);
902
//		} catch (URISyntaxException e) {
903
//			String message = "Media uri has incorrect syntax: %s";
904
//			message = String.format(message, urlString);
905
//			fireWarningEvent(message, next, 4);
906
		}
907

  
908
		return media;
909
	}
910

  
911
	private FigureDataHolder handleFigureRef(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent)
912
			throws XMLStreamException {
913
		FigureDataHolder result = new FigureDataHolder();
914
		Map<String, Attribute> attributes = getAttributes(parentEvent);
915
		result.ref = getAndRemoveAttributeValue(attributes, REF);
916
		checkNoAttributes(attributes, parentEvent);
917

  
918
		// text is not handled, needed only for debugging purposes
919
		String text = "";
920
		while (reader.hasNext()) {
921
			XMLEvent next = readNoWhitespace(reader);
922
			if (isMyEndingElement(next, parentEvent)) {
923
				return result;
924
			} else if (isStartingElement(next, NUM)) {
925
				String num = getCData(state, reader, next);
926
				result.num = num; // num is not handled during import
927
			} else if (isStartingElement(next, FIGURE_PART)) {
928
				result.figurePart = getCData(state, reader, next);
929
			} else if (next.isCharacters()) {
930
				text += next.asCharacters().getData();
931
			} else {
932
				fireUnexpectedEvent(next, 0);
933
			}
934
		}
935
		throw new IllegalStateException("<figureRef> has no end tag");
936
	}
937

  
938
	private FootnoteDataHolder handleFootnote(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
939
		FootnoteDataHolder result = new FootnoteDataHolder();
940
		Map<String, Attribute> attributes = getAttributes(parentEvent);
941
		result.id = getAndRemoveAttributeValue(attributes, ID);
942
		// result.ref = getAndRemoveAttributeValue(attributes, REF);
943
		checkNoAttributes(attributes, parentEvent);
944

  
945
		while (reader.hasNext()) {
946
			XMLEvent next = readNoWhitespace(reader);
947
			if (isStartingElement(next, FOOTNOTE_STRING)) {
948
				String string = handleFootnoteString(state, reader, next);
949
				result.string = string;
950
			} else if (isMyEndingElement(next, parentEvent)) {
951
				return result;
952
			} else {
953
				fireUnexpectedEvent(next, 0);
954
			}
955
		}
956
		return result;
957
	}
958

  
959
	private FootnoteDataHolder handleFootnoteRef(MarkupImportState state,
960
			XMLEventReader reader, XMLEvent parentEvent)
961
			throws XMLStreamException {
962
		FootnoteDataHolder result = new FootnoteDataHolder();
963
		Map<String, Attribute> attributes = getAttributes(parentEvent);
964
		result.ref = getAndRemoveAttributeValue(attributes, REF);
965
		checkNoAttributes(attributes, parentEvent);
966

  
967
		// text is not handled, needed only for debugging purposes
968
		String text = "";
969
		while (reader.hasNext()) {
970
			XMLEvent next = readNoWhitespace(reader);
971
			// if (isStartingElement(next, FOOTNOTE_STRING)){
972
			// String string = handleFootnoteString(state, reader, next);
973
			// result.string = string;
974
			// }else
975
			if (isMyEndingElement(next, parentEvent)) {
976
				return result;
977
			} else if (next.isCharacters()) {
978
				text += next.asCharacters().getData();
979

  
980
			} else {
981
				fireUnexpectedEvent(next, 0);
982
			}
983
		}
984
		return result;
985
	}
986

  
987

  
988

  
989
	private String handleFootnoteString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
990
		boolean isTextMode = true;
991
		String text = "";
992
		while (reader.hasNext()) {
993
			XMLEvent next = readNoWhitespace(reader);
994
			if (isMyEndingElement(next, parentEvent)) {
995
				return text;
996
			} else if (next.isEndElement()) {
997
				if (isEndingElement(next, FULL_NAME)) {
998
					popUnimplemented(next.asEndElement());
999
				} else if (isEndingElement(next, BR)) {
1000
					isTextMode = true;
1001
				} else if (isHtml(next)) {
1002
					text += getXmlTag(next);
1003
				} else {
1004
					handleUnexpectedEndElement(next.asEndElement());
1005
				}
1006
			} else if (next.isStartElement()) {
1007
				if (isStartingElement(next, FULL_NAME)) {
1008
					handleNotYetImplementedElement(next);
1009
				} else if (isStartingElement(next, GATHERING)) {
1010
					text += specimenImport.handleInLineGathering(state, reader, next);
1011
				} else if (isStartingElement(next, REFERENCES)) {
1012
					text += " " + handleInLineReferences(state, reader, next)+ " ";
1013
				} else if (isStartingElement(next, BR)) {
1014
					text += "<br/>";
1015
					isTextMode = false;
1016
				} else if (isStartingElement(next, NOMENCLATURE)) {
1017
					handleNotYetImplementedElement(next);
1018
				} else if (isHtml(next)) {
1019
					text += getXmlTag(next);
1020
				} else {
1021
					handleUnexpectedStartElement(next.asStartElement());
1022
				}
1023
			} else if (next.isCharacters()) {
1024
				if (!isTextMode) {
1025
					String message = "footnoteString is not in text mode";
1026
					fireWarningEvent(message, next, 6);
1027
				} else {
1028
					text += next.asCharacters().getData().trim(); 
1029
					// getCData(state, reader, next); does not work as we have inner tags like <references>
1030
				}
1031
			} else {
1032
				handleUnexpectedEndElement(next.asEndElement());
1033
			}
1034
		}
1035
		throw new IllegalStateException("<footnoteString> has no closing tag");
1036

  
1037
	}
1038

  
1039
	private String handleInLineReferences(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1040
		checkNoAttributes(parentEvent);
1041

  
1042
		boolean hasReference = false;
1043
		String text = "";
1044
		while (reader.hasNext()) {
1045
			XMLEvent next = readNoWhitespace(reader);
1046
			if (isMyEndingElement(next, parentEvent)) {
1047
				checkMandatoryElement(hasReference, parentEvent.asStartElement(), REFERENCE);
1048
				return text;
1049
			} else if (isStartingElement(next, REFERENCE)) {
1050
				text += handleInLineReference(state, reader, next);
1051
				hasReference = true;
1052
			} else {
1053
				handleUnexpectedElement(next);
1054
			}
1055
		}
1056
		throw new IllegalStateException("<References> has no closing tag");
1057
	}
1058

  
1059
	private String handleInLineReference(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1060
		Reference<?> reference = nomenclatureImport.handleReference(state, reader, parentEvent);
1061
		String result = "<cdm:ref uuid='%s'>%s</ref>";
1062
		result = String.format(result, reference.getUuid(), reference.getTitleCache());
1063
		save(reference, state);
1064
		return result;
1065
	}
1066

  
1067
	private void handleFeature(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1068
		Map<String, Attribute> attrs = getAttributes(parentEvent);
1069
		Boolean isFreetext = getAndRemoveBooleanAttributeValue(parentEvent, attrs, IS_FREETEXT, false);
1070
		String classValue =getAndRemoveRequiredAttributeValue(parentEvent, attrs, CLASS);
1071
		checkNoAttributes(attrs, parentEvent);
1072
		
1073
		
1074
		Feature feature = makeFeature(classValue, state, parentEvent, null);
1075
		Taxon taxon = state.getCurrentTaxon();
1076
		TaxonDescription taxonDescription = getTaxonDescription(taxon, state.getConfig().getSourceReference(), NO_IMAGE_GALLERY, CREATE_NEW);
1077
		// TextData figureHolderTextData = null; //for use with one TextData for
1078
		// all figure only
1079

  
1080
		boolean isDescription = feature.equals(Feature.DESCRIPTION());
1081
		DescriptionElementBase lastDescriptionElement = null;
1082
		
1083
		while (reader.hasNext()) {
1084
			XMLEvent next = readNoWhitespace(reader);
1085
			if (isMyEndingElement(next, parentEvent)) {
1086
				state.putFeatureToGeneralSorterList(feature);
1087
				return;
1088
			} else if (isEndingElement(next, DISTRIBUTION_LIST) || isEndingElement(next, HABITAT_LIST)) { 
1089
				// only handle list elements
1090
			} else if (isStartingElement(next, HEADING)) {
1091
				makeFeatureHeading(state, reader, classValue, feature, next);
1092
			} else if (isStartingElement(next, WRITER)) {
1093
				makeFeatureWriter(state, reader, feature, taxon, next);
1094
//			} else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
1095
//				if (!feature.equals(Feature.DISTRIBUTION())) {
1096
//					String message = "Distribution locality only allowed for feature of type 'distribution'";
1097
//					fireWarningEvent(message, next, 4);
1098
//				}
1099
//				handleDistributionLocality(state, reader, next);
1100
			} else if (isStartingElement(next, DISTRIBUTION_LIST) || isStartingElement(next, HABITAT_LIST)) {
1101
				// only handle single list elements
1102
			} else if (isStartingElement(next, HABITAT)) {
1103
				if (!(feature.equals(Feature.HABITAT())
1104
						|| feature.equals(Feature.HABITAT_ECOLOGY()) 
1105
						|| feature.equals(Feature.ECOLOGY()))) {
1106
					String message = "Habitat only allowed for feature of type 'habitat','habitat ecology' or 'ecology'";
1107
					fireWarningEvent(message, next, 4);
1108
				}
1109
				handleHabitat(state, reader, next);
1110
			} else if (isStartingElement(next, CHAR)) {
1111
				List<TextData> textDataList = handleChar(state, reader, next, null);
1112
				for (TextData textData : textDataList){
1113
					taxonDescription.addElement(textData);
1114
				}
1115
			} else if (isStartingElement(next, STRING)) {
1116
				lastDescriptionElement = makeFeatureString(state, reader,feature, taxonDescription, lastDescriptionElement,next, isFreetext);
1117
			} else if (isStartingElement(next, FIGURE_REF)) {
1118
				lastDescriptionElement = makeFeatureFigureRef(state, reader, taxonDescription, isDescription, lastDescriptionElement, next);
1119
			} else if (isStartingElement(next, REFERENCES)) {
1120
				// TODO details/microcitation ??
1121

  
1122
				List<Reference<?>> refs = handleReferences(state, reader, next);
1123
				if (!refs.isEmpty()) {
1124
					// TODO
1125
					Reference<?> descriptionRef = state.getConfig().getSourceReference();
1126
					TaxonDescription description = getTaxonDescription(taxon, descriptionRef, false, true);
1127
					TextData featurePlaceholder = docImport.getFeaturePlaceholder(state, description, feature, true);
1128
					for (Reference<?> citation : refs) {
1129
						featurePlaceholder.addSource(null, null, citation, null);
1130
					}
1131
				} else {
1132
					String message = "No reference found in references";
1133
					fireWarningEvent(message, next, 6);
1134
				}
1135
			} else if (isStartingElement(next, NUM)) {
1136
				//TODO
1137
				handleNotYetImplementedElement(next);
1138
			} else if (isEndingElement(next, NUM)) {
1139
				//TODO
1140
				popUnimplemented(next.asEndElement());
1141
			} else {
1142
				handleUnexpectedElement(next);
1143
			}
1144
		}
1145
		throw new IllegalStateException("<Feature> has no closing tag");
1146
	}
1147

  
1148
	/**
1149
	 * @param state
1150
	 * @param reader
1151
	 * @param taxonDescription
1152
	 * @param isDescription
1153
	 * @param lastDescriptionElement
1154
	 * @param next
1155
	 * @return
1156
	 * @throws XMLStreamException
1157
	 */
1158
	private DescriptionElementBase makeFeatureFigureRef(MarkupImportState state, XMLEventReader reader,TaxonDescription taxonDescription, 
1159
					boolean isDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next) throws XMLStreamException {
1160
		FigureDataHolder figureHolder = handleFigureRef(state, reader, next);
1161
		Feature figureFeature = getFeature(state, MarkupTransformer.uuidFigures, "Figures", "Figures", "Fig.",null);
1162
		if (isDescription) {
1163
			TextData figureHolderTextData = null;
1164
			// if (figureHolderTextData == null){
1165
			figureHolderTextData = TextData.NewInstance(figureFeature);
1166
			if (StringUtils.isNotBlank(figureHolder.num)) {
1167
				String annotationText = "<num>" + figureHolder.num.trim() + "</num>";
1168
				Annotation annotation = Annotation.NewInstance(annotationText, AnnotationType.TECHNICAL(), getDefaultLanguage(state));
1169
				figureHolderTextData.addAnnotation(annotation);
1170
			}
1171
			if (StringUtils.isNotBlank(figureHolder.figurePart)) {
1172
				String annotationText = "<figurePart>"+ figureHolder.figurePart.trim() + "</figurePart>";
1173
				Annotation annotation = Annotation.NewInstance(annotationText,AnnotationType.EDITORIAL(), getDefaultLanguage(state));
1174
				figureHolderTextData.addAnnotation(annotation);
1175
			}
1176
			// if (StringUtils.isNotBlank(figureText)){
1177
			// figureHolderTextData.putText(language, figureText);
1178
			// }
1179
			taxonDescription.addElement(figureHolderTextData);
1180
			// }
1181
			registerFigureDemand(state, next, figureHolderTextData, figureHolder.ref);
1182
		} else {
1183
			if (lastDescriptionElement == null) {
1184
				String message = "No description element created yet that can be referred by figure. Create new TextData instead";
1185
				fireWarningEvent(message, next, 4);
1186
				lastDescriptionElement = TextData.NewInstance(figureFeature);
1187
				taxonDescription.addElement(lastDescriptionElement);
1188
			}
1189
			registerFigureDemand(state, next, lastDescriptionElement,	figureHolder.ref);
1190
		}
1191
		return lastDescriptionElement;
1192
	}
1193

  
1194
	/**
1195
	 * @param state
1196
	 * @param reader
1197
	 * @param feature
1198
	 * @param taxonDescription
1199
	 * @param lastDescriptionElement
1200
	 * @param distributionList 
1201
	 * @param next
1202
	 * @return
1203
	 * @throws XMLStreamException
1204
	 */
1205
	private DescriptionElementBase makeFeatureString(MarkupImportState state,XMLEventReader reader, Feature feature, 
1206
				TaxonDescription taxonDescription, DescriptionElementBase lastDescriptionElement, XMLEvent next, Boolean isFreetext) throws XMLStreamException {
1207
		
1208
		//for specimen only
1209
		if (feature.equals(Feature.SPECIMEN()) || feature.equals(Feature.MATERIALS_EXAMINED())){
1210
			
1211
			List<DescriptionElementBase> specimens = specimenImport.handleMaterialsExamined(state, reader, next, feature);
1212
			for (DescriptionElementBase specimen : specimens){
1213
				taxonDescription.addElement(specimen);
1214
				lastDescriptionElement = specimen;
1215
			}
1216
			state.setCurrentCollector(null);
1217
			
1218
			return lastDescriptionElement;
1219
		}else{
1220
		
1221
			//others
1222
			Map<String, String> subheadingMap = handleString(state, reader, next, feature);
1223
			for (String subheading : subheadingMap.keySet()) {
1224
				Feature subheadingFeature = feature;
1225
				if (StringUtils.isNotBlank(subheading) && subheadingMap.size() > 1) {
1226
					subheadingFeature = makeFeature(subheading, state, next, null);
1227
				}
1228
				if (feature.equals(Feature.COMMON_NAME()) && (isFreetext == null || !isFreetext)){
1229
					List<DescriptionElementBase> commonNames = makeVernacular(state, subheading, subheadingMap.get(subheading));
1230
					for (DescriptionElementBase commonName : commonNames){
1231
						taxonDescription.addElement(commonName);
1232
						lastDescriptionElement = commonName;
1233
					}
1234
				}else {
1235
					TextData textData = TextData.NewInstance(subheadingFeature);
1236
					textData.putText(getDefaultLanguage(state), subheadingMap.get(subheading));
1237
					taxonDescription.addElement(textData);
1238
					lastDescriptionElement = textData;
1239
					// TODO how to handle figures when these data are split in
1240
					// subheadings
1241
				}
1242
			}
1243
			return lastDescriptionElement;
1244
		}
1245
	}
1246

  
1247
	private List<DescriptionElementBase> makeVernacular(MarkupImportState state, String subheading, String commonNameString) throws XMLStreamException {
1248
		List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
1249
		String[] splits = commonNameString.split(",");
1250
		for (String split : splits){
1251
			split = split.trim();
1252
			if (! split.matches(".*\\(.*\\)\\.?")){
1253
				fireWarningEvent("Common name string '"+split+"' does not match given pattern", state.getReader().peek(), 4);
1254
			}
1255
			
1256
			String name = split.replaceAll("\\(.*\\)", "").replace(".", "").trim();
1257
			String languageStr = split.replaceFirst(".*\\(", "").replaceAll("\\)\\.?", "").trim();
1258
			
1259
			Language language = null;
1260
			if (StringUtils.isNotBlank(languageStr)){
1261
				try {
1262
					UUID langUuid = state.getTransformer().getLanguageUuid(languageStr);
1263
					TermVocabulary<?> voc = null;
1264
					language = getLanguage(state, langUuid, languageStr, languageStr, null, voc);
1265
					if (language == null){
1266
						logger.warn("Language " + languageStr + " not recognized by transformer");
1267
					}
1268
				} catch (UndefinedTransformerMethodException e) {
1269
					throw new RuntimeException(e);
1270
				}
1271
			}
1272
			NamedArea area = null;
1273
			CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(name, language, area);
1274
			result.add(commonTaxonName);
1275
		}
1276
		
1277
		return result;
1278
	}
1279

  
1280
	/**
1281
	 * @param state
1282
	 * @param reader
1283
	 * @param feature
1284
	 * @param taxon
1285
	 * @param next
1286
	 * @throws XMLStreamException
1287
	 */
1288
	private void makeFeatureWriter(MarkupImportState state,XMLEventReader reader, Feature feature, Taxon taxon, XMLEvent next) throws XMLStreamException {
1289
		WriterDataHolder writer = handleWriter(state, reader, next);
1290
		if (isNotBlank(writer.writer)) {
1291
			// TODO
1292
			Reference<?> ref = state.getConfig().getSourceReference();
1293
			TaxonDescription description = getTaxonDescription(taxon, ref,
1294
					false, true);
1295
			TextData featurePlaceholder = docImport.getFeaturePlaceholder(state,
1296
					description, feature, true);
1297
			featurePlaceholder.addAnnotation(writer.annotation);
1298
			registerFootnotes(state, featurePlaceholder, writer.footnotes);
1299
		} else {
1300
			String message = "Writer element is empty";
1301
			fireWarningEvent(message, next, 4);
1302
		}
1303
	}
1304

  
1305
	/**
1306
	 * @param state
1307
	 * @param reader
1308
	 * @param classValue
1309
	 * @param feature
1310
	 * @param next
1311
	 * @throws XMLStreamException
1312
	 */
1313
	private void makeFeatureHeading(MarkupImportState state, XMLEventReader reader, String classValue, Feature feature, XMLEvent next) throws XMLStreamException {
1314
		String heading = handleHeading(state, reader, next);
1315
		if (StringUtils.isNotBlank(heading)) {
1316
			if (!heading.equalsIgnoreCase(classValue)) {
1317
				try {
1318
					if (!feature.equals(state.getTransformer().getFeatureByKey(
1319
							heading))) {
1320
						UUID headerFeatureUuid = state.getTransformer()
1321
								.getFeatureUuid(heading);
1322
						if (!feature.getUuid().equals(headerFeatureUuid)) {
1323
							String message = "Feature heading '%s' differs from feature class '%s' and can not be transformed to feature";
1324
							message = String.format(message, heading,
1325
									classValue);
1326
							fireWarningEvent(message, next, 1);
1327
						}
1328
					}
1329
				} catch (UndefinedTransformerMethodException e) {
1330
					throw new RuntimeException(e);
1331
				}
1332
			} else {
1333
				// do nothing
1334
			}
1335
		}
1336
	}
1337

  
1338
	private List<Reference<?>> handleReferences(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1339
		// attributes
1340
		Map<String, Attribute> attributes = getAttributes(parentEvent);
1341
		String bibliography = getAndRemoveAttributeValue(attributes,
1342
				BIBLIOGRAPHY);
1343
		String serialsAbbreviations = getAndRemoveAttributeValue(attributes,
1344
				SERIALS_ABBREVIATIONS);
1345
		if (isNotBlank(bibliography) || isNotBlank(serialsAbbreviations)) {
1346
			String message = "Attributes not yet implemented for <references>";
1347
			fireWarningEvent(message, parentEvent, 4);
1348
		}
1349

  
1350
		List<Reference<?>> result = new ArrayList<Reference<?>>();
1351

  
1352
		// elements
1353
		while (reader.hasNext()) {
1354
			XMLEvent next = readNoWhitespace(reader);
1355
			if (next.isEndElement()) {
1356
				if (isMyEndingElement(next, parentEvent)) {
1357
					return result;
1358
				} else {
1359
					if (isEndingElement(next, HEADING)) {
1360
						// NOT YET IMPLEMENTED
1361
						popUnimplemented(next.asEndElement());
1362
					} else if (isEndingElement(next, WRITER)) {
1363
						// NOT YET IMPLEMENTED
1364
						popUnimplemented(next.asEndElement());
1365
					} else if (isEndingElement(next, FOOTNOTE)) {
1366
						// NOT YET IMPLEMENTED
1367
						popUnimplemented(next.asEndElement());
1368
					} else if (isEndingElement(next, STRING)) {
1369
						// NOT YET IMPLEMENTED
1370
						popUnimplemented(next.asEndElement());
1371
					} else if (isEndingElement(next, REF_NUM)) {
1372
						// NOT YET IMPLEMENTED
1373
						popUnimplemented(next.asEndElement());
1374
					} else {
1375
						handleUnexpectedEndElement(next.asEndElement());
1376
					}
1377
				}
1378
			} else if (next.isStartElement()) {
1379
				if (isStartingElement(next, HEADING)) {
1380
					handleNotYetImplementedElement(next);
1381
				} else if (isStartingElement(next, SUB_HEADING)) {
1382
					String subheading = getCData(state, reader, next).trim();
1383
					String excludePattern = "(i?)(References?|Literature):?";
1384
					if (!subheading.matches(excludePattern)) {
1385
						fireNotYetImplementedElement(next.getLocation(), next.asStartElement().getName(), 0);
1386
					}
1387
				} else if (isStartingElement(next, WRITER)) {
1388
					handleNotYetImplementedElement(next);
1389
				} else if (isStartingElement(next, FOOTNOTE)) {
1390
					handleNotYetImplementedElement(next);
1391
				} else if (isStartingElement(next, STRING)) {
1392
					handleNotYetImplementedElement(next);
1393
				} else if (isStartingElement(next, REF_NUM)) {
1394
					handleNotYetImplementedElement(next);
1395
				} else if (isStartingElement(next, REFERENCE)) {
1396
					Reference<?> ref = nomenclatureImport.handleReference(state, reader, next);
1397
					result.add(ref);
1398
				} else {
1399
					handleUnexpectedStartElement(next);
1400
				}
1401
			} else {
1402
				handleUnexpectedElement(next);
1403
			}
1404
		}
1405
		throw new IllegalStateException("<References> has no closing tag");
1406
	}
1407

  
1408
	private void handleHabitat(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1409
		checkNoAttributes(parentEvent);
1410
		Taxon taxon = state.getCurrentTaxon();
1411
		// TODO which ref to take?
1412
		Reference<?> ref = state.getConfig().getSourceReference();
1413

  
1414
		String text = "";
1415
		while (reader.hasNext()) {
1416
			XMLEvent next = readNoWhitespace(reader);
1417
			if (isMyEndingElement(next, parentEvent)) {
1418
				TaxonDescription description = getTaxonDescription(taxon, ref,
1419
						false, true);
1420
				UUID uuidExtractedHabitat = MarkupTransformer.uuidExtractedHabitat;
1421
				Feature feature = getFeature(
1422
						state,
1423
						uuidExtractedHabitat,
1424
						"Extracted Habitat",
1425
						"An structured habitat that was extracted from a habitat text",
1426
						"extr. habit.", null);
1427
				TextData habitat = TextData.NewInstance(feature);
1428
				habitat.putText(getDefaultLanguage(state), text);
1429
				description.addElement(habitat);
1430

  
1431
				return;
1432
			} else if (next.isStartElement()) {
1433
				if (isStartingElement(next, ALTITUDE)) {
1434
					text = text.trim() + getTaggedCData(state, reader, next);
1435
				} else if (isStartingElement(next, LIFE_CYCLE_PERIODS)) {
1436
					handleNotYetImplementedElement(next);
1437
				} else {
1438
					handleUnexpectedStartElement(next.asStartElement());
1439
				}
1440
			} else if (next.isCharacters()) {
1441
				text += next.asCharacters().getData();
1442
			} else {
1443
				handleUnexpectedElement(next);
1444
			}
1445
		}
1446
		throw new IllegalStateException("<Habitat> has no closing tag");
1447
	}
1448

  
1449
	private String getTaggedCData(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1450
		checkNoAttributes(parentEvent);
1451

  
1452
		String text = getXmlTag(parentEvent);
1453
		while (reader.hasNext()) {
1454
			XMLEvent next = readNoWhitespace(reader);
1455
			if (isMyEndingElement(next, parentEvent)) {
1456
				text += getXmlTag(next);
1457
				return text;
1458
			} else if (next.isStartElement()) {
1459
				text += getTaggedCData(state, reader, next);
1460
			} else if (next.isEndElement()) {
1461
				text += getTaggedCData(state, reader, next);
1462
			} else if (next.isCharacters()) {
1463
				text += next.asCharacters().getData();
1464
			} else {
1465
				handleUnexpectedEndElement(next.asEndElement());
1466
			}
1467
		}
1468
		throw new IllegalStateException("Some tag has no closing tag");
1469
	}
1470

  
1471
	private String handleDistributionLocality(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1472
		Map<String, Attribute> attributes = getAttributes(parentEvent);
1473
		String classValue = getAndRemoveRequiredAttributeValue(parentEvent, attributes, CLASS);
1474
		String statusValue =getAndRemoveAttributeValue(attributes, STATUS);
1475
		String frequencyValue =getAndRemoveAttributeValue(attributes, FREQUENCY);
1476
		
1477

  
1478
		Taxon taxon = state.getCurrentTaxon();
1479
		// TODO which ref to take?
1480
		Reference<?> ref = state.getConfig().getSourceReference();
1481

  
1482
		String text = "";
1483
		while (reader.hasNext()) {
1484
			XMLEvent next = readNoWhitespace(reader);
1485
			if (isMyEndingElement(next, parentEvent)) {
1486
				if (StringUtils.isNotBlank(text)) {
1487
					String label = CdmUtils.removeTrailingDot(normalize(text));
1488
					TaxonDescription description = getTaxonDescription(taxon, ref, false, true);
1489
					NamedAreaLevel level = makeNamedAreaLevel(state,classValue, next);
1490
					
1491
					//status
1492
					PresenceAbsenceTermBase<?> status = null;
1493
					if (isNotBlank(statusValue)){
1494
						try {
1495
							status = state.getTransformer().getPresenceTermByKey(statusValue);
1496
							if (status == null){
1497
								//TODO
1498
								String message = "The presence/absence status '%s' could not be transformed to an CDM status";								
1499
								fireWarningEvent(String.format(message, statusValue), next, 4);
1500
							}
1501
						} catch (UndefinedTransformerMethodException e) {
1502
							throw new RuntimeException(e);
1503
						}
1504
					}else{
1505
						status = PresenceTerm.PRESENT();
1506
					}
1507
					//frequency
1508
					if (isNotBlank(frequencyValue)){
1509
						String message = "The frequency attribute is currently not yet available in CDM";
1510
						fireWarningEvent(message, parentEvent, 6);
1511
					}
1512
					
1513
					NamedArea higherArea = null;
1514
					List<NamedArea> areas = new ArrayList<NamedArea>(); 
1515
					
1516
					String patSingleArea = "([^,\\(]{3,})";
1517
					String patSeparator = "(,|\\sand\\s)";
1518
					String hierarchiePattern = String.format("%s\\((%s(%s%s)*)\\)",patSingleArea, patSingleArea, patSeparator, patSingleArea);
1519
					Pattern patHierarchie = Pattern.compile(hierarchiePattern, Pattern.CASE_INSENSITIVE);
1520
					Matcher matcher = patHierarchie.matcher(label); 
1521
					if (matcher.matches()){
1522
						String higherAreaStr = matcher.group(1).trim();
1523
						higherArea =  makeArea(state, higherAreaStr, level);
1524
						String[] innerAreas = matcher.group(2).split(patSeparator);
1525
						for (String innerArea : innerAreas){
1526
							if (isNotBlank(innerArea)){
1527
								NamedArea singleArea = makeArea(state, innerArea.trim(), level);
1528
								areas.add(singleArea);
1529
								NamedArea partOf = singleArea.getPartOf();
1530
//								if (partOf == null){
1531
//									singleArea.setPartOf(higherArea);
1532
//								}
1533
							}
1534
						}
1535
					}else{
1536
						NamedArea singleArea = makeArea(state, label, level);
1537
						areas.add(singleArea);
1538
					}
1539
					
1540
					for (NamedArea area : areas){
1541
						//create distribution
1542
						Distribution distribution = Distribution.NewInstance(area,status);
1543
						description.addElement(distribution);
1544
					}
1545
				} else {
1546
					String message = "Empty distribution locality";
1547
					fireWarningEvent(message, next, 4);
1548
				}
1549
				return text;
1550
			} else if (isStartingElement(next, COORDINATES)) {
1551
				//TODO
1552
				handleNotYetImplementedElement(next);
1553
			} else if (isEndingElement(next, COORDINATES)) {
1554
				//TODO
1555
				popUnimplemented(next.asEndElement());
1556
			} else if (next.isCharacters()) {
1557
				text += next.asCharacters().getData();
1558
			} else {
1559
				handleUnexpectedElement(next);
1560
			}
1561
		}
1562
		throw new IllegalStateException("<DistributionLocality> has no closing tag");
1563
	}	
1564

  
1565
	private String handleHeading(MarkupImportState state,XMLEventReader reader, XMLEvent parentEvent)throws XMLStreamException {
1566
		checkNoAttributes(parentEvent);
1567

  
1568
		String text = "";
1569
		while (reader.hasNext()) {
1570
			XMLEvent next = readNoWhitespace(reader);
1571
			if (isMyEndingElement(next, parentEvent)) {
1572
				return text;
1573
			} else if (next.isStartElement()) {
1574
				if (isStartingElement(next, FOOTNOTE)) {
1575
					handleNotYetImplementedElement(next);
1576
				} else {
1577
					handleUnexpectedStartElement(next.asStartElement());
1578
				}
1579
			} else if (next.isCharacters()) {
1580
				text += next.asCharacters().getData();
1581
			} else {
1582
				handleUnexpectedEndElement(next.asEndElement());
1583
			}
1584
		}
1585
		throw new IllegalStateException("<String> has no closing tag");
1586

  
1587
	}
1588

  
1589
	/**
1590
	 * Handle string
1591
	 * @param state
1592
	 * @param reader
1593
	 * @param parentEvent
1594
	 * @param feature only needed for distributionLocalities
1595
	 * @return
1596
	 * @throws XMLStreamException
1597
	 */
1598
	private Map<String, String> handleString(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature feature)throws XMLStreamException {
1599
		// attributes
1600
		String classValue = getClassOnlyAttribute(parentEvent, false);
1601
		if (StringUtils.isNotBlank(classValue)) {
1602
			String message = "class attribute for <string> not yet implemented";
1603
			fireWarningEvent(message, parentEvent, 2);
1604
		}
1605

  
1606
		// subheadings
1607
		Map<String, String> subHeadingMap = new HashMap<String, String>();
1608
		String currentSubheading = null;
1609

  
1610
		boolean isTextMode = true;
1611
		String text = "";
1612
		while (reader.hasNext()) {
1613
			XMLEvent next = readNoWhitespace(reader);
1614
			if (isMyEndingElement(next, parentEvent)) {
1615
				putCurrentSubheading(subHeadingMap, currentSubheading, text);
1616
				return subHeadingMap;
1617
			} else if (isStartingElement(next, BR)) {
1618
				text += "<br/>";
1619
				isTextMode = false;
1620
			} else if (isEndingElement(next, BR)) {
1621
				isTextMode = true;
1622
			} else if (isHtml(next)) {
1623
				text += getXmlTag(next);
1624
			} else if (isStartingElement(next, SUB_HEADING)) {
1625
				text = putCurrentSubheading(subHeadingMap,currentSubheading, text);
1626
				// TODO footnotes
1627
				currentSubheading = getCData(state, reader, next).trim();
1628
			} else if (isStartingElement(next, DISTRIBUTION_LOCALITY)) {
1629
				if (feature != null && !feature.equals(Feature.DISTRIBUTION())) {
1630
					String message = "Distribution locality only allowed for feature of type 'distribution'";
1631
					fireWarningEvent(message, next, 4);
1632
				}
1633
				text += handleDistributionLocality(state, reader, next);
1634
			} else if (next.isCharacters()) {
1635
				if (! isTextMode) {
1636
					String message = "String is not in text mode";
1637
					fireWarningEvent(message, next, 6);
1638
				} else {
1639
					text += next.asCharacters().getData();
1640
				}
1641
			} else if (isStartingElement(next, HEADING)) {
1642
				//TODO
1643
				handleNotYetImplementedElement(next);
1644
			} else if (isStartingElement(next, VERNACULAR_NAMES)) {
1645
				//TODO
1646
				handleNotYetImplementedElement(next);
1647
			} else if (isEndingElement(next, HEADING)) {
1648
				//TODO
1649
				popUnimplemented(next.asEndElement());
1650
			} else if (isStartingElement(next, QUOTE)) {
1651
				//TODO
1652
				handleNotYetImplementedElement(next);
1653
			} else if (isEndingElement(next, QUOTE)) {
1654
				//TODO
1655
				popUnimplemented(next.asEndElement());
1656
			} else if (isStartingElement(next, DEDICATION)) {
1657
				//TODO
1658
				handleNotYetImplementedElement(next);
1659
			} else if (isEndingElement(next, DEDICATION)) {
1660
				//TODO
1661
				popUnimplemented(next.asEndElement());
1662
			} else if (isStartingElement(next, TAXONTYPE)) {
1663
				//TODO
1664
				handleNotYetImplementedElement(next);
1665
			} else if (isEndingElement(next, TAXONTYPE)) {
1666
				//TODO
1667
				popUnimplemented(next.asEndElement());
1668
			} else if (isStartingElement(next, FULL_NAME)) {
1669
				//TODO
1670
				handleNotYetImplementedElement(next);
1671
			} else if (isEndingElement(next, FULL_NAME)) {
1672
				//TODO
1673
				popUnimplemented(next.asEndElement());
1674
			}else if (isStartingElement(next, REFERENCES)) {
1675
				//TODO
1676
				handleNotYetImplementedElement(next);
1677
			} else if (isEndingElement(next, REFERENCES)) {
1678
				//TODO
1679
				popUnimplemented(next.asEndElement());
1680
			} else if (isStartingElement(next, GATHERING)) {
1681
				//TODO
1682
				handleNotYetImplementedElement(next);
1683
			} else if (isEndingElement(next, GATHERING)) {
1684
				//TODO
1685
				popUnimplemented(next.asEndElement());
1686
			} else if (isStartingElement(next, ANNOTATION)) {
1687
				//TODO  //TODO test handleSimpleAnnotation
1688
				handleNotYetImplementedElement(next);
1689
			} else if (isEndingElement(next, ANNOTATION)) {
1690
				//TODO
1691
				popUnimplemented(next.asEndElement());
1692
			} else if (isStartingElement(next, HABITAT)) {
1693
				//TODO
1694
				handleNotYetImplementedElement(next);
1695
			} else if (isEndingElement(next, HABITAT)) {
1696
				//TODO
1697
				popUnimplemented(next.asEndElement());
1698
			} else if (isStartingElement(next, FIGURE_REF)) {
1699
				//TODO
1700
				handleNotYetImplementedElement(next);
1701
			} else if (isEndingElement(next, FIGURE_REF)) {
1702
				//TODO
1703
				popUnimplemented(next.asEndElement());
1704
			} else if (isStartingElement(next, FIGURE)) {
1705
				//TODO
1706
				handleNotYetImplementedElement(next);
1707
			} else if (isEndingElement(next, FIGURE)) {
1708
				//TODO
1709
				popUnimplemented(next.asEndElement());
1710
			} else if (isStartingElement(next, FOOTNOTE_REF)) {
1711
				//TODO
1712
				handleNotYetImplementedElement(next);
1713
			} else if (isEndingElement(next, FOOTNOTE_REF)) {
1714
				//TODO
1715
				popUnimplemented(next.asEndElement());
1716
			} else if (isStartingElement(next, FOOTNOTE)) {
1717
				//TODO
1718
				handleNotYetImplementedElement(next);
1719
			} else if (isEndingElement(next, FOOTNOTE)) {
1720
				//TODO
1721
				popUnimplemented(next.asEndElement());
1722
			} else if (isStartingElement(next, WRITER)) {
1723
				//TODO
1724
				handleNotYetImplementedElement(next);
1725
			} else if (isEndingElement(next, WRITER)) {
1726
				//TODO
1727
				popUnimplemented(next.asEndElement());
1728
			} else if (isStartingElement(next, DATES)) {
1729
				//TODO
1730
				handleNotYetImplementedElement(next);
1731
			} else if (isEndingElement(next, DATES)) {
1732
				//TODO
1733
				popUnimplemented(next.asEndElement());
1734
			} else {
1735
				handleUnexpectedElement(next);
1736
			}
1737
		}
1738
		throw new IllegalStateException("<String> has no closing tag");
1739
	}
1740

  
1741
	/**
1742
	 * @param subHeadingMap
1743
	 * @param currentSubheading
1744
	 * @param text
1745
	 * @return
1746
	 */
1747
	private String putCurrentSubheading(Map<String, String> subHeadingMap, String currentSubheading, String text) {
1748
		if (StringUtils.isNotBlank(text)) {
1749
			text = removeStartingMinus(text);
1750
			subHeadingMap.put(currentSubheading, text.trim());
1751
		}
1752
		return "";
1753
	}
1754

  
1755
	private String removeStartingMinus(String string) {
1756
		string = replaceStart(string, "-");
1757
		string = replaceStart(string, "\u002d");
1758
		string = replaceStart(string, "\u2013");
1759
		string = replaceStart(string, "\u2014");
1760
		string = replaceStart(string, "--");
1761
		return string;
1762
	}
1763
	
1764
	/**
1765
	 * @param value
1766
	 * @param replacementString
1767
	 */
1768
	private String replaceStart(String value, String replacementString) {
1769
		if (value.startsWith(replacementString) ){
1770
			value = value.substring(replacementString.length()).trim();
1771
		}
1772
		while (value.startsWith("-") || value.startsWith("\u2014") ){
1773
			value = value.substring("-".length()).trim();
1774
		}
1775
		return value;
1776
	}
1777
	
1778
	private String getXmlTag(XMLEvent event) {
1779
		String result;
1780
		if (event.isStartElement()) {
1781
			result = "<" + event.asStartElement().getName().getLocalPart()
1782
					+ ">";
1783
		} else if (event.isEndElement()) {
1784
			result = "</" + event.asEndElement().getName().getLocalPart() + ">";
1785
		} else {
1786
			String message = "Only start or end elements are allowed as Html tags";
1787
			throw new IllegalStateException(message);
1788
		}
1789
		return result;
1790
	}
1791

  
1792
	protected static final List<String> htmlList = Arrays.asList("sub", "sup",
1793
			"ol", "ul", "li", "i", "b", "table", "br","tr","td");
1794

  
1795
	private boolean isHtml(XMLEvent event) {
1796
		if (event.isStartElement()) {
1797
			String tag = event.asStartElement().getName().getLocalPart();
1798
			return htmlList.contains(tag);
1799
		} else if (event.isEndElement()) {
1800
			String tag = event.asEndElement().getName().getLocalPart();
1801
			return htmlList.contains(tag);
1802
		} else {
1803
			return false;
1804
		}
1805

  
1806
	}
1807

  
1808
	/**
1809
	 * Handle the char or subchar element. As 
1810
	 * @param state the import state
1811
	 * @param reader 
1812
	 * @param parentEvent
1813
	 * @param parentFeature in case of subchars we need to attache the newly created feature to a parent feature, should be <code>null</code>
1814
	 * for top level chars.  
1815
	 * @return List of TextData. Not a single one as the recursive TextData will also be returned
1816
	 * @throws XMLStreamException
1817
	 */
1818
	private List<TextData> handleChar(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature parentFeature) throws XMLStreamException {
1819
		List<TextData> result = new ArrayList<TextData>();
1820
		String classValue = getClassOnlyAttribute(parentEvent);
1821
		Feature feature = makeFeature(classValue, state, parentEvent, parentFeature);
1822

  
1823
		boolean isTextMode = true;
1824
		String text = "";
1825
		while (reader.hasNext()) {
1826
			XMLEvent next = readNoWhitespace(reader);
1827
			if (isMyEndingElement(next, parentEvent)) {
1828
				state.putFeatureToCharSorterList(feature);
1829
				TextData textData = TextData.NewInstance(feature);
1830
				textData.putText(getDefaultLanguage(state), text);
1831
				result.add(textData);
1832
				return result;
1833
			} else if (isStartingElement(next, FIGURE_REF)) {
1834
				//TODO
1835
				handleNotYetImplementedElement(next);
1836
			} else if (isStartingElement(next, FOOTNOTE_REF)) {
1837
				//TODO
1838
				handleNotYetImplementedElement(next);
1839
			} else if (isStartingElement(next, BR)) {
1840
				text += "<br/>";
1841
				isTextMode = false;
1842
			} else if (isEndingElement(next, BR)) {
1843
				isTextMode = true;
1844
			} else if (isHtml(next)) {
1845
				text += getXmlTag(next);
1846
			} else if (next.isStartElement()) {
1847
				if (isStartingElement(next, ANNOTATION)) {
1848
					handleNotYetImplementedElement(next); //TODO test handleSimpleAnnotation
1849
				} else if (isStartingElement(next, ITALICS)) {
1850
					handleNotYetImplementedElement(next);
1851
				} else if (isStartingElement(next, BOLD)) {
1852
					handleNotYetImplementedElement(next);
1853
				} else if (isStartingElement(next, FIGURE)) {
1854
					handleFigure(state, reader, next);
1855
				} else if (isStartingElement(next, SUB_CHAR)) {
1856
					List<TextData> textData = handleChar(state, reader, next, feature);
1857
					result.addAll(textData);
1858
				} else if (isStartingElement(next, FOOTNOTE)) {
1859
					FootnoteDataHolder footnote = handleFootnote(state, reader,	next);
1860
					if (footnote.isRef()) {
1861
						String message = "Ref footnote not implemented here";
1862
						fireWarningEvent(message, next, 4);
1863
					} else {
1864
						registerGivenFootnote(state, footnote);
1865
					}
1866
				} else {
1867
					handleUnexpectedStartElement(next.asStartElement());
1868
				}
1869
			} else if (next.isCharacters()) {
1870
				if (!isTextMode) {
1871
					String message = "String is not in text mode";
1872
					fireWarningEvent(message, next, 6);
1873
				} else {
1874
					text += next.asCharacters().getData();
1875
				}
1876
			} else {
1877
				handleUnexpectedEndElement(next.asEndElement());
1878
			}
1879
		}
1880
		throw new IllegalStateException("RefPart has no closing tag");
1881
	}
1882

  
1883
	/**
1884
	 * @param classValue
1885
	 * @param state
1886
	 * @param parentEvent
1887
	 * @param parentFeature 
1888
	 * @return
1889
	 * @throws UndefinedTransformerMethodException
1890
	 */
1891
	private Feature makeFeature(String classValue, MarkupImportState state, XMLEvent parentEvent, Feature parentFeature) {
1892
		UUID uuid;
1893
		try {
1894
			String featureText = StringUtils.capitalize(classValue);
1895
			if (parentFeature != null){
1896
				featureText = "<%s>" + featureText;
1897
				featureText = String.format(featureText, parentFeature.getTitleCache());
1898
				classValue = "<%s>" + classValue;
1899
				classValue = String.format(classValue, parentFeature.getTitleCache());
1900
			}
1901

  
1902
			
1903
			Feature feature = state.getTransformer().getFeatureByKey(classValue);
1904
			if (feature != null) {
1905
				return feature;
1906
			}
1907
			uuid = state.getTransformer().getFeatureUuid(classValue);
1908
			
1909
			if (uuid == null){
1910
				uuid = state.getUnknownFeatureUuid(classValue);
1911
			}
1912
			
1913
			if (uuid == null) {
1914
				// TODO
1915
				String message = "Uuid is not defined for '%s'";
1916
				message = String.format(message, classValue);
1917
				if (! message.contains("<")){
1918
					//log only top level features
1919
					fireWarningEvent(message, parentEvent, 8);
1920
				}
1921
				uuid = UUID.randomUUID();
1922
				state.putUnknownFeatureUuid(classValue, uuid);
1923
			}
1924

  
1925
			// TODO eFlora vocabulary
1926
			TermVocabulary<Feature> voc = null;
1927
			feature = getFeature(state, uuid, featureText, featureText, classValue, voc);
1928
			if (parentFeature != null){
1929
				parentFeature.addIncludes(feature);
1930
				save(parentFeature, state);
1931
			}
1932
			save(feature, state);
1933
					
1934
			if (feature == null) {
1935
				throw new NullPointerException(classValue + " not recognized as a feature");
1936
			}
1937
//			state.putFeatureToCurrentList(feature);
1938
			return feature;
1939
		} catch (Exception e) {
1940
			String message = "Could not create feature for %s: %s";
1941
			message = String.format(message, classValue, e.getMessage());
1942
			fireWarningEvent(message, parentEvent, 4);
1943
			state.putUnknownFeatureUuid(classValue, null);
1944
//			e.printStackTrace();
1945
			return Feature.UNKNOWN();
1946
		}
1947
	}
1948 626

  
1949 627
}
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/markup/MarkupFeatureImport.java
1
/**
2
 * Copyright (C) 2009 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

  
10
package eu.etaxonomy.cdm.io.markup;
11

  
12
import java.util.ArrayList;
13
import java.util.HashSet;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.Set;
17
import java.util.UUID;
18

  
19
import javax.xml.stream.XMLEventReader;
20
import javax.xml.stream.XMLStreamException;
21
import javax.xml.stream.events.Attribute;
22
import javax.xml.stream.events.XMLEvent;
23

  
24
import org.apache.commons.lang.StringUtils;
25
import org.apache.log4j.Logger;
26

  
27
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
28
import eu.etaxonomy.cdm.model.common.AnnotatableEntity;
29
import eu.etaxonomy.cdm.model.common.Annotation;
30
import eu.etaxonomy.cdm.model.common.AnnotationType;
31
import eu.etaxonomy.cdm.model.common.Language;
32
import eu.etaxonomy.cdm.model.common.TermVocabulary;
33
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
34
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
35
import eu.etaxonomy.cdm.model.description.Feature;
36
import eu.etaxonomy.cdm.model.description.TaxonDescription;
37
import eu.etaxonomy.cdm.model.description.TextData;
38
import eu.etaxonomy.cdm.model.location.NamedArea;
39
import eu.etaxonomy.cdm.model.media.Media;
40
import eu.etaxonomy.cdm.model.reference.Reference;
41
import eu.etaxonomy.cdm.model.taxon.Taxon;
42

  
43
/**
44
 * @author a.mueller
45
 * @created 30.05.2012
46
 * 
47
 */
48
public class MarkupFeatureImport extends MarkupImportBase {
49
	@SuppressWarnings("unused")
50
	private static final Logger logger = Logger.getLogger(MarkupFeatureImport.class);
51

  
52
	protected static final String MODS_TITLEINFO = "titleInfo";
53

  
54
	private MarkupSpecimenImport specimenImport;
55
	private MarkupNomenclatureImport nomenclatureImport;
56

  
57
	/**
	 * Creates a feature import that delegates to the given document import and
	 * keeps references to the specimen and nomenclature imports.
	 *
	 * @param docImport the document import passed on to the base class
	 * @param specimenImport the specimen import stored in this instance
	 * @param nomenclatureImport the nomenclature import stored in this instance
	 */
	public MarkupFeatureImport(MarkupDocumentImport docImport, MarkupSpecimenImport specimenImport, MarkupNomenclatureImport nomenclatureImport) {
		super(docImport);
		this.nomenclatureImport = nomenclatureImport;
		this.specimenImport = specimenImport;
	}
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)