2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.markup
;
12 import java
.util
.ArrayList
;
13 import java
.util
.List
;
17 import javax
.xml
.stream
.XMLEventReader
;
18 import javax
.xml
.stream
.XMLStreamException
;
19 import javax
.xml
.stream
.events
.Attribute
;
20 import javax
.xml
.stream
.events
.XMLEvent
;
22 import org
.apache
.commons
.lang
.StringUtils
;
23 import org
.apache
.log4j
.Logger
;
25 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
26 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacadeCacheStrategy
;
27 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
28 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
29 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
30 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
31 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
32 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
33 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
34 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
35 import eu
.etaxonomy
.cdm
.model
.location
.NamedAreaLevel
;
36 import eu
.etaxonomy
.cdm
.model
.location
.Country
;
37 import eu
.etaxonomy
.cdm
.model
.name
.HomotypicalGroup
;
38 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
39 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignationStatus
;
40 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
41 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
42 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
43 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
44 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
45 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
46 import eu
.etaxonomy
.cdm
.strategy
.parser
.SpecimenTypeParser
;
47 import eu
.etaxonomy
.cdm
.strategy
.parser
.SpecimenTypeParser
.TypeInfo
;
48 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
55 public class MarkupSpecimenImport
extends MarkupImportBase
{
// Class logger; marked "unused" because no visible code in this class logs through it.
56 @SuppressWarnings("unused")
57 private static final Logger logger
= Logger
.getLogger(MarkupSpecimenImport
.class);
// Names of the markup elements and attributes that the handlers in this class look for.
59 private static final String ALTERNATIVE_COLLECTION_TYPE_STATUS
= "alternativeCollectionTypeStatus";
60 private static final String ALTERNATIVE_COLLECTOR
= "alternativeCollector";
61 private static final String ALTERNATIVE_FIELD_NUM
= "alternativeFieldNum";
62 private static final String COLLECTOR
= "collector";
63 private static final String COLLECTION
= "collection";
64 private static final String COLLECTION_AND_TYPE
= "collectionAndType";
65 private static final String COLLECTION_TYPE_STATUS
= "collectionTypeStatus";
66 private static final String DAY
= "day";
// Read as an attribute of the specimen type element (see handleSpecimenType).
67 private static final String DESTROYED
= "destroyed";
68 private static final String FIELD_NUM
= "fieldNum";
69 private static final String FULL_TYPE
= "fullType";
70 private static final String FULL_DATE
= "fullDate";
71 private static final String LOCALITY
= "locality";
// Read as an attribute of the specimen type element (see handleSpecimenType).
72 private static final String LOST
= "lost";
73 private static final String MONTH
= "month";
74 private static final String SUB_GATHERING
= "subGathering";
// NOT_FOUND, NOT_SEEN and UNKNOWN are likewise read as attributes of the specimen type element.
75 private static final String NOT_FOUND
= "notFound";
76 private static final String NOT_SEEN
= "notSeen";
77 private static final String ORIGINAL_DETERMINATION
= "originalDetermination";
79 private static final String UNKNOWN
= "unknown";
80 private static final String YEAR
= "year";
/**
 * Creates the specimen import handler.
 *
 * @param docImport the document import passed in by the creator of this handler
 *                  (presumably forwarded to the superclass constructor - confirm)
 */
84 public MarkupSpecimenImport(MarkupDocumentImport docImport
) {
/**
 * Handles a specimen type element.
 * <p>
 * First the attributes of the start element are read and removed from the attribute map
 * (type status, notSeen, unknown, notFound, destroyed, lost); any attribute left over is
 * reported through {@code checkNoAttributes}. Then the child events are consumed until the
 * matching end element is found, at which point the accumulated character data and the
 * accumulated collection-and-type string are handed to {@code makeSpecimenType}.
 * <p>
 * Child handling: gathering elements are delegated to {@code handleGathering},
 * the character data of collectionAndType elements is appended to a string,
 * plain character events are appended to the text, and fullType, typeStatus,
 * originalDetermination, nested specimenType, citation, notes and annotation elements are
 * reported as not yet implemented.
 * <p>
 * NOTE(review): if the homotypical group has no typified names a warning is fired and
 * {@code firstName} stays {@code null}, yet it is still passed to {@code makeSpecimenType}
 * - confirm that a null name is handled there.
 *
 * @param state the current import state
 * @param reader the XML event reader positioned inside the specimen type element
 * @param parentEvent the start event of the specimen type element
 * @param homotypicalGroup the group whose first typified name receives the type designation
 * @throws XMLStreamException if reading from the event reader fails
 * @throws IllegalStateException if the closing tag is not encountered
 */
89 public void handleSpecimenType(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
,
90 HomotypicalGroup homotypicalGroup
) throws XMLStreamException
{
// Read and remove all known attributes so that checkNoAttributes only sees unknown ones.
93 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
94 String typeStatus
= getAndRemoveAttributeValue(attributes
, TYPE_STATUS
);
95 String notSeen
= getAndRemoveAttributeValue(attributes
, NOT_SEEN
);
96 String unknown
= getAndRemoveAttributeValue(attributes
, UNKNOWN
);
97 String notFound
= getAndRemoveAttributeValue(attributes
, NOT_FOUND
);
98 String destroyed
= getAndRemoveAttributeValue(attributes
, DESTROYED
);
99 String lost
= getAndRemoveAttributeValue(attributes
, LOST
);
100 checkNoAttributes(attributes
, parentEvent
);
// Only the first non-empty attribute in this chain is reported; the rest are skipped.
101 if (StringUtils
.isNotEmpty(typeStatus
)) {
103 // currently not needed
104 } else if (StringUtils
.isNotEmpty(notSeen
)) {
105 handleNotYetImplementedAttribute(attributes
, NOT_SEEN
);
106 } else if (StringUtils
.isNotEmpty(unknown
)) {
107 handleNotYetImplementedAttribute(attributes
, UNKNOWN
);
108 } else if (StringUtils
.isNotEmpty(notFound
)) {
109 handleNotYetImplementedAttribute(attributes
, NOT_FOUND
);
110 } else if (StringUtils
.isNotEmpty(destroyed
)) {
111 handleNotYetImplementedAttribute(attributes
, DESTROYED
);
112 } else if (StringUtils
.isNotEmpty(lost
)) {
113 handleNotYetImplementedAttribute(attributes
, LOST
);
// Pick the first typified name of the group as the name that receives the designation.
116 NonViralName
<?
> firstName
= null;
117 Set
<TaxonNameBase
> names
= homotypicalGroup
.getTypifiedNames();
118 if (names
.isEmpty()) {
119 String message
= "There is no name in a homotypical group. Can't create the specimen type";
120 fireWarningEvent(message
, parentEvent
, 8);
122 firstName
= CdmBase
.deproxy(names
.iterator().next(),NonViralName
.class);
// The facade collects gathering data for a preserved specimen.
125 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.PreservedSpecimen
);
127 String collectionAndType
= "";
// Consume child events until the matching end element of the specimen type is reached.
129 while (reader
.hasNext()) {
130 XMLEvent next
= readNoWhitespace(reader
);
131 if (isMyEndingElement(next
, parentEvent
)) {
132 makeSpecimenType(state
, facade
, text
, collectionAndType
, firstName
, parentEvent
);
134 } else if (isStartingElement(next
, FULL_TYPE
)) {
135 handleNotYetImplementedElement(next
);
136 // homotypicalGroup = handleNom(state, reader, next, taxon,
137 // homotypicalGroup);
138 } else if (isStartingElement(next
, TYPE_STATUS
)) {
139 handleNotYetImplementedElement(next
);
140 } else if (isStartingElement(next
, GATHERING
)) {
141 handleGathering(state
, reader
, next
, facade
);
142 } else if (isStartingElement(next
, ORIGINAL_DETERMINATION
)) {
143 handleNotYetImplementedElement(next
);
144 } else if (isStartingElement(next
, SPECIMEN_TYPE
)) {
145 handleNotYetImplementedElement(next
);
146 } else if (isStartingElement(next
, COLLECTION_AND_TYPE
)) {
// Several collectionAndType children are concatenated into one string.
147 collectionAndType
+= getCData(state
, reader
, next
, true);
148 } else if (isStartingElement(next
, CITATION
)) {
149 handleNotYetImplementedElement(next
);
150 } else if (isStartingElement(next
, NOTES
)) {
151 handleNotYetImplementedElement(next
);
152 } else if (isStartingElement(next
, ANNOTATION
)) {
153 handleNotYetImplementedElement(next
);
154 } else if (next
.isCharacters()) {
155 text
+= next
.asCharacters().getData();
157 handleUnexpectedElement(next
);
160 // TODO handle missing end element
161 throw new IllegalStateException("Specimen type has no closing tag");
/**
 * Creates the type specimens and type designations for a specimen type element.
 * <p>
 * The collection-and-type string is stripped of enclosing parentheses (if present), split
 * at ';' and ',', and for every part a duplicate is added to the facade and registered as
 * specimen type designation on the given name.
 * <p>
 * NOTE(review): {@code name} is dereferenced without a visible null check although the
 * caller may pass {@code null} when the homotypical group is empty - confirm.
 *
 * @param state the current import state
 * @param facade the facade holding the gathering data; receives one duplicate per part
 * @param text the free text found inside the specimen type element
 * @param collectionAndType the concatenated content of the collectionAndType elements
 * @param name the name that receives the specimen type designations
 * @param parentEvent the start event of the specimen type element, used for warnings
 */
166 private void makeSpecimenType(MarkupImportState state
, DerivedUnitFacade facade
, String text
, String collectionAndType
,
167 NonViralName
<?
> name
, XMLEvent parentEvent
) {
// Free text is only expected to be punctuation; the warning below reports the text
// (presumably only in the non-punctuation case - the branch structure should be confirmed).
169 if (isPunctuation(text
)){
172 String message
= "Text '%s' not handled for <SpecimenType>";
173 this.fireWarningEvent(String
.format(message
, text
), parentEvent
, 4);
// Matches a string fully enclosed in parentheses, optionally followed by a single dot.
177 if (collectionAndType
.matches("^\\(.*\\)\\.?$")) {
// NOTE(review): this removes every dot in the string, not only the trailing one.
178 collectionAndType
= collectionAndType
.replaceAll("\\.", "");
// Drop the first and last character, i.e. the enclosing parentheses.
179 collectionAndType
= collectionAndType
.substring(1, collectionAndType
.length() - 1);
// Each ';' or ',' separated part describes one type specimen.
182 String
[] split
= collectionAndType
.split("[;,]");
183 for (String str
: split
) {
185 boolean addToAllNamesInGroup
= true;
186 TypeInfo typeInfo
= makeSpecimenTypeTypeInfo(str
, parentEvent
);
187 SpecimenTypeDesignationStatus typeStatus
= typeInfo
.status
;
188 Collection collection
= createCollection(typeInfo
.collectionString
);
190 // TODO improve cache strategy handling
191 DerivedUnit typeSpecimen
= facade
.addDuplicate(collection
, null, null, null, null);
192 typeSpecimen
.setCacheStrategy(new DerivedUnitFacadeCacheStrategy());
193 name
.addSpecimenTypeDesignation(typeSpecimen
, typeStatus
, null, null, null, false, addToAllNamesInGroup
);
/**
 * Creates a new {@link Collection} instance and sets the given code on it.
 *
 * @param code the collection code
 * @return the new collection (the return statement is not visible here - presumably
 *         {@code result}; confirm)
 */
198 private Collection
createCollection(String code
) {
200 // TODO code <-> name
201 Collection result
= Collection
.NewInstance();
202 result
.setCode(code
);
/**
 * Parses one collection-and-type part into a {@link TypeInfo}.
 * <p>
 * The string is split at whitespace. A token matching
 * {@code SpecimenTypeParser.typeTypePattern} is parsed into a type designation status,
 * a token matching {@code SpecimenTypeParser.collectionPattern} is stored as collection
 * string, and for any other token a warning is fired. If several tokens match the same
 * pattern, a later one overwrites an earlier one because the fields are assigned inside
 * the loop.
 *
 * @param originalString the part to parse
 * @param event the event used as location for warnings
 * @return the type info filled from the tokens (the return statement is not visible
 *         here - presumably {@code result}; confirm)
 */
207 private TypeInfo
makeSpecimenTypeTypeInfo(String originalString
, XMLEvent event
) {
208 TypeInfo result
= new TypeInfo();
209 String
[] split
= originalString
.split("\\s+");
210 for (String str
: split
) {
211 if (str
.matches(SpecimenTypeParser
.typeTypePattern
)) {
212 SpecimenTypeDesignationStatus status
;
214 status
= SpecimenTypeParser
.parseSpecimenTypeStatus(str
);
215 } catch (UnknownCdmTypeException e
) {
// An unparsable status is reported as a warning instead of aborting the import.
216 String message
= "Specimen type status '%s' not recognized by parser";
217 fireWarningEvent(String
.format(message
, str
), event
, 4);
220 result
.status
= status
;
221 } else if (str
.matches(SpecimenTypeParser
.collectionPattern
)) {
222 result
.collectionString
= str
;
224 String message
= "Type part '%s' could not be recognized";
225 fireWarningEvent(String
.format(message
, str
), event
, 2);
/**
 * Handles a gathering element and writes its data into the given facade.
 * <p>
 * The start element must not carry attributes. Child events are consumed until the
 * matching end element is found. Handled children: collector (converted with
 * {@code createCollector}, set on the facade and stored as current collector in the
 * state), fieldNum (set as field number), locality (delegated to {@code handleLocality})
 * and dates (delegated to {@code handleDates}, set as gathering period). The alternative
 * collector / field number, collection type status, collectionAndType, alternative
 * collection type status, subGathering, collection and notes children are reported as
 * not yet implemented.
 * <p>
 * At the end element the collector and field number are checked through
 * {@code checkMandatoryElement}; the collector check is only made when the state holds
 * no current collector.
 * <p>
 * NOTE(review): the exception text speaks of "Collection" although the element handled
 * here is the gathering - confirm the intended wording.
 *
 * @param state the current import state
 * @param reader the XML event reader positioned inside the gathering element
 * @param parentEvent the start event of the gathering element
 * @param facade the facade that receives the gathering data
 * @throws XMLStreamException if reading from the event reader fails
 * @throws IllegalStateException if the closing tag is not encountered
 */
233 private void handleGathering(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, DerivedUnitFacade facade
) throws XMLStreamException
{
234 checkNoAttributes(parentEvent
);
235 boolean hasCollector
= false;
236 boolean hasFieldNum
= false;
239 while (reader
.hasNext()) {
240 XMLEvent next
= readNoWhitespace(reader
);
241 if (isMyEndingElement(next
, parentEvent
)) {
// A collector remembered in the state can stand in for a missing collector element.
243 if (state
.getCurrentCollector() == null){
244 checkMandatoryElement(hasCollector
,parentEvent
.asStartElement(), COLLECTOR
);
246 facade
.setCollector(state
.getCurrentCollector());
249 checkMandatoryElement(hasFieldNum
,parentEvent
.asStartElement(), FIELD_NUM
);
251 }else if (isStartingElement(next
, COLLECTOR
)) {
253 String collectorStr
= getCData(state
, reader
, next
);
254 TeamOrPersonBase
<?
> collector
= createCollector(collectorStr
);
255 facade
.setCollector(collector
);
// Remember the collector so that it is available via state.getCurrentCollector().
256 state
.setCurrentCollector(collector
);
257 } else if (isStartingElement(next
, ALTERNATIVE_COLLECTOR
)) {
258 handleNotYetImplementedElement(next
);
259 } else if (isStartingElement(next
, FIELD_NUM
)) {
261 String fieldNumStr
= getCData(state
, reader
, next
);
262 facade
.setFieldNumber(fieldNumStr
);
263 } else if (isStartingElement(next
, ALTERNATIVE_FIELD_NUM
)) {
264 handleNotYetImplementedElement(next
);
265 } else if (isStartingElement(next
, COLLECTION_TYPE_STATUS
)) {
266 handleNotYetImplementedElement(next
);
267 } else if (isStartingElement(next
, COLLECTION_AND_TYPE
)) { //does this make sense here?
268 handleNotYetImplementedElement(next
);
269 } else if (isStartingElement(next
, ALTERNATIVE_COLLECTION_TYPE_STATUS
)) {
270 handleNotYetImplementedElement(next
);
271 } else if (isStartingElement(next
, SUB_GATHERING
)) {
272 handleNotYetImplementedElement(next
);
273 } else if (isStartingElement(next
, COLLECTION
)) {
274 handleNotYetImplementedElement(next
);
275 } else if (isStartingElement(next
, LOCALITY
)) {
276 handleLocality(state
, reader
, next
, facade
);
277 } else if (isStartingElement(next
, DATES
)) {
278 TimePeriod timePeriod
= handleDates(state
, reader
, next
);
279 facade
.setGatheringPeriod(timePeriod
);
280 } else if (isStartingElement(next
, NOTES
)) {
281 handleNotYetImplementedElement(next
);
283 handleUnexpectedElement(next
);
286 throw new IllegalStateException("Collection has no closing tag.");
/**
 * Handles a dates element and converts it into a {@link TimePeriod}.
 * <p>
 * The start element must not carry attributes. A fullDate child is parsed with
 * {@code TimePeriodParser.parseString}; a warning is fired if the parser leaves free text.
 * The day, month and year children are trimmed, normalized with {@code normalizeDate} and,
 * if numeric, set as start day / month / year; otherwise a warning is fired and the date
 * is flagged as having unparsed atomised parts. At the end element a warning is fired
 * when {@code isAlternative} rejects the combination of the three flags.
 * <p>
 * NOTE(review): the fullDate branch replaces {@code result}, so day / month / year values
 * set before a fullDate child is read are discarded - confirm this is intended.
 *
 * @param state the current import state
 * @param reader the XML event reader positioned inside the dates element
 * @param parent the start event of the dates element
 * @return the time period built from the children (the return statement is not visible
 *         here - presumably {@code result}; confirm)
 * @throws XMLStreamException if reading from the event reader fails
 * @throws IllegalStateException if the closing tag is not encountered
 */
291 private TimePeriod
handleDates(MarkupImportState state
, XMLEventReader reader
, XMLEvent parent
) throws XMLStreamException
{
292 checkNoAttributes(parent
);
293 TimePeriod result
= TimePeriod
.NewInstance();
// Format arguments: the name of the date part, then the unparsable value.
294 String parseMessage
= "%s can not be parsed: %s";
295 boolean hasFullDate
= false;
296 boolean hasAtomised
= false;
297 boolean hasUnparsedAtomised
= false;
298 while (reader
.hasNext()) {
299 XMLEvent next
= readNoWhitespace(reader
);
300 if (isMyEndingElement(next
, parent
)) {
301 if (! isAlternative(hasFullDate
, hasAtomised
, hasUnparsedAtomised
)){
302 String message
= "Some problems exist when defining the date";
303 fireWarningEvent(message
, parent
, 4);
306 } else if (isStartingElement(next
, FULL_DATE
)) {
307 String fullDate
= getCData(state
, reader
, next
, true);
308 result
= TimePeriodParser
.parseString(fullDate
);
// Free text on the parsed period is treated as a sign that parsing did not succeed.
309 if (result
.getFreeText() != null){
310 fireWarningEvent(String
.format(parseMessage
, FULL_DATE
, fullDate
), parent
, 1);
313 } else if (isStartingElement(next
, DAY
)) {
314 String day
= getCData(state
, reader
, next
, true).trim();
315 day
= normalizeDate(day
);
316 if (CdmUtils
.isNumeric(day
)){
317 result
.setStartDay(Integer
.valueOf(day
));
320 fireWarningEvent(String
.format(parseMessage
,"Day", day
), parent
, 2);
321 hasUnparsedAtomised
= true;
323 } else if (isStartingElement(next
, MONTH
)) {
324 String month
= getCData(state
, reader
, next
, true).trim();
325 month
= normalizeDate(month
);
326 if (CdmUtils
.isNumeric(month
)){
327 result
.setStartMonth(Integer
.valueOf(month
));
330 fireWarningEvent(String
.format(parseMessage
,"Month", month
), parent
, 2);
331 hasUnparsedAtomised
= true;
333 } else if (isStartingElement(next
, YEAR
)) {
334 String year
= getCData(state
, reader
, next
, true).trim();
335 year
= normalizeDate(year
);
336 if (CdmUtils
.isNumeric(year
)){
337 result
.setStartYear(Integer
.valueOf(year
));
340 fireWarningEvent(String
.format(parseMessage
,"Year", year
), parent
, 2);
341 hasUnparsedAtomised
= true;
344 handleUnexpectedElement(next
);
347 throw new IllegalStateException("Dates has no closing tag.");
/**
 * Normalizes one atomised date part (day, month or year).
 * <p>
 * Blank input is handled separately (the statement in that branch is not visible here).
 * Otherwise the value is trimmed and all leading '-' characters are removed.
 *
 * @param partOfDate the raw date part
 * @return the normalized date part (return statements are not visible here - confirm)
 */
351 private String
normalizeDate(String partOfDate
) {
352 if (isBlank(partOfDate
)){
355 partOfDate
= partOfDate
.trim();
// Strip any number of leading hyphens.
356 while (partOfDate
.startsWith("-")){
357 partOfDate
= partOfDate
.substring(1);
/**
 * Checks that the three flags form one of the accepted combinations.
 *
 * @param first the first flag
 * @param second the second flag
 * @param third the third flag
 * @return {@code true} if exactly one of {@code first} and {@code second} is set while
 *         {@code third} is not set, or if only {@code third} is set; {@code false} otherwise
 */
363 private boolean isAlternative(boolean first
, boolean second
, boolean third
) {
// XOR: exactly one of the first two flags, and the third must then be unset.
364 return ( (first ^ second
) && !third
) ||
365 (! first
&& ! second
&& third
) ;
/**
 * Handles a locality element and writes its content into the given facade.
 * <p>
 * The class attribute of the start element decides how the text is used: the value
 * "locality" (case-insensitive) is treated as the locality itself, while for other values
 * a named area level is created with {@code makeNamedAreaLevel}. Character data of the
 * element is accumulated; altitude, coordinates and annotation children are reported as
 * not yet implemented. At the end element non-blank text is normalized and then either
 * set as locality (with the default language) or, with a trailing dot removed, turned into
 * a named area that is added as collecting area (which of the two applies presumably
 * depends on {@code isLocality} - the branch condition should be confirmed).
 *
 * @param state the current import state
 * @param reader the XML event reader positioned inside the locality element
 * @param parentEvent the start event of the locality element
 * @param facade the facade that receives the locality or collecting area
 * @throws XMLStreamException if reading from the event reader fails
 * @throws IllegalStateException if the closing tag is not encountered
 */
369 private void handleLocality(MarkupImportState state
, XMLEventReader reader
,XMLEvent parentEvent
, DerivedUnitFacade facade
)throws XMLStreamException
{
370 String classValue
= getClassOnlyAttribute(parentEvent
);
371 boolean isLocality
= false;
372 NamedAreaLevel areaLevel
= null;
373 if ("locality".equalsIgnoreCase(classValue
)) {
376 areaLevel
= makeNamedAreaLevel(state
, classValue
, parentEvent
);
381 while (reader
.hasNext()) {
382 XMLEvent next
= readNoWhitespace(reader
);
383 if (isMyEndingElement(next
, parentEvent
)) {
// Blank text produces neither a locality nor a collecting area.
384 if (StringUtils
.isNotBlank(text
)) {
385 text
= normalize(text
);
387 facade
.setLocality(text
, getDefaultLanguage(state
));
389 text
= CdmUtils
.removeTrailingDot(text
);
390 NamedArea area
= makeArea(state
, text
, areaLevel
);
391 facade
.addCollectingArea(area
);
396 }else if (isStartingElement(next
, ALTITUDE
)) {
397 handleNotYetImplementedElement(next
);
398 // homotypicalGroup = handleNom(state, reader, next, taxon,
399 // homotypicalGroup);
400 } else if (isStartingElement(next
, COORDINATES
)) {
401 handleNotYetImplementedElement(next
);
402 } else if (isStartingElement(next
, ANNOTATION
)) {
403 handleNotYetImplementedElement(next
);
404 } else if (next
.isCharacters()) {
405 text
+= next
.asCharacters().getData();
407 handleUnexpectedElement(next
);
// Message names the element actually parsed here (was a copy of the specimen type message).
410 throw new IllegalStateException("<locality> has no closing tag");
/**
 * Creates the collector for the given collector string by delegating to
 * {@code createAuthor}.
 *
 * @param collectorStr the collector as read from the markup
 * @return the team or person returned by {@code createAuthor}
 */
415 private TeamOrPersonBase
<?
> createCollector(String collectorStr
) {
416 return createAuthor(collectorStr
);
/**
 * Handles a materials examined section and returns the description elements created
 * from it.
 * <p>
 * The current areas stored in the state are reset before reading and again when the end
 * element is reached. Child handling: sub headings are evaluated with
 * {@code isFeatureHeading} / {@code makeHeadingFeature}; br elements are recognized
 * explicitly; every gathering element creates a new facade, receives the current areas,
 * is parsed with {@code handleGathering} and is wrapped into an
 * {@link IndividualsAssociation} that is added to the result; character data is checked
 * with {@code isPunctuation} and may be reported as unrecognized text.
 * <p>
 * NOTE(review): {@code isFeatureHeading} itself calls {@code makeHeadingFeature}, so a
 * sub heading is evaluated twice, including that method's side effects (warnings,
 * current areas) - confirm this is acceptable.
 *
 * @param state the current import state
 * @param reader the XML event reader positioned inside the section
 * @param parentEvent the start event of the section
 * @param feature the feature valid at the start; may be replaced by a sub heading
 * @return the list of created description elements (the return statement is not visible
 *         here - presumably {@code result}; confirm)
 * @throws XMLStreamException if reading from the event reader fails
 * @throws IllegalStateException if the closing tag is not encountered
 */
420 public List
<DescriptionElementBase
> handleMaterialsExamined(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, Feature feature
) throws XMLStreamException
{
421 List
<DescriptionElementBase
> result
= new ArrayList
<DescriptionElementBase
>();
422 //reset current areas
423 state
.removeCurrentAreas();
424 while (reader
.hasNext()) {
425 XMLEvent next
= readNoWhitespace(reader
);
426 if (isMyEndingElement(next
, parentEvent
)) {
427 if (result
.isEmpty()){
428 fireWarningEvent("Materials examined created empty Individual Associations list", parentEvent
, 4);
// Areas are reset again once the section has been read completely.
430 state
.removeCurrentAreas();
432 } else if (isStartingElement(next
, SUB_HEADING
)) {
433 // Map<String, Object> inlineMarkup = new HashMap<String, Object>();
434 String text
= getCData(state
, reader
, next
, true);
435 if (isFeatureHeading(state
, next
, text
)){
436 feature
= makeHeadingFeature(state
, next
, text
, feature
);
438 String message
= "Unhandled subheading: %s";
439 fireWarningEvent(String
.format(message
, text
), next
, 4);
441 // for (String key : inlineMarkup.keySet()){
442 // handleInlineMarkup(state, key, inlineMarkup);
445 } else if (isStartingElement(next
, BR
) || isEndingElement(next
, BR
)) {
447 } else if (isStartingElement(next
, GATHERING
)) {
448 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.DerivedUnit
);
449 addCurrentAreas(state
, next
, facade
);
450 handleGathering(state
, reader
, next
, facade
);
// Use the derived unit when the facade has one, otherwise the field unit.
451 SpecimenOrObservationBase
<?
> specimen
;
452 if (facade
.innerDerivedUnit() != null){
453 specimen
= facade
.innerDerivedUnit();
455 specimen
= facade
.innerFieldUnit();
457 IndividualsAssociation individualsAssociation
= IndividualsAssociation
.NewInstance();
458 individualsAssociation
.setAssociatedSpecimenOrObservation(specimen
);
459 result
.add(individualsAssociation
);
460 }else if (next
.isCharacters()) {
461 String text
= next
.asCharacters().getData().trim();
462 if (isPunctuation(text
)){
465 String message
= "Unrecognized text: %s";
466 fireWarningEvent(String
.format(message
, text
), next
, 6);
469 handleUnexpectedElement(next
);
472 throw new IllegalStateException("<String> has no closing tag");
/**
 * Transfers the areas currently stored in the state to the given facade.
 * <p>
 * An area that is a {@code Country} is set as the country of the facade. For any other
 * area a warning is fired and the area is added as collecting area. (A first condition
 * preceding the country check is not visible here.)
 *
 * @param state the import state holding the current areas
 * @param event the event used as location for warnings
 * @param facade the facade that receives the areas
 */
478 private void addCurrentAreas(MarkupImportState state
, XMLEvent event
, DerivedUnitFacade facade
) {
479 for (NamedArea area
: state
.getCurrentAreas()){
482 }else if (area
.isInstanceOf(Country
.class)){
483 facade
.setCountry(area
);
// Non-country areas are not expected, but they are kept as collecting areas.
485 String message
= "Current area %s is not country. This is not expected for currently known data.";
486 fireWarningEvent(String
.format(message
, area
.getTitleCache()), event
, 2);
487 facade
.addCollectingArea(area
);
494 // private void handleInlineMarkup(MarkupImportState state, String key, Map<String, Object> inlineMarkup) {
495 // Object obj = inlineMarkup.get(key);
496 // if (key.equals(LOCALITY)){
497 // if (obj instanceof NamedArea){
498 // NamedArea area = (NamedArea)obj;
499 // state.addCurrentArea(area);
507 * Changes the feature if the (sub)-heading implies this. Also recognizes hidden country information
// Evaluates a (sub)heading text against a French "materials examined" pattern.
// If the text matches, the country hints "gabonais " and " pour le Gabon" are removed
// from the text and Gabon is added to the state's current areas for each hint found;
// the feature is then set to Feature.MATERIALS_EXAMINED() and registered with
// state.putFeatureToGeneralSorterList. For a non-matching text a warning with the
// original text is fired.
// NOTE(review): the pattern is case-insensitive ("(?i)") while the contains() checks
// below are case-sensitive, so a hint written with different casing matches the
// pattern but does not add the area - confirm whether that matters.
// The return statement is not visible here; presumably the resulting feature is returned.
514 private Feature
makeHeadingFeature(MarkupImportState state
, XMLEvent parent
, String originalText
, Feature feature
) {
515 //expand, provide by config or service
516 String materialRegEx
= "Mat[\u00E9\u00C9]riel";
517 String examinedRegEx
= "[\u00E9\u00C9]tudi[\u00E9\u00C9]";
518 String countryRegEx
= "(gabonais)";
519 String postfixCountryRegEx
= "\\s+(pour le Gabon)";
// Full pattern: material word, optional country adjective, examined word,
// optional country postfix, optional trailing colon.
521 String materialExaminedRegEx
= "(?i)" + materialRegEx
+ "\\s+(" + countryRegEx
+"\\s+)?" + examinedRegEx
+ "(" +postfixCountryRegEx
+ ")?:?";
523 String text
= originalText
;
528 if (text
.matches(materialExaminedRegEx
)){
530 if (text
.contains("gabonais ")){
531 text
= text
.replace("gabonais ", "");
532 state
.addCurrentArea(Country
.GABONGABONESEREPUBLIC());
534 if (text
.contains(" pour le Gabon")){
535 text
= text
.replace(" pour le Gabon", "");
536 state
.addCurrentArea(Country
.GABONGABONESEREPUBLIC());
540 feature
= Feature
.MATERIALS_EXAMINED();
541 state
.putFeatureToGeneralSorterList(feature
);
544 String message
= "Heading/Subheading not recognized: %s";
545 fireWarningEvent(String
.format(message
, originalText
), parent
, 4);
553 * True if heading or subheading represents feature information
// Returns true when makeHeadingFeature yields a non-null feature for the text when
// called with a null start feature.
// NOTE(review): this performs a full makeHeadingFeature call, including its side
// effects on the state and its warnings - confirm that is intended for a pure check.
559 private boolean isFeatureHeading(MarkupImportState state
, XMLEvent parent
, String text
) {
560 return makeHeadingFeature(state
, parent
, text
, null) != null;
564 public String
handleInLineGathering(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
565 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.FieldUnit
);
566 handleGathering(state
, reader
, parentEvent
, facade
);
567 SpecimenOrObservationBase
<?
> specimen
= facade
.innerFieldUnit();
568 if (specimen
== null){
569 specimen
= facade
.innerDerivedUnit();
570 String message
= "Inline gaterhing has no field unit";
571 fireWarningEvent(message
, parentEvent
, 2);
574 String result
= "<cdm:specimen uuid='%s'>%s</specimen>";
575 if (specimen
!= null){
576 result
= String
.format(result
, specimen
.getUuid(), specimen
.getTitleCache());
578 String message
= "Inline gathering has no specimen";
579 fireWarningEvent(message
, parentEvent
, 4);
581 save(specimen
, state
);