2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.markup
;
12 import java
.util
.ArrayList
;
13 import java
.util
.List
;
16 import java
.util
.UUID
;
17 import java
.util
.regex
.Matcher
;
18 import java
.util
.regex
.Pattern
;
20 import javax
.xml
.stream
.XMLEventReader
;
21 import javax
.xml
.stream
.XMLStreamException
;
22 import javax
.xml
.stream
.events
.Attribute
;
23 import javax
.xml
.stream
.events
.XMLEvent
;
25 import org
.apache
.commons
.lang
.StringUtils
;
26 import org
.apache
.log4j
.Logger
;
28 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
29 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacadeCacheStrategy
;
30 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
31 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
32 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
33 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
34 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
35 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTerm
;
36 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
37 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
38 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
39 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
40 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
41 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
42 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
43 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
44 import eu
.etaxonomy
.cdm
.model
.location
.Country
;
45 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
46 import eu
.etaxonomy
.cdm
.model
.location
.NamedAreaLevel
;
47 import eu
.etaxonomy
.cdm
.model
.name
.HomotypicalGroup
;
48 import eu
.etaxonomy
.cdm
.model
.name
.INonViralName
;
49 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
50 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
51 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignation
;
52 import eu
.etaxonomy
.cdm
.model
.name
.SpecimenTypeDesignationStatus
;
53 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
54 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
55 import eu
.etaxonomy
.cdm
.model
.occurrence
.DerivedUnit
;
56 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
57 import eu
.etaxonomy
.cdm
.model
.occurrence
.FieldUnit
;
58 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
59 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
60 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
61 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
62 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
63 import eu
.etaxonomy
.cdm
.strategy
.parser
.SpecimenTypeParser
;
64 import eu
.etaxonomy
.cdm
.strategy
.parser
.SpecimenTypeParser
.TypeInfo
;
65 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
72 public class MarkupSpecimenImport
extends MarkupImportBase
{
// Class logger; it is not referenced in the visible part of this class, hence the "unused" suppression.
73 @SuppressWarnings("unused")
74 private static final Logger logger
= Logger
.getLogger(MarkupSpecimenImport
.class);
// Names of markup XML elements and attributes that the handler methods of this class compare against
// (e.g. via isStartingElement(...) and getAndRemoveAttributeValue(...)).
76 private static final String ALTERNATIVE_COLLECTION_TYPE_STATUS
= "alternativeCollectionTypeStatus";
77 private static final String ALTERNATIVE_COLLECTOR
= "alternativeCollector";
78 private static final String ALTERNATIVE_FIELD_NUM
= "alternativeFieldNum";
79 private static final String COLLECTOR
= "collector";
80 private static final String COLLECTION
= "collection";
81 private static final String COLLECTION_AND_TYPE
= "collectionAndType";
82 private static final String COLLECTION_TYPE_STATUS
= "collectionTypeStatus";
83 private static final String DAY
= "day";
84 private static final String DESTROYED
= "destroyed";
85 private static final String FIELD_NUM
= "fieldNum";
86 private static final String FULL_TYPE
= "fullType";
87 private static final String FULL_DATE
= "fullDate";
88 private static final String GATHERING_NOTES
= "gatheringNotes";
89 private static final String LOST
= "lost";
90 private static final String MONTH
= "month";
91 private static final String SUB_GATHERING
= "subGathering";
92 private static final String NOT_FOUND
= "notFound";
93 private static final String NOT_SEEN
= "notSeen";
94 private static final String ORIGINAL_DETERMINATION
= "originalDetermination";
96 private static final String UNKNOWN
= "unknown";
97 private static final String YEAR
= "year";
101 public MarkupSpecimenImport(MarkupDocumentImport docImport
) {
/**
 * Handles a specimen type element of the markup document.
 * <p>
 * Reads (and removes) the attributes for type status, "notSeen", "unknown", "notFound",
 * "destroyed" and "lost" from the parent event; none of these values is imported yet, the
 * first non-empty one only triggers a warning or a "not yet implemented" notification.
 * The first typified name of the homotypical group is taken as the name to typify, a
 * {@link DerivedUnitFacade} of type PreservedSpecimen is created, and child events are read
 * until the closing element of {@code parentEvent} is found. At that point
 * {@code makeSpecimenType} is called with the collected text and the collection-and-type
 * string stored in the state, and the specimen type state is reset.
 * <p>
 * NOTE(review): this listing lacks several lines of the original method (for example the
 * declaration of {@code text} and some else/return/closing-brace lines) - confirm against
 * the repository version before changing the control flow.
 *
 * @param state import state; its collection-and-type buffer and specimen-type flag are
 *        reset and set by this method
 * @param reader event reader that supplies the child events
 * @param parentEvent the event of the specimen type element being handled
 * @param homotypicalGroup group whose first typified name receives the type designations
 * @throws XMLStreamException declared for the underlying XML event reading
 * @throws IllegalStateException with "Specimen type has no closing tag"; presumably when the
 *         events run out before the closing element is seen
 */
106 public void handleSpecimenType(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
,
107 HomotypicalGroup homotypicalGroup
) throws XMLStreamException
{
110 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
111 String typeStatus
= getAndRemoveAttributeValue(attributes
, TYPE_STATUS
);
112 String notSeen
= getAndRemoveAttributeValue(attributes
, NOT_SEEN
);
113 String unknown
= getAndRemoveAttributeValue(attributes
, UNKNOWN
);
114 String notFound
= getAndRemoveAttributeValue(attributes
, NOT_FOUND
);
115 String destroyed
= getAndRemoveAttributeValue(attributes
, DESTROYED
);
116 String lost
= getAndRemoveAttributeValue(attributes
, LOST
);
117 checkNoAttributes(attributes
, parentEvent
);
// else-if chain: only the first non-empty attribute is reported
118 if (StringUtils
.isNotEmpty(typeStatus
)) {
120 // currently not needed
121 fireWarningEvent("Type status not yet used", parentEvent
, 4);
122 } else if (StringUtils
.isNotEmpty(notSeen
)) {
123 handleNotYetImplementedAttribute(attributes
, NOT_SEEN
);
124 } else if (StringUtils
.isNotEmpty(unknown
)) {
125 handleNotYetImplementedAttribute(attributes
, UNKNOWN
);
126 } else if (StringUtils
.isNotEmpty(notFound
)) {
127 handleNotYetImplementedAttribute(attributes
, NOT_FOUND
);
128 } else if (StringUtils
.isNotEmpty(destroyed
)) {
129 handleNotYetImplementedAttribute(attributes
, DESTROYED
);
130 } else if (StringUtils
.isNotEmpty(lost
)) {
131 handleNotYetImplementedAttribute(attributes
, LOST
);
// the name to typify is the first typified name of the homotypical group
134 INonViralName firstName
= null;
135 Set
<TaxonNameBase
> names
= homotypicalGroup
.getTypifiedNames();
136 if (names
.isEmpty()) {
137 String message
= "There is no name in a homotypical group. Can't create the specimen type";
138 fireWarningEvent(message
, parentEvent
, 8);
140 firstName
= CdmBase
.deproxy(names
.iterator().next(),NonViralName
.class);
143 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.PreservedSpecimen
);
// mark the state as "inside a specimen type" while the children are processed
145 state
.resetCollectionAndType();
146 state
.setSpecimenType(true);
147 boolean isFullType
= false;
// read child events until the matching end element is found
149 while (reader
.hasNext()) {
150 XMLEvent next
= readNoWhitespace(reader
);
151 if (isMyEndingElement(next
, parentEvent
)) {
// closing element: create the designation(s), then clear the specimen type state
153 makeSpecimenType(state
, facade
, text
, state
.getCollectionAndType(), firstName
, parentEvent
);
155 state
.setSpecimenType(false);
156 state
.resetCollectionAndType();
158 } else if (isStartingElement(next
, FULL_TYPE
)) {
159 handleAmbigousManually(state
, reader
, next
.asStartElement());
161 } else if (isStartingElement(next
, TYPE_STATUS
)) {
162 handleNotYetImplementedElement(next
);
163 } else if (isStartingElement(next
, GATHERING
)) {
164 handleGathering(state
, reader
, next
, facade
);
165 } else if (isStartingElement(next
, ORIGINAL_DETERMINATION
)) {
166 handleNotYetImplementedElement(next
);
167 } else if (isStartingElement(next
, SPECIMEN_TYPE
)) {
168 handleNotYetImplementedElement(next
);
169 } else if (isStartingElement(next
, COLLECTION_AND_TYPE
)) {
170 String colAndType
= getCData(state
, reader
, next
, true);
171 state
.addCollectionAndType(colAndType
);
172 } else if (isStartingElement(next
, CITATION
)) {
173 handleNotYetImplementedElement(next
);
174 } else if (isStartingElement(next
, NOTES
)) {
175 handleNotYetImplementedElement(next
);
176 } else if (isStartingElement(next
, ANNOTATION
)) {
177 handleNotYetImplementedElement(next
);
178 } else if (next
.isCharacters()) {
// plain character data is accumulated and passed to makeSpecimenType at the end
179 text
+= next
.asCharacters().getData();
181 handleUnexpectedElement(next
);
184 throw new IllegalStateException("Specimen type has no closing tag");
/**
 * Creates specimen type designations for {@code name} from a collection-and-type string.
 * <p>
 * Free text that is neither blank nor punctuation is only reported with a warning. The
 * string is first offered to {@code makeFotgSpecimenType}; the combined condition with
 * {@code isUseFotGSpecimenTypeCollectionAndTypeOnly()} presumably ends the method (the
 * statement inside that branch is not part of this listing). Otherwise enclosing round
 * brackets (with an optional trailing dot) are stripped, the string is split at ';' and
 * each part at ',', and for every resulting piece a {@link TypeInfo} is computed, the
 * collection is looked up, a duplicate is added to the facade and a specimen type
 * designation is added to the name (with the add-to-all-names flag set).
 * <p>
 * NOTE(review): some lines of the original method are missing from this listing.
 *
 * @param state import state, passed on to the helper methods
 * @param facade facade to which a duplicate is added for each type piece
 * @param text character data found directly inside the specimen type element
 * @param collectionAndType collected collection-and-type string
 * @param name name that receives the type designations
 * @param parentEvent event used as location for warnings
 */
189 private void makeSpecimenType(MarkupImportState state
, DerivedUnitFacade facade
, String text
, String collectionAndType
,
190 INonViralName name
, XMLEvent parentEvent
) {
192 if (isBlank(text
) || isPunctuation(text
)){
195 String message
= "Text '%s' not handled for <SpecimenType>";
196 this.fireWarningEvent(String
.format(message
, text
), parentEvent
, 4);
199 if (makeFotgSpecimenType(state
, collectionAndType
, facade
, name
, parentEvent
) || state
.getConfig().isUseFotGSpecimenTypeCollectionAndTypeOnly()){
// strip "( ... )" or "( ... )." around the whole string
203 if (collectionAndType
.matches("^\\(.*\\)\\.?$")) {
204 collectionAndType
= collectionAndType
.replaceAll("\\.$", "");
205 collectionAndType
= collectionAndType
.substring(1, collectionAndType
.length() - 1);
208 String
[] splitsSemi
= collectionAndType
.split("[;]");
209 for (String splitSemi
: splitsSemi
) {
210 String
[] splitKomma
= splitSemi
.split("[,]");
// lastTypeInfo is reset per semicolon group and handed to the next comma part
211 TypeInfo lastTypeInfo
= null;
212 for (String str
: splitKomma
) {
214 boolean addToAllNamesInGroup
= true;
215 TypeInfo typeInfo
= makeSpecimenTypeTypeInfo(state
, str
, lastTypeInfo
, parentEvent
);
216 SpecimenTypeDesignationStatus typeStatus
= typeInfo
.status
;
217 Collection collection
= this.getCollection(state
, typeInfo
.collectionString
);
219 // TODO improve cache strategy handling
220 DerivedUnit typeSpecimen
= facade
.addDuplicate(collection
, null, null, null, null);
221 typeSpecimen
.setCacheStrategy(new DerivedUnitFacadeCacheStrategy());
222 name
.addSpecimenTypeDesignation(typeSpecimen
, typeStatus
, null, null, null, false, addToAllNamesInGroup
);
223 lastTypeInfo
= typeInfo
;
// Cache for the compiled collection-and-type pattern; it is compiled in makeFotgSpecimenType
// when this field is still null.
231 private Pattern fotgTypePattern
= null;
233 * Implemented for Flora of the Guyanas this may include duplicated code from similar places
235 * @param collectionAndTypeOrig
/**
 * Tries to interpret a collection-and-type string with a fixed set of regular expressions
 * and creates the corresponding specimen type designations on {@code name}.
 * <p>
 * Two forms are distinguished once the whole string matches {@code fotgTypePattern}:
 * <ul>
 * <li>"not designated": a designation flagged as not designated is added to the name;</li>
 * <li>a bracketed list of single types, optionally followed by a bracketed "designated by",
 *     "according to" or "here designated" part: that part is cut off, the outer brackets are
 *     removed, the single types are consumed one by one, and for each of them the type status,
 *     the collection codes and the additional information ("not seen", "destroyed",
 *     "presumed destroyed") are read. One designation is created per collection code, or a
 *     single one without specimen if only additional information is available.</li>
 * </ul>
 * The "designated by" part is finally applied to all created designations, either as a
 * citation or as an editorial annotation.
 * <p>
 * NOTE(review): the return statements and several try/if/else lines of the original are not
 * part of this listing; the caller {@code makeSpecimenType} uses the result as a condition.
 *
 * @param state import state, used for collection lookup and configuration
 * @param collectionAndTypeOrig the unmodified collection-and-type string
 * @param facade facade that supplies the derived unit and its duplicates
 * @param name name that receives the designations
 * @param parentEvent event used as location for warnings
 */
241 private boolean makeFotgSpecimenType(MarkupImportState state
, final String collectionAndTypeOrig
, DerivedUnitFacade facade
, INonViralName name
, XMLEvent parentEvent
) {
// working copy; the original string is kept for warning messages
242 String collectionAndType
= collectionAndTypeOrig
;
// building blocks of the overall pattern, combined with String.format below
244 String notDesignatedRE
= "not\\s+designated";
245 String designatedByRE
= "\\s*\\(((designated\\s+by\\s+|according\\s+to\\s+)[^\\)]+|here\\s+designated)\\)";
246 String typesRE
= "(holotype|isotypes?|neotype|isoneotype|syntype|lectotype|isolectotypes?|typ\\.\\scons\\.,?)";
247 String collectionRE
= "[A-Z\\-]{1,5}!?";
248 String collectionsRE
= String
.format("%s(,\\s+%s)*",collectionRE
, collectionRE
);
249 String addInfoRE
= "(not\\s+seen|(presumed\\s+)?destroyed)";
250 String singleTypeTypeRE
= String
.format("(%s\\s)?%s(,\\s+%s)*", typesRE
, collectionsRE
, addInfoRE
);
251 String allTypesRE
= String
.format("(\\(not\\s+seen\\)|\\(%s([,;]\\s%s)?\\))", singleTypeTypeRE
, singleTypeTypeRE
);
252 String designatedRE
= String
.format("%s(%s)?", allTypesRE
, designatedByRE
);
// compile the overall pattern only once per importer instance
253 if (fotgTypePattern
== null){
255 String pattern
= String
.format("(%s|%s)", notDesignatedRE
, designatedRE
);
256 fotgTypePattern
= Pattern
.compile(pattern
);
258 Matcher matcher
= fotgTypePattern
.matcher(collectionAndType
);
260 if (matcher
.matches()){
261 if (collectionAndType
.matches(notDesignatedRE
)){
262 SpecimenTypeDesignation desig
= SpecimenTypeDesignation
.NewInstance();
263 desig
.setNotDesignated(true);
264 // name.addSpecimenTypeDesignation(typeSpecimen, status, citation, citationMicroReference, originalNameString, isNotDesignated, addToAllHomotypicNames)
265 name
.addTypeDesignation(desig
, true);
266 }else if(collectionAndType
.matches(designatedRE
)){
// cut the bracketed "designated by ..." part off; it is applied to the designations at the end
267 String designatedBy
= null;
268 Matcher desigMatcher
= Pattern
.compile(designatedByRE
).matcher(collectionAndType
);
269 boolean hasDesignatedBy
= desigMatcher
.find();
270 if (hasDesignatedBy
){
271 designatedBy
= desigMatcher
.group(0);
272 collectionAndType
= collectionAndType
.replace(designatedBy
, "");
// remove the outer brackets
276 collectionAndType
= collectionAndType
.substring(1, collectionAndType
.length() -1);
// consume the single types from the front of the string, one per iteration
277 List
<String
> singleTypes
= new ArrayList
<String
>();
278 Pattern singleTypePattern
= Pattern
.compile("^" + singleTypeTypeRE
);
279 matcher
= singleTypePattern
.matcher(collectionAndType
);
280 while (matcher
.find()){
281 String match
= matcher
.group(0);
282 singleTypes
.add(match
);
283 collectionAndType
= collectionAndType
.substring(match
.length());
284 if (!collectionAndType
.isEmpty()){
// skip the separator character that follows the consumed single type
285 collectionAndType
= collectionAndType
.substring(1).trim();
289 matcher
= singleTypePattern
.matcher(collectionAndType
);
292 List
<SpecimenTypeDesignation
> designations
= new ArrayList
<SpecimenTypeDesignation
>();
295 for (String singleTypeOrig
: singleTypes
){
296 String singleType
= singleTypeOrig
;
// 1. leading type status
298 Pattern typePattern
= Pattern
.compile("^" + typesRE
);
299 matcher
= typePattern
.matcher(singleType
);
300 SpecimenTypeDesignationStatus typeStatus
= null;
302 String typeStr
= matcher
.group(0);
303 singleType
= singleType
.substring(typeStr
.length()).trim();
305 typeStatus
= SpecimenTypeParser
.parseSpecimenTypeStatus(typeStr
);
306 } catch (UnknownCdmTypeException e
) {
307 fireWarningEvent("specimen type not recognized. Use generic type instead", parentEvent
, 4);
308 typeStatus
= SpecimenTypeDesignationStatus
.TYPE();
309 //TODO use also type info from state
312 typeStatus
= SpecimenTypeDesignationStatus
.TYPE();
313 //TODO use also type info from state
// 2. collection codes
318 Pattern collectionPattern
= Pattern
.compile("^" + collectionsRE
);
319 matcher
= collectionPattern
.matcher(singleType
);
320 String
[] collectionStrings
= new String
[0];
322 String collectionStr
= matcher
.group(0);
323 singleType
= singleType
.substring(collectionStr
.length());
324 collectionStr
= collectionStr
.replace("(", "").replace(")", "").replaceAll("\\s", "");
325 collectionStrings
= collectionStr
.split(",");
// 3. additional information after the collection codes
329 if (!singleType
.isEmpty() && singleType
.startsWith(", ")){
330 singleType
= singleType
.substring(2);
333 boolean notSeen
= false;
334 if (singleType
.equals("not seen")){
335 singleType
= singleType
.replace("not seen", "");
338 if (singleType
.startsWith("not seen, ")){
339 singleType
= singleType
.replace("not seen, ", "");
342 boolean destroyed
= false;
343 if (singleType
.equals("destroyed")){
345 singleType
= singleType
.replace("destroyed", "");
347 boolean presumedDestroyed
= false;
348 if (singleType
.equals("presumed destroyed")){
349 presumedDestroyed
= true;
350 singleType
= singleType
.replace("presumed destroyed", "");
352 boolean hasAddInfo
= notSeen
|| destroyed
|| presumedDestroyed
;
// anything still left in singleType was not understood
355 if (!singleType
.isEmpty()){
356 String message
= "SingleType was not fully read. Remaining: " + singleType
+ ". Original singleType was: " + singleTypeOrig
;
357 fireWarningEvent(message
, parentEvent
, 6);
358 System
.out
.println(message
);
361 if (collectionStrings
.length
> 0){
362 boolean isFirst
= true;
363 for (String collStr
: collectionStrings
){
364 Collection collection
= getCollection(state
, collStr
);
// NOTE(review): no assignment to isFirst is visible in this listing - confirm that it is
// cleared after the first collection and that the first unit receives its collection
365 DerivedUnit unit
= isFirst ? facade
.innerDerivedUnit()
366 : facade
.addDuplicate(collection
, null, null, null, null);
367 SpecimenTypeDesignation desig
= SpecimenTypeDesignation
.NewInstance();
368 designations
.add(desig
);
369 desig
.setTypeSpecimen(unit
);
370 desig
.setTypeStatus(typeStatus
);
371 handleSpecimenTypeAddInfo(state
, notSeen
, destroyed
,
372 presumedDestroyed
, desig
);
373 name
.addTypeDesignation(desig
, true);
376 }else if (hasAddInfo
){ //handle addInfo if no collection data available
377 SpecimenTypeDesignation desig
= SpecimenTypeDesignation
.NewInstance();
378 designations
.add(desig
);
379 desig
.setTypeStatus(typeStatus
);
380 handleSpecimenTypeAddInfo(state
, notSeen
, destroyed
,
381 presumedDestroyed
, desig
);
382 name
.addTypeDesignation(desig
, true);
384 fireWarningEvent("No type designation could be created as collection info was not recognized", parentEvent
, 4);
// apply the "designated by" part to the created designations
388 if (designatedBy
!= null){
389 if (designations
.size() != 1){
390 fireWarningEvent("Size of type designations is not exactly 1, which is expected for 'designated by'", parentEvent
, 2);
392 designatedBy
= designatedBy
.trim();
393 if (designatedBy
.startsWith("(") && designatedBy
.endsWith(")") ){
394 designatedBy
= designatedBy
.substring(1, designatedBy
.length() - 1);
397 for (SpecimenTypeDesignation desig
: designations
){
398 if (designatedBy
.startsWith("designated by")){
399 String titleCache
= designatedBy
.replace("designated by", "").trim();
400 Reference reference
= ReferenceFactory
.newGeneric();
401 reference
.setTitleCache(titleCache
, true);
402 desig
.setCitation(reference
);
403 //in future we could also try to parse it automatically
404 fireWarningEvent("MANUALLY: Designated by should be parsed manually: " + titleCache
, parentEvent
, 1);
// NOTE(review): designatedByRE accepts "here designated" while this branch compares with
// "designated here" - check whether this branch can be reached
405 }else if (designatedBy
.equals("designated here")){
406 Reference ref
= state
.getConfig().getSourceReference();
407 desig
.setCitation(ref
);
408 fireWarningEvent("MANUALLY: Microcitation should be added to 'designated here", parentEvent
, 1);
409 }else if (designatedBy
.startsWith("according to")){
410 String annotationStr
= designatedBy
.replace("according to", "").trim();
411 Annotation annotation
= Annotation
.NewInstance(annotationStr
, AnnotationType
.EDITORIAL(), Language
.ENGLISH());
412 desig
.addAnnotation(annotation
);
414 fireWarningEvent("Designated by does not match known pattern: " + designatedBy
, parentEvent
, 6);
419 fireWarningEvent("CollectionAndType unexpectedly not matching: " + collectionAndTypeOrig
, parentEvent
, 6);
// when only this parsing style is configured, a non-matching string is reported
423 if (state
.getConfig().isUseFotGSpecimenTypeCollectionAndTypeOnly()){
424 fireWarningEvent("NO MATCH: " + collectionAndTypeOrig
, parentEvent
, 4);
429 // // remove brackets
430 // if (collectionAndType.matches("^\\(.*\\)\\.?$")) {
431 // collectionAndType = collectionAndType.replaceAll("\\.$", "");
432 // collectionAndType = collectionAndType.substring(1, collectionAndType.length() - 1);
435 // String[] split = collectionAndType.split("[;,]");
436 // for (String str : split) {
438 // boolean addToAllNamesInGroup = true;
439 // TypeInfo typeInfo = makeSpecimenTypeTypeInfo(str, parentEvent);
440 // SpecimenTypeDesignationStatus typeStatus = typeInfo.status;
441 // Collection collection = this.getCollection(state, typeInfo.collectionString);
443 // // TODO improve cache strategy handling
444 // DerivedUnit typeSpecimen = facade.addDuplicate(collection, null, null, null, null);
445 // typeSpecimen.setCacheStrategy(new DerivedUnitFacadeCacheStrategy());
446 // name.addSpecimenTypeDesignation(typeSpecimen, typeStatus, null, null, null, false, addToAllNamesInGroup);
454 * @param presumedDestroyed
457 private void handleSpecimenTypeAddInfo(MarkupImportState state
, boolean notSeen
, boolean destroyed
,
458 boolean presumedDestroyed
, SpecimenTypeDesignation desig
) {
460 UUID uuidNotSeenMarker
= MarkupTransformer
.uuidNotSeen
;
461 MarkerType notSeenMarkerType
= getMarkerType(state
, uuidNotSeenMarker
, "Not seen", "Not seen", null, null);
462 Marker marker
= Marker
.NewInstance(notSeenMarkerType
, true);
463 desig
.addMarker(marker
);
464 fireWarningEvent("not seen not yet implemented", "handleSpecimenTypeAddInfo", 4);
467 UUID uuidDestroyedMarker
= MarkupTransformer
.uuidDestroyed
;
468 MarkerType destroyedMarkerType
= getMarkerType(state
, uuidDestroyedMarker
, "Destroyed", "Destroyed", null, null);
469 Marker marker
= Marker
.NewInstance(destroyedMarkerType
, true);
470 desig
.addMarker(marker
);
471 fireWarningEvent("'destroyed' not yet fully implemented", "handleSpecimenTypeAddInfo", 4);
473 if (presumedDestroyed
){
474 Annotation annotation
= Annotation
.NewInstance("presumably destroyed", Language
.ENGLISH());
475 annotation
.setAnnotationType(AnnotationType
.EDITORIAL());
476 desig
.addAnnotation(annotation
);
/**
 * Builds a {@link TypeInfo} (type status and collection string) from one piece of a
 * collection-and-type string.
 * <p>
 * The literal "not designated" sets the {@code notDesignated} flag. Collection names
 * configured as known collections are detected by substring search and removed from the
 * string. The remainder is split at whitespace; each token is matched against
 * {@code SpecimenTypeParser.typeTypePattern} (type status) and
 * {@code SpecimenTypeParser.collectionPattern} (collection code), other tokens are reported
 * with a warning. A second status or collection in the same piece is reported as well.
 * If no status was found, the status of {@code lastTypeInfo} is reused when available.
 * <p>
 * NOTE(review): return/try/else lines of the original are not part of this listing.
 *
 * @param state import state that supplies the configured known collections
 * @param originalString the piece to analyse
 * @param lastTypeInfo result for the preceding piece, may be {@code null}
 * @param event event used as location for warnings
 * @return the type information; presumably {@code result} (the return statement is not
 *         visible in this listing)
 */
481 private TypeInfo
makeSpecimenTypeTypeInfo(MarkupImportState state
, String originalString
, TypeInfo lastTypeInfo
, XMLEvent event
) {
482 TypeInfo result
= new TypeInfo();
483 if ("not designated".equals(originalString
)){
484 result
.notDesignated
= true;
// known collections may contain characters the generic token patterns would not accept,
// so they are looked up first and removed from the string
487 List
<String
> knownCollections
= state
.getConfig().getKnownCollections();
488 for (String knownCollection
:knownCollections
){
489 if (originalString
.contains(knownCollection
)){
490 result
.collectionString
= knownCollection
;
491 originalString
= originalString
.replace(knownCollection
, "").trim();
495 String
[] split
= originalString
.split("\\s+");
497 for (String str
: split
) {
498 if (str
.matches(SpecimenTypeParser
.typeTypePattern
)) {
499 SpecimenTypeDesignationStatus status
;
501 status
= SpecimenTypeParser
.parseSpecimenTypeStatus(str
);
502 } catch (UnknownCdmTypeException e
) {
503 String message
= "Specimen type status '%s' not recognized by parser";
504 fireWarningEvent(String
.format(message
, str
), event
, 4);
507 if (result
.status
!= null){
508 String message
= "More than 1 status string found: " + originalString
;
509 fireWarningEvent(message
, event
, 4);
511 result
.status
= status
;
512 } else if (str
.matches(SpecimenTypeParser
.collectionPattern
)) {
513 if (result
.collectionString
!= null){
514 String message
= "More than 1 collection string found: " + originalString
;
515 fireWarningEvent(message
, event
, 4);
517 result
.collectionString
= str
;
519 String message
= "Type part '%s' could not be recognized";
520 fireWarningEvent(String
.format(message
, str
), event
, 2);
// a piece without its own status inherits the status of the preceding piece
522 if (result
.status
== null && lastTypeInfo
!= null && lastTypeInfo
.status
!= null){
523 result
.status
= lastTypeInfo
.status
;
/**
 * Handles a gathering element and writes its content into the given facade.
 * <p>
 * The original reader is wrapped into a {@code LookAheadEventReader} so that character data
 * can be judged by the neighbouring elements ({@code nextIsStart}, {@code previousWasEnd}).
 * Child elements are dispatched as follows: collector and field number are read as text and
 * set on the facade (the collector is also stored as current collector in the state),
 * alternative field numbers, collection-and-type, locality, full name and dates are passed
 * to their handlers, gathering notes are reported for manual handling, and the remaining
 * known elements are reported as not yet implemented. Character data is ignored when it is
 * punctuation, a simple type marker inside a specimen type, or a bracket/"=" around an
 * alternative field number; other text is reported with a warning.
 * At the closing element the mandatory collector and field number are checked; if the
 * element had no collector, the current collector of the state is used when there is one.
 * <p>
 * NOTE(review): several lines of the original (assignments to {@code hasCollector} and
 * {@code hasFieldNum}, else/return lines) are not part of this listing.
 *
 * @param state import state
 * @param readerOrig the event reader to wrap
 * @param parentEvent start event of the gathering element
 * @param facade facade that receives collector, field number, locality and date
 * @throws XMLStreamException declared for the underlying XML event reading
 * @throws IllegalStateException with "Collection has no closing tag."; presumably when the
 *         events run out before the closing element is seen
 */
532 private void handleGathering(MarkupImportState state
, XMLEventReader readerOrig
, XMLEvent parentEvent
, DerivedUnitFacade facade
) throws XMLStreamException
{
533 checkNoAttributes(parentEvent
);
534 boolean hasCollector
= false;
535 boolean hasFieldNum
= false;
537 LookAheadEventReader reader
= new LookAheadEventReader(parentEvent
.asStartElement(), readerOrig
);
540 while (reader
.hasNext()) {
541 XMLEvent next
= readNoWhitespace(reader
);
542 if (isMyEndingElement(next
, parentEvent
)) {
// closing element: verify mandatory children, falling back to the current collector
544 if (state
.getCurrentCollector() == null){
545 checkMandatoryElement(hasCollector
,parentEvent
.asStartElement(), COLLECTOR
);
547 facade
.setCollector(state
.getCurrentCollector());
550 checkMandatoryElement(hasFieldNum
,parentEvent
.asStartElement(), FIELD_NUM
);
552 }else if (isStartingElement(next
, COLLECTOR
)) {
554 String collectorStr
= getCData(state
, reader
, next
);
555 TeamOrPersonBase
<?
> collector
= createCollector(collectorStr
);
556 facade
.setCollector(collector
);
557 state
.setCurrentCollector(collector
);
558 } else if (isStartingElement(next
, ALTERNATIVE_COLLECTOR
)) {
559 handleNotYetImplementedElement(next
);
560 } else if (isStartingElement(next
, FIELD_NUM
)) {
562 String fieldNumStr
= getCData(state
, reader
, next
);
563 facade
.setFieldNumber(fieldNumStr
);
564 } else if (isStartingElement(next
, ALTERNATIVE_FIELD_NUM
)) {
565 handleAlternativeFieldNumber(state
, reader
, next
, facade
.innerFieldUnit());
566 } else if (isStartingElement(next
, COLLECTION_TYPE_STATUS
)) {
567 handleNotYetImplementedElement(next
);
568 } else if (isStartingElement(next
, COLLECTION_AND_TYPE
)) {
569 handleGatheringCollectionAndType(state
, reader
, next
, facade
);
570 } else if (isStartingElement(next
, ALTERNATIVE_COLLECTION_TYPE_STATUS
)) {
571 handleNotYetImplementedElement(next
);
572 } else if (isStartingElement(next
, SUB_GATHERING
)) {
573 handleNotYetImplementedElement(next
);
574 } else if (isStartingElement(next
, COLLECTION
)) {
575 handleNotYetImplementedElement(next
);
576 } else if (isStartingElement(next
, LOCALITY
)) {
577 handleLocality(state
, reader
, next
, facade
);
578 } else if (isStartingElement(next
, FULL_NAME
)) {
// a full name inside a gathering becomes a determination of the derived unit,
// or of the field unit if the facade has no derived unit
579 Rank defaultRank
= Rank
.SPECIES(); // can be any
580 INonViralName nvn
= createNameByCode(state
, defaultRank
);
581 handleFullName(state
, reader
, nvn
, next
);
582 TaxonNameBase
<?
,?
> name
= TaxonNameBase
.castAndDeproxy(nvn
);
583 DeterminationEvent
.NewInstance(name
, facade
.innerDerivedUnit() != null ? facade
.innerDerivedUnit() : facade
.innerFieldUnit());
584 } else if (isStartingElement(next
, DATES
)) {
585 TimePeriod timePeriod
= handleDates(state
, reader
, next
);
586 facade
.setGatheringPeriod(timePeriod
);
587 } else if (isStartingElement(next
, GATHERING_NOTES
)) {
588 handleAmbigousManually(state
, reader
, next
.asStartElement());
589 } else if (isStartingElement(next
, NOTES
)) {
590 handleNotYetImplementedElement(next
);
591 }else if (next
.isCharacters()) {
// character data between the child elements: only unknown text is reported
592 String text
= next
.asCharacters().getData().trim();
593 if (isPunctuation(text
)){
595 }else if (state
.isSpecimenType() && charIsSimpleType(text
) ){
597 }else if ( (text
.equals("=") || text
.equals("(") ) && reader
.nextIsStart(ALTERNATIVE_FIELD_NUM
)){
599 }else if ( (text
.equals(").") || text
.equals(")")) && reader
.previousWasEnd(ALTERNATIVE_FIELD_NUM
)){
601 }else if ( charIsOpeningOrClosingBracket(text
) ){
602 //for now we don't do anything, however in future brackets may have semantics
605 String message
= "Unrecognized text: %s";
606 fireWarningEvent(String
.format(message
, text
), next
, 6);
609 handleUnexpectedElement(next
);
// NOTE(review): the message speaks of "Collection" although a gathering element is handled here
612 throw new IllegalStateException("Collection has no closing tag.");
// Pattern for a bracketed, comma separated list of upper case collection codes of 1 to 3 letters.
617 private final String fotgPattern
= "^\\(([A-Z]{1,3})(?:,\\s?([A-Z]{1,3}))*\\)"; // eg. (US, B, CAN)
/**
 * Handles a collection-and-type element that occurs inside a gathering.
 * <p>
 * The next non-whitespace event is expected to be character data. Inside a specimen type
 * the trimmed text is only appended to the collection-and-type buffer of the state.
 * Otherwise the text is matched against {@code fotgPattern}; on a match the brackets are
 * removed, the list is split at ',' and a duplicate is added to the facade for each
 * collection code, unless the facade has no derived unit, which is reported with a warning.
 * A non-matching text is reported with a warning. Afterwards the closing element of
 * {@code parent} is expected; other events are reported as unexpected.
 * <p>
 * NOTE(review): several else/closing lines of the original are not part of this listing.
 *
 * @param state import state
 * @param reader event reader that supplies the content of the element
 * @param parent start event of the collection-and-type element
 * @param facade facade that receives the duplicates
 * @throws XMLStreamException declared for the underlying XML event reading
 */
618 private void handleGatheringCollectionAndType(MarkupImportState state
, XMLEventReader reader
, XMLEvent parent
, DerivedUnitFacade facade
) throws XMLStreamException
{
619 checkNoAttributes(parent
);
621 XMLEvent next
= readNoWhitespace(reader
);
623 if (next
.isCharacters()){
624 String txt
= next
.asCharacters().getData().trim();
625 if (state
.isSpecimenType()){
626 state
.addCollectionAndType(txt
);
// the pattern is compiled on every call
629 Matcher fotgMatcher
= Pattern
.compile(fotgPattern
).matcher(txt
);
631 if (fotgMatcher
.matches()){
632 txt
= txt
.substring(1, txt
.length() - 1); //remove bracket
633 String
[] splits
= txt
.split(",");
634 for (String split
: splits
){
635 Collection collection
= getCollection(state
, split
.trim());
636 if (facade
.innerDerivedUnit() == null){
637 String message
= "Adding a duplicate to a non derived unit based facade is not possible. Please check why no derived unit exists yet in facade!";
// NOTE(review): negative value -6 differs from the positive values passed at the other call sites - confirm it is intended
638 this.fireWarningEvent(message
, next
, -6);
640 facade
.addDuplicate(collection
, null, null, null, null);
644 //create derived units and and add collections
647 fireWarningEvent("Collection and type pattern for gathering not recognized: " + txt
, next
, 4);
652 fireUnexpectedEvent(next
, 0);
655 if (isMyEndingElement(next
, parent
)){
656 return; //in case we have a completely empty element
658 next
= readNoWhitespace(reader
);
659 if (isMyEndingElement(next
, parent
)){
662 fireUnexpectedEvent(next
, 0);
668 private Collection
getCollection(MarkupImportState state
, String code
) {
669 Collection collection
= state
.getCollectionByCode(code
);
670 if (collection
== null){
671 List
<Collection
> list
= this.docImport
.getCollectionService().searchByCode(code
);
672 if (list
.size() == 1){
673 collection
= list
.get(0);
674 }else if (list
.size() > 1){
675 fireWarningEvent("More then one occurrence for collection " + code
+ " in database. Collection not reused" , "", 1);
678 if (collection
== null){
679 collection
= Collection
.NewInstance();
680 collection
.setCode(code
);
681 this.docImport
.getCollectionService().saveOrUpdate(collection
);
683 state
.putCollectionByCode(code
, collection
);
/**
 * Handles an alternative field number element: the trimmed text content is added to the
 * field unit as an identifier of the type "Alternative field number".
 * <p>
 * A boolean attribute is read into {@code doubtful}; marking the identifier as doubtful is
 * not possible yet, which is reported with a warning (see the ticket named in the message).
 * <p>
 * NOTE(review): the line between adding the identifier and the warning is not part of this
 * listing, so the condition under which the warning fires cannot be seen here.
 *
 * @param state import state
 * @param reader event reader that supplies the content of the element
 * @param parent start event of the alternative field number element
 * @param fieldUnit field unit that receives the identifier
 * @throws XMLStreamException declared for the underlying XML event reading
 */
689 private void handleAlternativeFieldNumber(MarkupImportState state
, XMLEventReader reader
, XMLEvent parent
, FieldUnit fieldUnit
) throws XMLStreamException
{
690 Map
<String
, Attribute
> attrs
= getAttributes(parent
);
// NOTE(review): the attribute name is spelled "doubful" - verify against the markup schema
// whether "doubtful" is meant
691 Boolean doubtful
= this.getAndRemoveBooleanAttributeValue(parent
, attrs
, "doubful", false);
693 //for now we do not handle annotation and typeNotes
694 String altFieldNum
= getCData(state
, reader
, parent
, false).trim();
695 DefinedTerm type
= this.getIdentifierType(state
, MarkupTransformer
.uuidIdentTypeAlternativeFieldNumber
, "Alternative field number", "Alternative field number", "alt. field no.", null);
696 fieldUnit
.addIdentifier(altFieldNum
, type
);
698 fireWarningEvent("Marking alternative field numbers as doubtful not yet possible, see #4673", parent
,4);
699 // Marker.NewInstance(identifier, "true", MarkerType.IS_DOUBTFUL());
705 private boolean charIsOpeningOrClosingBracket(String text
) {
706 return text
.equals("(") || text
.equals(")");
/**
 * Handles a dates element and converts its content into a {@link TimePeriod}.
 * <p>
 * A full date element is parsed with {@code TimePeriodParser.parseString} after a trailing
 * dot has been removed; a result that still has free text is reported with a warning.
 * Day, month and year elements are trimmed, normalized with {@code normalizeDate} and, if
 * numeric, stored as start day, start month and start year; non-numeric values are
 * reported and flagged as unparsed. At the closing element a warning is fired unless
 * exactly one of the three flags (full date, atomised date, unparsed atomised date) is set,
 * as tested by {@code isAlternative}.
 * <p>
 * NOTE(review): the assignments to {@code hasFullDate} and {@code hasAtomised}, the return
 * statement and several else lines of the original are not part of this listing.
 *
 * @param state import state
 * @param reader event reader that supplies the content of the element
 * @param parent start event of the dates element
 * @return the time period; presumably {@code result} (the return statement is not visible
 *         in this listing)
 * @throws XMLStreamException declared for the underlying XML event reading
 * @throws IllegalStateException with "Dates has no closing tag."; presumably when the events
 *         run out before the closing element is seen
 */
710 private TimePeriod
handleDates(MarkupImportState state
, XMLEventReader reader
, XMLEvent parent
) throws XMLStreamException
{
711 checkNoAttributes(parent
);
712 TimePeriod result
= TimePeriod
.NewInstance();
// message template: first argument is the date part, second the unparsable value
713 String parseMessage
= "%s can not be parsed: %s";
714 boolean hasFullDate
= false;
715 boolean hasAtomised
= false;
716 boolean hasUnparsedAtomised
= false;
717 while (reader
.hasNext()) {
718 XMLEvent next
= readNoWhitespace(reader
);
719 if (isMyEndingElement(next
, parent
)) {
720 if (! isAlternative(hasFullDate
, hasAtomised
, hasUnparsedAtomised
)){
721 String message
= "Some problems exist when defining the date";
722 fireWarningEvent(message
, parent
, 4);
725 } else if (isStartingElement(next
, FULL_DATE
)) {
726 String fullDate
= getCData(state
, reader
, next
, true);
727 if (fullDate
.endsWith(".")){
728 fullDate
= fullDate
.substring(0, fullDate
.length()-1);
// the parsed period replaces the instance created above
730 result
= TimePeriodParser
.parseString(fullDate
);
731 if (result
.getFreeText() != null){
732 fireWarningEvent(String
.format(parseMessage
, FULL_DATE
, fullDate
), parent
, 1);
735 } else if (isStartingElement(next
, DAY
)) {
736 String day
= getCData(state
, reader
, next
, true).trim();
737 day
= normalizeDate(day
);
738 if (CdmUtils
.isNumeric(day
)){
739 result
.setStartDay(Integer
.valueOf(day
));
742 fireWarningEvent(String
.format(parseMessage
,"Day", day
), parent
, 2);
743 hasUnparsedAtomised
= true;
745 } else if (isStartingElement(next
, MONTH
)) {
746 String month
= getCData(state
, reader
, next
, true).trim();
747 month
= normalizeDate(month
);
748 if (CdmUtils
.isNumeric(month
)){
749 result
.setStartMonth(Integer
.valueOf(month
));
752 fireWarningEvent(String
.format(parseMessage
,"Month", month
), parent
, 2);
753 hasUnparsedAtomised
= true;
755 } else if (isStartingElement(next
, YEAR
)) {
756 String year
= getCData(state
, reader
, next
, true).trim();
757 year
= normalizeDate(year
);
758 if (CdmUtils
.isNumeric(year
)){
759 result
.setStartYear(Integer
.valueOf(year
));
762 fireWarningEvent(String
.format(parseMessage
,"Year", year
), parent
, 2);
763 hasUnparsedAtomised
= true;
766 handleUnexpectedElement(next
);
769 throw new IllegalStateException("Dates has no closing tag.");
/**
 * Normalizes one part of a date (day, month or year): the value is trimmed and all leading
 * '-' characters are removed.
 * <p>
 * NOTE(review): the return statements are not part of this listing, so the value returned
 * for blank input cannot be seen here.
 *
 * @param partOfDate the raw date part
 * @return presumably the normalized date part
 */
773 private String
normalizeDate(String partOfDate
) {
774 if (isBlank(partOfDate
)){
777 partOfDate
= partOfDate
.trim();
// strip any number of leading hyphens
778 while (partOfDate
.startsWith("-")){
779 partOfDate
= partOfDate
.substring(1);
785 private boolean isAlternative(boolean first
, boolean second
, boolean third
) {
786 return ( (first ^ second
) && !third
) ||
787 (! first
&& ! second
&& third
) ;
/**
 * Handles a locality element of a gathering.
 * <p>
 * The class attribute of the element decides how the text is used: the value "locality"
 * (case-insensitive) is tested separately from other values, for which a named area level
 * is created with {@code makeNamedAreaLevel}. Character data is accumulated; at the closing
 * element non-blank text is normalized and then either set as locality of the facade in the
 * default language, or turned into a named area (after removing a trailing dot) that is
 * added to the facade as collecting area. Altitude, coordinates and annotation children are
 * reported as not yet implemented.
 * <p>
 * NOTE(review): the declaration of {@code text}, the assignment to {@code isLocality} and
 * several else/return lines of the original are not part of this listing.
 *
 * @param state import state
 * @param reader event reader that supplies the content of the element
 * @param parentEvent start event of the locality element
 * @param facade facade that receives the locality or the collecting area
 * @throws XMLStreamException declared for the underlying XML event reading
 * @throws IllegalStateException presumably when the events run out before the closing
 *         element is seen
 */
791 private void handleLocality(MarkupImportState state
, XMLEventReader reader
,XMLEvent parentEvent
, DerivedUnitFacade facade
)throws XMLStreamException
{
792 String classValue
= getClassOnlyAttribute(parentEvent
);
793 boolean isLocality
= false;
794 NamedAreaLevel areaLevel
= null;
795 if ("locality".equalsIgnoreCase(classValue
)) {
798 areaLevel
= makeNamedAreaLevel(state
, classValue
, parentEvent
);
803 while (reader
.hasNext()) {
804 XMLEvent next
= readNoWhitespace(reader
);
805 if (isMyEndingElement(next
, parentEvent
)) {
806 if (StringUtils
.isNotBlank(text
)) {
807 text
= normalize(text
);
809 facade
.setLocality(text
, getDefaultLanguage(state
));
811 text
= CdmUtils
.removeTrailingDot(text
);
812 NamedArea area
= makeArea(state
, text
, areaLevel
);
813 facade
.addCollectingArea(area
);
818 }else if (isStartingElement(next
, ALTITUDE
)) {
819 handleNotYetImplementedElement(next
);
820 // homotypicalGroup = handleNom(state, reader, next, taxon,
821 // homotypicalGroup);
822 } else if (isStartingElement(next
, COORDINATES
)) {
823 handleNotYetImplementedElement(next
);
824 } else if (isStartingElement(next
, ANNOTATION
)) {
825 handleNotYetImplementedElement(next
);
826 } else if (next
.isCharacters()) {
827 text
+= next
.asCharacters().getData();
829 handleUnexpectedElement(next
);
// NOTE(review): the message names <SpecimenType> although a locality element is handled here
832 throw new IllegalStateException("<SpecimenType> has no closing tag");
837 private TeamOrPersonBase
<?
> createCollector(String collectorStr
) {
838 return createAuthor(collectorStr
);
842 public List
<DescriptionElementBase
> handleMaterialsExamined(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
, Feature feature
, TaxonDescription defaultDescription
) throws XMLStreamException
{
843 List
<DescriptionElementBase
> result
= new ArrayList
<DescriptionElementBase
>();
844 //reset current areas
845 state
.removeCurrentAreas();
846 while (reader
.hasNext()) {
847 XMLEvent next
= readNoWhitespace(reader
);
848 if (isMyEndingElement(next
, parentEvent
)) {
849 if (result
.isEmpty()){
850 fireWarningEvent("Materials examined created empty Individual Associations list", parentEvent
, 4);
852 state
.removeCurrentAreas();
854 } else if (isStartingElement(next
, SUB_HEADING
)) {
855 // Map<String, Object> inlineMarkup = new HashMap<String, Object>();
856 String text
= getCData(state
, reader
, next
, true);
857 if (isFeatureHeading(state
, next
, text
)){
858 feature
= makeHeadingFeature(state
, next
, text
, feature
);
860 String message
= "Unhandled subheading: %s";
861 fireWarningEvent(String
.format(message
, text
), next
, 4);
863 // for (String key : inlineMarkup.keySet()){
864 // handleInlineMarkup(state, key, inlineMarkup);
867 } else if (isStartingElement(next
, BR
) || isEndingElement(next
, BR
)) {
869 } else if (isStartingElement(next
, GATHERING
)) {
870 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.DerivedUnit
);
871 addCurrentAreas(state
, next
, facade
);
872 handleGathering(state
, reader
, next
, facade
);
873 SpecimenOrObservationBase
<?
> specimen
;
874 if (facade
.innerDerivedUnit() != null){
875 specimen
= facade
.innerDerivedUnit();
877 specimen
= facade
.innerFieldUnit();
879 IndividualsAssociation individualsAssociation
= IndividualsAssociation
.NewInstance();
880 individualsAssociation
.setAssociatedSpecimenOrObservation(specimen
);
881 result
.add(individualsAssociation
);
882 } else if (isStartingElement(next
, GATHERING_GROUP
)) {
883 List
<DescriptionElementBase
> list
= getGatheringGroupDescription(state
, reader
, next
);
885 }else if (next
.isCharacters()) {
886 String text
= next
.asCharacters().getData().trim();
887 if (isPunctuation(text
)){
890 String message
= "Unrecognized text: %s";
891 fireWarningEvent(String
.format(message
, text
), next
, 6);
894 handleUnexpectedElement(next
);
897 throw new IllegalStateException("<String> has no closing tag");
/**
 * Handles a gathering group element: reads its "geoscope" and doubtful
 * attributes, tries to resolve the geoscope to a named area and then creates
 * one IndividualsAssociation per gathering child element.
 * NOTE(review): several lines of this method are not visible in this view of
 * the file (the conditions around the area lookup, the assignments to
 * <code>td</code>, the return statement). The comments below describe the
 * visible statements only - confirm the rest against the repository version.
 *
 * @param state the import state
 * @param reader the event reader positioned behind the start tag
 * @param parentEvent the start event of the gathering group element
 * @throws XMLStreamException declared for failures while reading from the reader
 * @throws IllegalStateException if the reader ends before the closing tag
 */
903 private List
<DescriptionElementBase
> getGatheringGroupDescription(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
// read the known attributes and complain about any attribute that is left over
904 Map
<String
, Attribute
> attributes
= getAttributes(parentEvent
);
905 String geoScope
= getAndRemoveAttributeValue(attributes
, "geoscope");
906 Boolean doubtful
= getAndRemoveBooleanAttributeValue(parentEvent
, attributes
, DOUBTFUL
, null);
907 checkNoAttributes(attributes
, parentEvent
);
909 List
<DescriptionElementBase
> result
= new ArrayList
<DescriptionElementBase
>();
912 TaxonDescription td
= null;
914 if (isNotBlank(geoScope
)){
// first lookup: the geoscope as country label
915 NamedArea area
= Country
.getCountryByLabel(geoScope
);
// second lookup via the transformer of the state
// presumably only attempted if the country lookup found nothing - TODO confirm
918 area
= state
.getTransformer().getNamedAreaByKey(geoScope
);
919 } catch (Exception e
) {
920 fireWarningEvent("getNamedArea not supported", parentEvent
, 16);
924 fireWarningEvent("Area for geoscope not found: " + geoScope
+"; add specimen group to ordinary description", parentEvent
, 4);
// the area becomes a current area of the state, addCurrentAreas() reads these for each gathering
926 state
.addCurrentArea(area
);
// search the descriptions of the current taxon for one whose only geo scope is this area
927 Set
<TaxonDescription
> descs
= state
.getCurrentTaxon().getDescriptions();
928 for (TaxonDescription desc
: descs
){
929 Set
<NamedArea
> scopes
= desc
.getGeoScopes();
930 if (scopes
.size() == 1 && scopes
.iterator().next().equals(area
)){
// new description for the current taxon with the area as geo scope
// presumably only created if the search above found none - TODO confirm
936 TaxonDescription desc
= TaxonDescription
.NewInstance(state
.getCurrentTaxon());
937 desc
.addGeoScope(area
);
// the doubtful attribute, if given, is stored as IS_DOUBTFUL marker with the attribute value as flag
938 if (doubtful
!= null){
939 desc
.addMarker(Marker
.NewInstance(MarkerType
.IS_DOUBTFUL(), doubtful
));
// read the child elements up to the closing tag of the gathering group
946 while (reader
.hasNext()) {
947 XMLEvent next
= readNoWhitespace(reader
);
948 if (isMyEndingElement(next
, parentEvent
)) {
949 if (result
.isEmpty()){
950 fireWarningEvent("Gathering group created empty Individual Associations list", parentEvent
, 4);
// the current areas are cleared again when the group ends
952 state
.removeCurrentAreas();
954 } else if (isStartingElement(next
, GATHERING
)) {
955 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.DerivedUnit
);
956 addCurrentAreas(state
, next
, facade
);
957 handleGathering(state
, reader
, next
, facade
);
// the derived unit is used if the facade has one, the field unit is the alternative
958 SpecimenOrObservationBase
<?
> specimen
;
959 if (facade
.innerDerivedUnit() != null){
960 specimen
= facade
.innerDerivedUnit();
962 specimen
= facade
.innerFieldUnit();
964 IndividualsAssociation individualsAssociation
= IndividualsAssociation
.NewInstance();
965 individualsAssociation
.setAssociatedSpecimenOrObservation(specimen
);
966 result
.add(individualsAssociation
);
968 }else if (next
.isCharacters()) {
969 String text
= next
.asCharacters().getData().trim();
// punctuation between gatherings is tested separately from other text
970 if (isPunctuation(text
)){
974 String message
= "Unrecognized text: %s";
975 fireWarningEvent(String
.format(message
, text
), next
, 6);
978 handleUnexpectedElement(next
);
981 throw new IllegalStateException("<Gathering group> has no closing tag");
/**
 * Transfers the current areas of the import state to the given facade.
 * An area that is an instance of Country is set as country of the facade.
 * For an area reaching the last branch a warning is fired and the area is
 * added as collecting area.
 * NOTE(review): the first condition of the if-chain inside the loop is not
 * visible in this view of the file - confirm which areas it filters out.
 *
 * @param state the import state providing the current areas
 * @param event the event used as location for the warning
 * @param facade the facade receiving country and collecting areas
 */
985 private void addCurrentAreas(MarkupImportState state
, XMLEvent event
, DerivedUnitFacade facade
) {
986 for (NamedArea area
: state
.getCurrentAreas()){
989 }else if (area
.isInstanceOf(Country
.class)){
990 facade
.setCountry(area
);
// non-country areas are unexpected but still kept as collecting area
992 String message
= "Current area %s is not country. This is not expected for currently known data.";
993 fireWarningEvent(String
.format(message
, area
.getTitleCache()), event
, 2);
994 facade
.addCollectingArea(area
);
1001 // private void handleInlineMarkup(MarkupImportState state, String key, Map<String, Object> inlineMarkup) {
1002 // Object obj = inlineMarkup.get(key);
1003 // if (key.equals(LOCALITY)){
1004 // if (obj instanceof NamedArea){
1005 // NamedArea area = (NamedArea)obj;
1006 // state.addCurrentArea(area);
1014 * Changes the feature if the (sub)-heading implies this. Also recognizes hidden country information
/**
 * Matches a heading or sub-heading text against a hard coded French pattern
 * for "materials examined" headings, optionally containing "gabonais" or
 * "pour le Gabon" and an optional trailing colon. On a match the country
 * Gabon is added to the current areas of the state if one of the country
 * phrases is contained, the feature is set to MATERIALS_EXAMINED and
 * registered in the general sorter list of the state. The visible code also
 * fires a "not recognized" warning carrying the original text.
 * NOTE(review): some lines of this method, including its return statements,
 * are not visible in this view of the file - confirm the returned value for
 * both the matching and the non-matching case.
 *
 * @param state the import state
 * @param parent the event used as location for the warning
 * @param originalText the heading text as found in the markup
 * @param feature the feature in effect before the heading
 */
1021 private Feature
makeHeadingFeature(MarkupImportState state
, XMLEvent parent
, String originalText
, Feature feature
) {
1022 //expand, provide by config or service
// building blocks of the pattern, the character classes accept the lower and upper case accented e
1023 String materialRegEx
= "Mat[\u00E9\u00C9]riel";
1024 String examinedRegEx
= "[\u00E9\u00C9]tudi[\u00E9\u00C9]";
1025 String countryRegEx
= "(gabonais)";
1026 String postfixCountryRegEx
= "\\s+(pour le Gabon)";
// case-insensitive: material, optional country adjective, examined, optional country postfix, optional colon
1028 String materialExaminedRegEx
= "(?i)" + materialRegEx
+ "\\s+(" + countryRegEx
+"\\s+)?" + examinedRegEx
+ "(" +postfixCountryRegEx
+ ")?:?";
1030 String text
= originalText
;
// matches() requires the whole text to fit the pattern
1035 if (text
.matches(materialExaminedRegEx
)){
// NOTE(review): contains() is case-sensitive although the pattern above is not,
// so a heading written e.g. "Gabonais" matches without adding the country - confirm this is intended
1037 if (text
.contains("gabonais ")){
1038 text
= text
.replace("gabonais ", "");
1039 state
.addCurrentArea(Country
.GABONGABONESEREPUBLIC());
1041 if (text
.contains(" pour le Gabon")){
1042 text
= text
.replace(" pour le Gabon", "");
1043 state
.addCurrentArea(Country
.GABONGABONESEREPUBLIC());
1047 feature
= Feature
.MATERIALS_EXAMINED();
1048 state
.putFeatureToGeneralSorterList(feature
);
// the warning reports the unmodified heading text
1051 String message
= "Heading/Subheading not recognized: %s";
1052 fireWarningEvent(String
.format(message
, originalText
), parent
, 4);
1060 * True if heading or subheading represents feature information
1066 private boolean isFeatureHeading(MarkupImportState state
, XMLEvent parent
, String text
) {
1067 return makeHeadingFeature(state
, parent
, text
, null) != null;
1071 public String
handleInLineGathering(MarkupImportState state
, XMLEventReader reader
, XMLEvent parentEvent
) throws XMLStreamException
{
1072 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.FieldUnit
);
1073 handleGathering(state
, reader
, parentEvent
, facade
);
1074 SpecimenOrObservationBase
<?
> specimen
= facade
.innerFieldUnit();
1075 if (specimen
== null){
1076 specimen
= facade
.innerDerivedUnit();
1077 String message
= "Inline gaterhing has no field unit";
1078 fireWarningEvent(message
, parentEvent
, 2);
1081 String result
= "<cdm:specimen uuid='%s'>%s</specimen>";
1082 if (specimen
!= null){
1083 result
= String
.format(result
, specimen
.getUuid(), specimen
.getTitleCache());
1085 String message
= "Inline gathering has no specimen";
1086 fireWarningEvent(message
, parentEvent
, 4);
1088 save(specimen
, state
);