ref #6410 Nepenthaceae, handle known collections, issues and fulldates
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupSpecimenImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.UUID;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19
20 import javax.xml.stream.XMLEventReader;
21 import javax.xml.stream.XMLStreamException;
22 import javax.xml.stream.events.Attribute;
23 import javax.xml.stream.events.XMLEvent;
24
25 import org.apache.commons.lang.StringUtils;
26 import org.apache.log4j.Logger;
27
28 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
29 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacadeCacheStrategy;
30 import eu.etaxonomy.cdm.common.CdmUtils;
31 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
32 import eu.etaxonomy.cdm.model.common.Annotation;
33 import eu.etaxonomy.cdm.model.common.AnnotationType;
34 import eu.etaxonomy.cdm.model.common.CdmBase;
35 import eu.etaxonomy.cdm.model.common.DefinedTerm;
36 import eu.etaxonomy.cdm.model.common.Language;
37 import eu.etaxonomy.cdm.model.common.Marker;
38 import eu.etaxonomy.cdm.model.common.MarkerType;
39 import eu.etaxonomy.cdm.model.common.TimePeriod;
40 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
41 import eu.etaxonomy.cdm.model.description.Feature;
42 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
43 import eu.etaxonomy.cdm.model.description.TaxonDescription;
44 import eu.etaxonomy.cdm.model.location.Country;
45 import eu.etaxonomy.cdm.model.location.NamedArea;
46 import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
47 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
48 import eu.etaxonomy.cdm.model.name.INonViralName;
49 import eu.etaxonomy.cdm.model.name.NonViralName;
50 import eu.etaxonomy.cdm.model.name.Rank;
51 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
52 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
53 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
54 import eu.etaxonomy.cdm.model.occurrence.Collection;
55 import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
56 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
57 import eu.etaxonomy.cdm.model.occurrence.FieldUnit;
58 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
59 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
60 import eu.etaxonomy.cdm.model.reference.Reference;
61 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
62 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
63 import eu.etaxonomy.cdm.strategy.parser.SpecimenTypeParser;
64 import eu.etaxonomy.cdm.strategy.parser.SpecimenTypeParser.TypeInfo;
65 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
66
67 /**
68 * @author a.mueller
69 * @created 30.05.2012
70 *
71 */
72 public class MarkupSpecimenImport extends MarkupImportBase {
// Class logger; currently not referenced, hence the suppressed "unused" warning.
73 @SuppressWarnings("unused")
74 private static final Logger logger = Logger.getLogger(MarkupSpecimenImport.class);
75
// Names of markup XML elements and attributes that the handlers of this class
// compare against (via isStartingElement / getAndRemoveAttributeValue).
76 private static final String ALTERNATIVE_COLLECTION_TYPE_STATUS = "alternativeCollectionTypeStatus";
77 private static final String ALTERNATIVE_COLLECTOR = "alternativeCollector";
78 private static final String ALTERNATIVE_FIELD_NUM = "alternativeFieldNum";
79 private static final String COLLECTOR = "collector";
80 private static final String COLLECTION = "collection";
81 private static final String COLLECTION_AND_TYPE = "collectionAndType";
82 private static final String COLLECTION_TYPE_STATUS = "collectionTypeStatus";
83 private static final String DAY = "day";
84 private static final String DESTROYED = "destroyed";
85 private static final String FIELD_NUM = "fieldNum";
86 private static final String FULL_TYPE = "fullType";
87 private static final String FULL_DATE = "fullDate";
88 private static final String GATHERING_NOTES = "gatheringNotes";
89 private static final String LOST = "lost";
90 private static final String MONTH = "month";
91 private static final String SUB_GATHERING = "subGathering";
92 private static final String NOT_FOUND = "notFound";
93 private static final String NOT_SEEN = "notSeen";
94 private static final String ORIGINAL_DETERMINATION = "originalDetermination";
95
96 private static final String UNKNOWN = "unknown";
97 private static final String YEAR = "year";
98
99
100
/**
 * Creates the specimen import handler and hands the given document import
 * on to the {@link MarkupImportBase} constructor.
 *
 * @param docImport the markup document import passed to the base class
 */
101 public MarkupSpecimenImport(MarkupDocumentImport docImport) {
102 super(docImport);
103 }
104
105
106 public void handleSpecimenType(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
107 HomotypicalGroup homotypicalGroup) throws XMLStreamException {
108
109 // attributes
110 Map<String, Attribute> attributes = getAttributes(parentEvent);
111 String typeStatus = getAndRemoveAttributeValue(attributes, TYPE_STATUS);
112 String notSeen = getAndRemoveAttributeValue(attributes, NOT_SEEN);
113 String unknown = getAndRemoveAttributeValue(attributes, UNKNOWN);
114 String notFound = getAndRemoveAttributeValue(attributes, NOT_FOUND);
115 String destroyed = getAndRemoveAttributeValue(attributes, DESTROYED);
116 String lost = getAndRemoveAttributeValue(attributes, LOST);
117 checkNoAttributes(attributes, parentEvent);
118 if (StringUtils.isNotEmpty(typeStatus)) {
119 // TODO
120 // currently not needed
121 fireWarningEvent("Type status not yet used", parentEvent, 4);
122 } else if (StringUtils.isNotEmpty(notSeen)) {
123 handleNotYetImplementedAttribute(attributes, NOT_SEEN);
124 } else if (StringUtils.isNotEmpty(unknown)) {
125 handleNotYetImplementedAttribute(attributes, UNKNOWN);
126 } else if (StringUtils.isNotEmpty(notFound)) {
127 handleNotYetImplementedAttribute(attributes, NOT_FOUND);
128 } else if (StringUtils.isNotEmpty(destroyed)) {
129 handleNotYetImplementedAttribute(attributes, DESTROYED);
130 } else if (StringUtils.isNotEmpty(lost)) {
131 handleNotYetImplementedAttribute(attributes, LOST);
132 }
133
134 INonViralName firstName = null;
135 Set<TaxonNameBase> names = homotypicalGroup.getTypifiedNames();
136 if (names.isEmpty()) {
137 String message = "There is no name in a homotypical group. Can't create the specimen type";
138 fireWarningEvent(message, parentEvent, 8);
139 } else {
140 firstName = CdmBase.deproxy(names.iterator().next(),NonViralName.class);
141 }
142
143 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen);
144 String text = "";
145 state.resetCollectionAndType();
146 state.setSpecimenType(true);
147 boolean isFullType = false;
148 // elements
149 while (reader.hasNext()) {
150 XMLEvent next = readNoWhitespace(reader);
151 if (isMyEndingElement(next, parentEvent)) {
152 if (! isFullType){
153 makeSpecimenType(state, facade, text, state.getCollectionAndType(), firstName, parentEvent);
154 }
155 state.setSpecimenType(false);
156 state.resetCollectionAndType();
157 return;
158 } else if (isStartingElement(next, FULL_TYPE)) {
159 handleAmbigousManually(state, reader, next.asStartElement());
160 isFullType = true;
161 } else if (isStartingElement(next, TYPE_STATUS)) {
162 handleNotYetImplementedElement(next);
163 } else if (isStartingElement(next, GATHERING)) {
164 handleGathering(state, reader, next, facade);
165 } else if (isStartingElement(next, ORIGINAL_DETERMINATION)) {
166 handleNotYetImplementedElement(next);
167 } else if (isStartingElement(next, SPECIMEN_TYPE)) {
168 handleNotYetImplementedElement(next);
169 } else if (isStartingElement(next, COLLECTION_AND_TYPE)) {
170 String colAndType = getCData(state, reader, next, true);
171 state.addCollectionAndType(colAndType);
172 } else if (isStartingElement(next, CITATION)) {
173 handleNotYetImplementedElement(next);
174 } else if (isStartingElement(next, NOTES)) {
175 handleNotYetImplementedElement(next);
176 } else if (isStartingElement(next, ANNOTATION)) {
177 handleNotYetImplementedElement(next);
178 } else if (next.isCharacters()) {
179 text += next.asCharacters().getData();
180 } else {
181 handleUnexpectedElement(next);
182 }
183 }
184 throw new IllegalStateException("Specimen type has no closing tag");
185 }
186
187
188
189 private void makeSpecimenType(MarkupImportState state, DerivedUnitFacade facade, String text, String collectionAndType,
190 INonViralName name, XMLEvent parentEvent) {
191 text = text.trim();
192 if (isBlank(text) || isPunctuation(text)){
193 //do nothing
194 }else{
195 String message = "Text '%s' not handled for <SpecimenType>";
196 this.fireWarningEvent(String.format(message, text), parentEvent, 4);
197 }
198
199 if (makeFotgSpecimenType(state, collectionAndType, facade, name, parentEvent) || state.getConfig().isUseFotGSpecimenTypeCollectionAndTypeOnly()){
200 return;
201 }else{
202 // remove brackets
203 if (collectionAndType.matches("^\\(.*\\)\\.?$")) {
204 collectionAndType = collectionAndType.replaceAll("\\.$", "");
205 collectionAndType = collectionAndType.substring(1, collectionAndType.length() - 1);
206 }
207
208 String[] splitsSemi = collectionAndType.split("[;]");
209 for (String splitSemi : splitsSemi) {
210 String[] splitKomma = splitSemi.split("[,]");
211 TypeInfo lastTypeInfo = null;
212 for (String str : splitKomma) {
213 str = str.trim();
214 boolean addToAllNamesInGroup = true;
215 TypeInfo typeInfo = makeSpecimenTypeTypeInfo(state, str, lastTypeInfo, parentEvent);
216 SpecimenTypeDesignationStatus typeStatus = typeInfo.status;
217 Collection collection = this.getCollection(state, typeInfo.collectionString);
218
219 // TODO improve cache strategy handling
220 DerivedUnit typeSpecimen = facade.addDuplicate(collection, null, null, null, null);
221 typeSpecimen.setCacheStrategy(new DerivedUnitFacadeCacheStrategy());
222 name.addSpecimenTypeDesignation(typeSpecimen, typeStatus, null, null, null, false, addToAllNamesInGroup);
223 lastTypeInfo = typeInfo;
224 }
225 }
226 }
227
228 }
229
230
231 private Pattern fotgTypePattern = null;
232 /**
233 * Implemented for Flora of the Guyanas this may include duplicated code from similar places
234 * @param state
235 * @param collectionAndTypeOrig
236 * @param facade
237 * @param name
238 * @param parentEvent
239 * @return
240 */
241 private boolean makeFotgSpecimenType(MarkupImportState state, final String collectionAndTypeOrig, DerivedUnitFacade facade, INonViralName name, XMLEvent parentEvent) {
242 String collectionAndType = collectionAndTypeOrig;
243
244 String notDesignatedRE = "not\\s+designated";
245 String designatedByRE = "\\s*\\(((designated\\s+by\\s+|according\\s+to\\s+)[^\\)]+|here\\s+designated)\\)";
246 String typesRE = "(holotype|isotypes?|neotype|isoneotype|syntype|lectotype|isolectotypes?|typ\\.\\scons\\.,?)";
247 String collectionRE = "[A-Z\\-]{1,5}!?";
248 String collectionsRE = String.format("%s(,\\s+%s)*",collectionRE, collectionRE);
249 String addInfoRE = "(not\\s+seen|(presumed\\s+)?destroyed)";
250 String singleTypeTypeRE = String.format("(%s\\s)?%s(,\\s+%s)*", typesRE, collectionsRE, addInfoRE);
251 String allTypesRE = String.format("(\\(not\\s+seen\\)|\\(%s([,;]\\s%s)?\\))", singleTypeTypeRE, singleTypeTypeRE);
252 String designatedRE = String.format("%s(%s)?", allTypesRE, designatedByRE);
253 if (fotgTypePattern == null){
254
255 String pattern = String.format("(%s|%s)", notDesignatedRE, designatedRE );
256 fotgTypePattern = Pattern.compile(pattern);
257 }
258 Matcher matcher = fotgTypePattern.matcher(collectionAndType);
259
260 if (matcher.matches()){
261 if (collectionAndType.matches(notDesignatedRE)){
262 SpecimenTypeDesignation desig = SpecimenTypeDesignation.NewInstance();
263 desig.setNotDesignated(true);
264 // name.addSpecimenTypeDesignation(typeSpecimen, status, citation, citationMicroReference, originalNameString, isNotDesignated, addToAllHomotypicNames)
265 name.addTypeDesignation(desig, true);
266 }else if(collectionAndType.matches(designatedRE)){
267 String designatedBy = null;
268 Matcher desigMatcher = Pattern.compile(designatedByRE).matcher(collectionAndType);
269 boolean hasDesignatedBy = desigMatcher.find();
270 if (hasDesignatedBy){
271 designatedBy = desigMatcher.group(0);
272 collectionAndType = collectionAndType.replace(designatedBy, "");
273 }
274
275 //remove brackets
276 collectionAndType = collectionAndType.substring(1, collectionAndType.length() -1);
277 List<String> singleTypes = new ArrayList<String>();
278 Pattern singleTypePattern = Pattern.compile("^" + singleTypeTypeRE);
279 matcher = singleTypePattern.matcher(collectionAndType);
280 while (matcher.find()){
281 String match = matcher.group(0);
282 singleTypes.add(match);
283 collectionAndType = collectionAndType.substring(match.length());
284 if (!collectionAndType.isEmpty()){
285 collectionAndType = collectionAndType.substring(1).trim();
286 }else{
287 break;
288 }
289 matcher = singleTypePattern.matcher(collectionAndType);
290 }
291
292 List<SpecimenTypeDesignation> designations = new ArrayList<SpecimenTypeDesignation>();
293
294 //single types
295 for (String singleTypeOrig : singleTypes){
296 String singleType = singleTypeOrig;
297 //type
298 Pattern typePattern = Pattern.compile("^" + typesRE);
299 matcher = typePattern.matcher(singleType);
300 SpecimenTypeDesignationStatus typeStatus = null;
301 if (matcher.find()){
302 String typeStr = matcher.group(0);
303 singleType = singleType.substring(typeStr.length()).trim();
304 try {
305 typeStatus = SpecimenTypeParser.parseSpecimenTypeStatus(typeStr);
306 } catch (UnknownCdmTypeException e) {
307 fireWarningEvent("specimen type not recognized. Use generic type instead", parentEvent, 4);
308 typeStatus = SpecimenTypeDesignationStatus.TYPE();
309 //TODO use also type info from state
310 }
311 }else{
312 typeStatus = SpecimenTypeDesignationStatus.TYPE();
313 //TODO use also type info from state
314 }
315
316
317 //collection
318 Pattern collectionPattern = Pattern.compile("^" + collectionsRE);
319 matcher = collectionPattern.matcher(singleType);
320 String[] collectionStrings = new String[0];
321 if (matcher.find()){
322 String collectionStr = matcher.group(0);
323 singleType = singleType.substring(collectionStr.length());
324 collectionStr = collectionStr.replace("(", "").replace(")", "").replaceAll("\\s", "");
325 collectionStrings = collectionStr.split(",");
326 }
327
328 //addInfo
329 if (!singleType.isEmpty() && singleType.startsWith(", ")){
330 singleType = singleType.substring(2);
331 }
332
333 boolean notSeen = false;
334 if (singleType.equals("not seen")){
335 singleType = singleType.replace("not seen", "");
336 notSeen = true;
337 }
338 if (singleType.startsWith("not seen, ")){
339 singleType = singleType.replace("not seen, ", "");
340 notSeen = true;
341 }
342 boolean destroyed = false;
343 if (singleType.equals("destroyed")){
344 destroyed = true;
345 singleType = singleType.replace("destroyed", "");
346 }
347 boolean presumedDestroyed = false;
348 if (singleType.equals("presumed destroyed")){
349 presumedDestroyed = true;
350 singleType = singleType.replace("presumed destroyed", "");
351 }
352 boolean hasAddInfo = notSeen || destroyed || presumedDestroyed;
353
354
355 if (!singleType.isEmpty()){
356 String message = "SingleType was not fully read. Remaining: " + singleType + ". Original singleType was: " + singleTypeOrig;
357 fireWarningEvent(message, parentEvent, 6);
358 System.out.println(message);
359 }
360
361 if (collectionStrings.length > 0){
362 boolean isFirst = true;
363 for (String collStr : collectionStrings){
364 Collection collection = getCollection(state, collStr);
365 DerivedUnit unit = isFirst ? facade.innerDerivedUnit()
366 : facade.addDuplicate(collection, null, null, null, null);
367 SpecimenTypeDesignation desig = SpecimenTypeDesignation.NewInstance();
368 designations.add(desig);
369 desig.setTypeSpecimen(unit);
370 desig.setTypeStatus(typeStatus);
371 handleSpecimenTypeAddInfo(state, notSeen, destroyed,
372 presumedDestroyed, desig);
373 name.addTypeDesignation(desig, true);
374 isFirst = false;
375 }
376 }else if (hasAddInfo){ //handle addInfo if no collection data available
377 SpecimenTypeDesignation desig = SpecimenTypeDesignation.NewInstance();
378 designations.add(desig);
379 desig.setTypeStatus(typeStatus);
380 handleSpecimenTypeAddInfo(state, notSeen, destroyed,
381 presumedDestroyed, desig);
382 name.addTypeDesignation(desig, true);
383 }else{
384 fireWarningEvent("No type designation could be created as collection info was not recognized", parentEvent, 4);
385 }
386 }
387
388 if (designatedBy != null){
389 if (designations.size() != 1){
390 fireWarningEvent("Size of type designations is not exactly 1, which is expected for 'designated by'", parentEvent, 2);
391 }
392 designatedBy = designatedBy.trim();
393 if (designatedBy.startsWith("(") && designatedBy.endsWith(")") ){
394 designatedBy = designatedBy.substring(1, designatedBy.length() - 1);
395 }
396
397 for (SpecimenTypeDesignation desig : designations){
398 if (designatedBy.startsWith("designated by")){
399 String titleCache = designatedBy.replace("designated by", "").trim();
400 Reference reference = ReferenceFactory.newGeneric();
401 reference.setTitleCache(titleCache, true);
402 desig.setCitation(reference);
403 //in future we could also try to parse it automatically
404 fireWarningEvent("MANUALLY: Designated by should be parsed manually: " + titleCache, parentEvent, 1);
405 }else if (designatedBy.equals("designated here")){
406 Reference ref = state.getConfig().getSourceReference();
407 desig.setCitation(ref);
408 fireWarningEvent("MANUALLY: Microcitation should be added to 'designated here", parentEvent, 1);
409 }else if (designatedBy.startsWith("according to")){
410 String annotationStr = designatedBy.replace("according to", "").trim();
411 Annotation annotation = Annotation.NewInstance(annotationStr, AnnotationType.EDITORIAL(), Language.ENGLISH());
412 desig.addAnnotation(annotation);
413 }else{
414 fireWarningEvent("Designated by does not match known pattern: " + designatedBy, parentEvent, 6);
415 }
416 }
417 }
418 }else{
419 fireWarningEvent("CollectionAndType unexpectedly not matching: " + collectionAndTypeOrig, parentEvent, 6);
420 }
421 return true;
422 }else{
423 if (state.getConfig().isUseFotGSpecimenTypeCollectionAndTypeOnly()){
424 fireWarningEvent("NO MATCH: " + collectionAndTypeOrig, parentEvent, 4);
425 }
426 return false;
427 }
428
429 // // remove brackets
430 // if (collectionAndType.matches("^\\(.*\\)\\.?$")) {
431 // collectionAndType = collectionAndType.replaceAll("\\.$", "");
432 // collectionAndType = collectionAndType.substring(1, collectionAndType.length() - 1);
433 // }
434 //
435 // String[] split = collectionAndType.split("[;,]");
436 // for (String str : split) {
437 // str = str.trim();
438 // boolean addToAllNamesInGroup = true;
439 // TypeInfo typeInfo = makeSpecimenTypeTypeInfo(str, parentEvent);
440 // SpecimenTypeDesignationStatus typeStatus = typeInfo.status;
441 // Collection collection = this.getCollection(state, typeInfo.collectionString);
442 //
443 // // TODO improve cache strategy handling
444 // DerivedUnit typeSpecimen = facade.addDuplicate(collection, null, null, null, null);
445 // typeSpecimen.setCacheStrategy(new DerivedUnitFacadeCacheStrategy());
446 // name.addSpecimenTypeDesignation(typeSpecimen, typeStatus, null, null, null, false, addToAllNamesInGroup);
447 // }
448 }
449
450
451 /**
452 * @param notSeen
453 * @param destroyed
454 * @param presumedDestroyed
455 * @param desig
456 */
457 private void handleSpecimenTypeAddInfo(MarkupImportState state, boolean notSeen, boolean destroyed,
458 boolean presumedDestroyed, SpecimenTypeDesignation desig) {
459 if (notSeen){
460 UUID uuidNotSeenMarker = MarkupTransformer.uuidNotSeen;
461 MarkerType notSeenMarkerType = getMarkerType(state, uuidNotSeenMarker, "Not seen", "Not seen", null, null);
462 Marker marker = Marker.NewInstance(notSeenMarkerType, true);
463 desig.addMarker(marker);
464 fireWarningEvent("not seen not yet implemented", "handleSpecimenTypeAddInfo", 4);
465 }
466 if (destroyed){
467 UUID uuidDestroyedMarker = MarkupTransformer.uuidDestroyed;
468 MarkerType destroyedMarkerType = getMarkerType(state, uuidDestroyedMarker, "Destroyed", "Destroyed", null, null);
469 Marker marker = Marker.NewInstance(destroyedMarkerType, true);
470 desig.addMarker(marker);
471 fireWarningEvent("'destroyed' not yet fully implemented", "handleSpecimenTypeAddInfo", 4);
472 }
473 if (presumedDestroyed){
474 Annotation annotation = Annotation.NewInstance("presumably destroyed", Language.ENGLISH());
475 annotation.setAnnotationType(AnnotationType.EDITORIAL());
476 desig.addAnnotation(annotation);
477 }
478 }
479
480
481 private TypeInfo makeSpecimenTypeTypeInfo(MarkupImportState state, String originalString, TypeInfo lastTypeInfo, XMLEvent event) {
482 TypeInfo result = new TypeInfo();
483 if ("not designated".equals(originalString)){
484 result.notDesignated = true;
485 return result;
486 }
487 List<String> knownCollections = state.getConfig().getKnownCollections();
488 for (String knownCollection:knownCollections){
489 if (originalString.contains(knownCollection)){
490 result.collectionString = knownCollection;
491 originalString = originalString.replace(knownCollection, "").trim();
492 break;
493 }
494 }
495 String[] split = originalString.split("\\s+");
496
497 for (String str : split) {
498 if (str.matches(SpecimenTypeParser.typeTypePattern)) {
499 SpecimenTypeDesignationStatus status;
500 try {
501 status = SpecimenTypeParser.parseSpecimenTypeStatus(str);
502 } catch (UnknownCdmTypeException e) {
503 String message = "Specimen type status '%s' not recognized by parser";
504 fireWarningEvent(String.format(message, str), event, 4);
505 status = null;
506 }
507 if (result.status != null){
508 String message = "More than 1 status string found: " + originalString;
509 fireWarningEvent(message, event, 4);
510 }
511 result.status = status;
512 } else if (str.matches(SpecimenTypeParser.collectionPattern)) {
513 if (result.collectionString != null){
514 String message = "More than 1 collection string found: " + originalString;
515 fireWarningEvent(message, event, 4);
516 }
517 result.collectionString = str;
518 } else {
519 String message = "Type part '%s' could not be recognized";
520 fireWarningEvent(String.format(message, str), event, 2);
521 }
522 if (result.status == null && lastTypeInfo != null && lastTypeInfo.status != null){
523 result.status = lastTypeInfo.status;
524 }
525
526 }
527
528 return result;
529 }
530
531
532 private void handleGathering(MarkupImportState state, XMLEventReader readerOrig, XMLEvent parentEvent , DerivedUnitFacade facade) throws XMLStreamException {
533 checkNoAttributes(parentEvent);
534 boolean hasCollector = false;
535 boolean hasFieldNum = false;
536
537 LookAheadEventReader reader = new LookAheadEventReader(parentEvent.asStartElement(), readerOrig);
538
539 // elements
540 while (reader.hasNext()) {
541 XMLEvent next = readNoWhitespace(reader);
542 if (isMyEndingElement(next, parentEvent)) {
543 if (! hasCollector){
544 if (state.getCurrentCollector() == null){
545 checkMandatoryElement(hasCollector,parentEvent.asStartElement(), COLLECTOR);
546 }else{
547 facade.setCollector(state.getCurrentCollector());
548 }
549 }
550 checkMandatoryElement(hasFieldNum,parentEvent.asStartElement(), FIELD_NUM);
551 return;
552 }else if (isStartingElement(next, COLLECTOR)) {
553 hasCollector = true;
554 String collectorStr = getCData(state, reader, next);
555 TeamOrPersonBase<?> collector = createCollector(collectorStr);
556 facade.setCollector(collector);
557 state.setCurrentCollector(collector);
558 } else if (isStartingElement(next, ALTERNATIVE_COLLECTOR)) {
559 handleNotYetImplementedElement(next);
560 } else if (isStartingElement(next, FIELD_NUM)) {
561 hasFieldNum = true;
562 String fieldNumStr = getCData(state, reader, next);
563 facade.setFieldNumber(fieldNumStr);
564 } else if (isStartingElement(next, ALTERNATIVE_FIELD_NUM)) {
565 handleAlternativeFieldNumber(state, reader, next, facade.innerFieldUnit());
566 } else if (isStartingElement(next, COLLECTION_TYPE_STATUS)) {
567 handleNotYetImplementedElement(next);
568 } else if (isStartingElement(next, COLLECTION_AND_TYPE)) {
569 handleGatheringCollectionAndType(state, reader, next, facade);
570 } else if (isStartingElement(next, ALTERNATIVE_COLLECTION_TYPE_STATUS)) {
571 handleNotYetImplementedElement(next);
572 } else if (isStartingElement(next, SUB_GATHERING)) {
573 handleNotYetImplementedElement(next);
574 } else if (isStartingElement(next, COLLECTION)) {
575 handleNotYetImplementedElement(next);
576 } else if (isStartingElement(next, LOCALITY)) {
577 handleLocality(state, reader, next, facade);
578 } else if (isStartingElement(next, FULL_NAME)) {
579 Rank defaultRank = Rank.SPECIES(); // can be any
580 INonViralName nvn = createNameByCode(state, defaultRank);
581 handleFullName(state, reader, nvn, next);
582 TaxonNameBase<?,?> name = TaxonNameBase.castAndDeproxy(nvn);
583 DeterminationEvent.NewInstance(name, facade.innerDerivedUnit() != null ? facade.innerDerivedUnit() : facade.innerFieldUnit());
584 } else if (isStartingElement(next, DATES)) {
585 TimePeriod timePeriod = handleDates(state, reader, next);
586 facade.setGatheringPeriod(timePeriod);
587 } else if (isStartingElement(next, GATHERING_NOTES)) {
588 handleAmbigousManually(state, reader, next.asStartElement());
589 } else if (isStartingElement(next, NOTES)) {
590 handleNotYetImplementedElement(next);
591 }else if (next.isCharacters()) {
592 String text = next.asCharacters().getData().trim();
593 if (isPunctuation(text)){
594 //do nothing
595 }else if (state.isSpecimenType() && charIsSimpleType(text) ){
596 //do nothing
597 }else if ( (text.equals("=") || text.equals("(") ) && reader.nextIsStart(ALTERNATIVE_FIELD_NUM)){
598 //do nothing
599 }else if ( (text.equals(").") || text.equals(")")) && reader.previousWasEnd(ALTERNATIVE_FIELD_NUM)){
600 //do nothing
601 }else if ( charIsOpeningOrClosingBracket(text) ){
602 //for now we don't do anything, however in future brackets may have semantics
603 }else{
604 //TODO
605 String message = "Unrecognized text: %s";
606 fireWarningEvent(String.format(message, text), next, 6);
607 }
608 } else {
609 handleUnexpectedElement(next);
610 }
611 }
612 throw new IllegalStateException("Collection has no closing tag.");
613
614 }
615
616
617 private final String fotgPattern = "^\\(([A-Z]{1,3})(?:,\\s?([A-Z]{1,3}))*\\)"; // eg. (US, B, CAN)
618 private void handleGatheringCollectionAndType(MarkupImportState state, XMLEventReader reader, XMLEvent parent, DerivedUnitFacade facade) throws XMLStreamException {
619 checkNoAttributes(parent);
620
621 XMLEvent next = readNoWhitespace(reader);
622
623 if (next.isCharacters()){
624 String txt = next.asCharacters().getData().trim();
625 if (state.isSpecimenType()){
626 state.addCollectionAndType(txt);
627 }else{
628
629 Matcher fotgMatcher = Pattern.compile(fotgPattern).matcher(txt);
630
631 if (fotgMatcher.matches()){
632 txt = txt.substring(1, txt.length() - 1); //remove bracket
633 String[] splits = txt.split(",");
634 for (String split : splits ){
635 Collection collection = getCollection(state, split.trim());
636 if (facade.innerDerivedUnit() == null){
637 String message = "Adding a duplicate to a non derived unit based facade is not possible. Please check why no derived unit exists yet in facade!";
638 this.fireWarningEvent(message, next, -6);
639 }else{
640 facade.addDuplicate(collection, null, null, null, null);
641 }
642 }
643 //FIXME 9
644 //create derived units and and add collections
645
646 }else{
647 fireWarningEvent("Collection and type pattern for gathering not recognized: " + txt, next, 4);
648 }
649 }
650
651 }else{
652 fireUnexpectedEvent(next, 0);
653 }
654
655 if (isMyEndingElement(next, parent)){
656 return; //in case we have a completely empty element
657 }
658 next = readNoWhitespace(reader);
659 if (isMyEndingElement(next, parent)){
660 return;
661 }else{
662 fireUnexpectedEvent(next, 0);
663 return;
664 }
665 }
666
667
668 private Collection getCollection(MarkupImportState state, String code) {
669 Collection collection = state.getCollectionByCode(code);
670 if (collection == null){
671 List<Collection> list = this.docImport.getCollectionService().searchByCode(code);
672 if (list.size() == 1){
673 collection = list.get(0);
674 }else if (list.size() > 1){
675 fireWarningEvent("More then one occurrence for collection " + code + " in database. Collection not reused" , "", 1);
676 }
677
678 if (collection == null){
679 collection = Collection.NewInstance();
680 collection.setCode(code);
681 this.docImport.getCollectionService().saveOrUpdate(collection);
682 }
683 state.putCollectionByCode(code, collection);
684 }
685 return collection;
686 }
687
688
689 private void handleAlternativeFieldNumber(MarkupImportState state, XMLEventReader reader, XMLEvent parent, FieldUnit fieldUnit) throws XMLStreamException {
690 Map<String, Attribute> attrs = getAttributes(parent);
691 Boolean doubtful = this.getAndRemoveBooleanAttributeValue(parent, attrs, "doubful", false);
692
693 //for now we do not handle annotation and typeNotes
694 String altFieldNum = getCData(state, reader, parent, false).trim();
695 DefinedTerm type = this.getIdentifierType(state, MarkupTransformer.uuidIdentTypeAlternativeFieldNumber, "Alternative field number", "Alternative field number", "alt. field no.", null);
696 fieldUnit.addIdentifier(altFieldNum, type);
697 if (doubtful){
698 fireWarningEvent("Marking alternative field numbers as doubtful not yet possible, see #4673", parent,4);
699 // Marker.NewInstance(identifier, "true", MarkerType.IS_DOUBTFUL());
700 }
701
702 }
703
704
705 private boolean charIsOpeningOrClosingBracket(String text) {
706 return text.equals("(") || text.equals(")");
707 }
708
709
710 private TimePeriod handleDates(MarkupImportState state, XMLEventReader reader, XMLEvent parent) throws XMLStreamException {
711 checkNoAttributes(parent);
712 TimePeriod result = TimePeriod.NewInstance();
713 String parseMessage = "%s can not be parsed: %s";
714 boolean hasFullDate = false;
715 boolean hasAtomised = false;
716 boolean hasUnparsedAtomised = false;
717 while (reader.hasNext()) {
718 XMLEvent next = readNoWhitespace(reader);
719 if (isMyEndingElement(next, parent)) {
720 if (! isAlternative(hasFullDate, hasAtomised, hasUnparsedAtomised)){
721 String message = "Some problems exist when defining the date";
722 fireWarningEvent(message, parent, 4);
723 }
724 return result;
725 } else if (isStartingElement(next, FULL_DATE)) {
726 String fullDate = getCData(state, reader, next, true);
727 if (fullDate.endsWith(".")){
728 fullDate = fullDate.substring(0, fullDate.length()-1);
729 }
730 result = TimePeriodParser.parseString(fullDate);
731 if (result.getFreeText() != null){
732 fireWarningEvent(String.format(parseMessage, FULL_DATE, fullDate), parent, 1);
733 }
734 hasFullDate = true;
735 } else if (isStartingElement(next, DAY)) {
736 String day = getCData(state, reader, next, true).trim();
737 day = normalizeDate(day);
738 if (CdmUtils.isNumeric(day)){
739 result.setStartDay(Integer.valueOf(day));
740 hasAtomised = true;
741 }else{
742 fireWarningEvent(String.format(parseMessage,"Day", day), parent, 2);
743 hasUnparsedAtomised = true;
744 }
745 } else if (isStartingElement(next, MONTH)) {
746 String month = getCData(state, reader, next, true).trim();
747 month = normalizeDate(month);
748 if (CdmUtils.isNumeric(month)){
749 result.setStartMonth(Integer.valueOf(month));
750 hasAtomised = true;
751 }else{
752 fireWarningEvent(String.format(parseMessage,"Month", month), parent, 2);
753 hasUnparsedAtomised = true;
754 }
755 } else if (isStartingElement(next, YEAR)) {
756 String year = getCData(state, reader, next, true).trim();
757 year = normalizeDate(year);
758 if (CdmUtils.isNumeric(year)){
759 result.setStartYear(Integer.valueOf(year));
760 hasAtomised = true;
761 }else{
762 fireWarningEvent(String.format(parseMessage,"Year", year), parent, 2);
763 hasUnparsedAtomised = true;
764 }
765 } else {
766 handleUnexpectedElement(next);
767 }
768 }
769 throw new IllegalStateException("Dates has no closing tag.");
770 }
771
772
773 private String normalizeDate(String partOfDate) {
774 if (isBlank(partOfDate)){
775 return null;
776 }
777 partOfDate = partOfDate.trim();
778 while (partOfDate.startsWith("-")){
779 partOfDate = partOfDate.substring(1);
780 }
781 return partOfDate;
782 }
783
784
785 private boolean isAlternative(boolean first, boolean second, boolean third) {
786 return ( (first ^ second) && !third) ||
787 (! first && ! second && third) ;
788 }
789
790
791 private void handleLocality(MarkupImportState state, XMLEventReader reader,XMLEvent parentEvent, DerivedUnitFacade facade)throws XMLStreamException {
792 String classValue = getClassOnlyAttribute(parentEvent);
793 boolean isLocality = false;
794 NamedAreaLevel areaLevel = null;
795 if ("locality".equalsIgnoreCase(classValue)) {
796 isLocality = true;
797 } else {
798 areaLevel = makeNamedAreaLevel(state, classValue, parentEvent);
799 }
800
801 String text = "";
802 // elements
803 while (reader.hasNext()) {
804 XMLEvent next = readNoWhitespace(reader);
805 if (isMyEndingElement(next, parentEvent)) {
806 if (StringUtils.isNotBlank(text)) {
807 text = normalize(text);
808 if (isLocality) {
809 facade.setLocality(text, getDefaultLanguage(state));
810 } else {
811 text = CdmUtils.removeTrailingDot(text);
812 NamedArea area = makeArea(state, text, areaLevel);
813 facade.addCollectingArea(area);
814 }
815 }
816 // TODO
817 return;
818 }else if (isStartingElement(next, ALTITUDE)) {
819 handleNotYetImplementedElement(next);
820 // homotypicalGroup = handleNom(state, reader, next, taxon,
821 // homotypicalGroup);
822 } else if (isStartingElement(next, COORDINATES)) {
823 handleNotYetImplementedElement(next);
824 } else if (isStartingElement(next, ANNOTATION)) {
825 handleNotYetImplementedElement(next);
826 } else if (next.isCharacters()) {
827 text += next.asCharacters().getData();
828 } else {
829 handleUnexpectedElement(next);
830 }
831 }
832 throw new IllegalStateException("<SpecimenType> has no closing tag");
833 }
834
835
836
837 private TeamOrPersonBase<?> createCollector(String collectorStr) {
838 return createAuthor(collectorStr);
839 }
840
841
842 public List<DescriptionElementBase> handleMaterialsExamined(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent, Feature feature, TaxonDescription defaultDescription) throws XMLStreamException {
843 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
844 //reset current areas
845 state.removeCurrentAreas();
846 while (reader.hasNext()) {
847 XMLEvent next = readNoWhitespace(reader);
848 if (isMyEndingElement(next, parentEvent)) {
849 if (result.isEmpty()){
850 fireWarningEvent("Materials examined created empty Individual Associations list", parentEvent, 4);
851 }
852 state.removeCurrentAreas();
853 return result;
854 } else if (isStartingElement(next, SUB_HEADING)) {
855 // Map<String, Object> inlineMarkup = new HashMap<String, Object>();
856 String text = getCData(state, reader, next, true);
857 if (isFeatureHeading(state, next, text)){
858 feature = makeHeadingFeature(state, next, text, feature);
859 }else{
860 String message = "Unhandled subheading: %s";
861 fireWarningEvent(String.format(message, text), next, 4);
862 }
863 // for (String key : inlineMarkup.keySet()){
864 // handleInlineMarkup(state, key, inlineMarkup);
865 // }
866
867 } else if (isStartingElement(next, BR) || isEndingElement(next, BR)) {
868 //do nothing
869 } else if (isStartingElement(next, GATHERING)) {
870 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.DerivedUnit);
871 addCurrentAreas(state, next, facade);
872 handleGathering(state, reader, next, facade);
873 SpecimenOrObservationBase<?> specimen;
874 if (facade.innerDerivedUnit() != null){
875 specimen = facade.innerDerivedUnit();
876 }else{
877 specimen = facade.innerFieldUnit();
878 }
879 IndividualsAssociation individualsAssociation = IndividualsAssociation.NewInstance();
880 individualsAssociation.setAssociatedSpecimenOrObservation(specimen);
881 result.add(individualsAssociation);
882 } else if (isStartingElement(next, GATHERING_GROUP)) {
883 List<DescriptionElementBase> list = getGatheringGroupDescription(state, reader, next);
884 result.addAll(list);
885 }else if (next.isCharacters()) {
886 String text = next.asCharacters().getData().trim();
887 if (isPunctuation(text)){
888 //do nothing
889 }else{
890 String message = "Unrecognized text: %s";
891 fireWarningEvent(String.format(message, text), next, 6);
892 }
893 } else {
894 handleUnexpectedElement(next);
895 }
896 }
897 throw new IllegalStateException("<String> has no closing tag");
898
899 }
900
901
902
903 private List<DescriptionElementBase> getGatheringGroupDescription(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
904 Map<String, Attribute> attributes = getAttributes(parentEvent);
905 String geoScope = getAndRemoveAttributeValue(attributes, "geoscope");
906 Boolean doubtful = getAndRemoveBooleanAttributeValue(parentEvent, attributes, DOUBTFUL, null);
907 checkNoAttributes(attributes, parentEvent);
908
909 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
910
911
912 TaxonDescription td = null;
913
914 if (isNotBlank(geoScope)){
915 NamedArea area = Country.getCountryByLabel(geoScope);
916 if (area == null){
917 try {
918 area = state.getTransformer().getNamedAreaByKey(geoScope);
919 } catch (Exception e) {
920 fireWarningEvent("getNamedArea not supported", parentEvent, 16);
921 }
922 }
923 if (area == null){
924 fireWarningEvent("Area for geoscope not found: " + geoScope +"; add specimen group to ordinary description", parentEvent, 4);
925 }else{
926 state.addCurrentArea(area);
927 Set<TaxonDescription> descs = state.getCurrentTaxon().getDescriptions();
928 for (TaxonDescription desc : descs){
929 Set<NamedArea> scopes = desc.getGeoScopes();
930 if (scopes.size() == 1 && scopes.iterator().next().equals(area)){
931 td = desc;
932 break;
933 }
934 }
935 if (td == null){
936 TaxonDescription desc = TaxonDescription.NewInstance(state.getCurrentTaxon());
937 desc.addGeoScope(area);
938 if (doubtful != null){
939 desc.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), doubtful));
940 }
941 td = desc;
942 }
943 }
944 }
945
946 while (reader.hasNext()) {
947 XMLEvent next = readNoWhitespace(reader);
948 if (isMyEndingElement(next, parentEvent)) {
949 if (result.isEmpty()){
950 fireWarningEvent("Gathering group created empty Individual Associations list", parentEvent, 4);
951 }
952 state.removeCurrentAreas();
953 return result;
954 } else if (isStartingElement(next, GATHERING)) {
955 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.DerivedUnit);
956 addCurrentAreas(state, next, facade);
957 handleGathering(state, reader, next, facade);
958 SpecimenOrObservationBase<?> specimen;
959 if (facade.innerDerivedUnit() != null){
960 specimen = facade.innerDerivedUnit();
961 }else{
962 specimen = facade.innerFieldUnit();
963 }
964 IndividualsAssociation individualsAssociation = IndividualsAssociation.NewInstance();
965 individualsAssociation.setAssociatedSpecimenOrObservation(specimen);
966 result.add(individualsAssociation);
967
968 }else if (next.isCharacters()) {
969 String text = next.asCharacters().getData().trim();
970 if (isPunctuation(text)){
971 //do nothing
972 }else{
973 //TODO
974 String message = "Unrecognized text: %s";
975 fireWarningEvent(String.format(message, text), next, 6);
976 }
977 } else {
978 handleUnexpectedElement(next);
979 }
980 }
981 throw new IllegalStateException("<Gathering group> has no closing tag");
982
983 }
984
985 private void addCurrentAreas(MarkupImportState state, XMLEvent event, DerivedUnitFacade facade) {
986 for (NamedArea area : state.getCurrentAreas()){
987 if (area == null){
988 continue;
989 }else if (area.isInstanceOf(Country.class)){
990 facade.setCountry(area);
991 }else{
992 String message = "Current area %s is not country. This is not expected for currently known data.";
993 fireWarningEvent(String.format(message, area.getTitleCache()), event, 2);
994 facade.addCollectingArea(area);
995 }
996 }
997
998 }
999
1000
1001 // private void handleInlineMarkup(MarkupImportState state, String key, Map<String, Object> inlineMarkup) {
1002 // Object obj = inlineMarkup.get(key);
1003 // if (key.equals(LOCALITY)){
1004 // if (obj instanceof NamedArea){
1005 // NamedArea area = (NamedArea)obj;
1006 // state.addCurrentArea(area);
1007 // }
1008 // }
1009 //
1010 // }
1011
1012
1013 /**
1014 * Changes the feature if the (sub)-heading implies this. Also recognizes hidden country information
1015 * @param state
1016 * @param parent
1017 * @param text
1018 * @param feature
1019 * @return
1020 */
1021 private Feature makeHeadingFeature(MarkupImportState state, XMLEvent parent, String originalText, Feature feature) {
1022 //expand, provide by config or service
1023 String materialRegEx = "Mat[\u00E9\u00C9]riel";
1024 String examinedRegEx = "[\u00E9\u00C9]tudi[\u00E9\u00C9]";
1025 String countryRegEx = "(gabonais)";
1026 String postfixCountryRegEx = "\\s+(pour le Gabon)";
1027
1028 String materialExaminedRegEx = "(?i)" + materialRegEx + "\\s+(" + countryRegEx +"\\s+)?" + examinedRegEx + "(" +postfixCountryRegEx + ")?:?";
1029
1030 String text = originalText;
1031
1032 if (isBlank(text)){
1033 return feature;
1034 }else{
1035 if (text.matches(materialExaminedRegEx)){
1036 //gabon specific
1037 if (text.contains("gabonais ")){
1038 text = text.replace("gabonais ", "");
1039 state.addCurrentArea(Country.GABONGABONESEREPUBLIC());
1040 }
1041 if (text.contains(" pour le Gabon")){
1042 text = text.replace(" pour le Gabon", "");
1043 state.addCurrentArea(Country.GABONGABONESEREPUBLIC());
1044 }
1045
1046 //update feature
1047 feature = Feature.MATERIALS_EXAMINED();
1048 state.putFeatureToGeneralSorterList(feature);
1049 return feature;
1050 }else{
1051 String message = "Heading/Subheading not recognized: %s";
1052 fireWarningEvent(String.format(message, originalText), parent, 4);
1053 return feature;
1054 }
1055 }
1056 }
1057
1058
1059 /**
1060 * True if heading or subheading represents feature information
1061 * @param state
1062 * @param parent
1063 * @param text
1064 * @return
1065 */
1066 private boolean isFeatureHeading(MarkupImportState state, XMLEvent parent, String text) {
1067 return makeHeadingFeature(state, parent, text, null) != null;
1068 }
1069
1070
1071 public String handleInLineGathering(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
1072 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.FieldUnit);
1073 handleGathering(state, reader, parentEvent, facade);
1074 SpecimenOrObservationBase<?> specimen = facade.innerFieldUnit();
1075 if (specimen == null){
1076 specimen = facade.innerDerivedUnit();
1077 String message = "Inline gaterhing has no field unit";
1078 fireWarningEvent(message, parentEvent, 2);
1079 }
1080
1081 String result = "<cdm:specimen uuid='%s'>%s</specimen>";
1082 if (specimen != null){
1083 result = String.format(result, specimen.getUuid(), specimen.getTitleCache());
1084 }else{
1085 String message = "Inline gathering has no specimen";
1086 fireWarningEvent(message, parentEvent, 4);
1087 }
1088 save(specimen, state);
1089 return result;
1090 }
1091
1092
1093
1094
1095
1096 }