created markup nomenclaturalImport and specimen import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / MarkupSpecimenImport.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.markup;
11
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16
17 import javax.xml.stream.XMLEventReader;
18 import javax.xml.stream.XMLStreamException;
19 import javax.xml.stream.events.Attribute;
20 import javax.xml.stream.events.XMLEvent;
21
22 import org.apache.commons.lang.StringUtils;
23 import org.apache.log4j.Logger;
24
25 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
26 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
27 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacadeCacheStrategy;
28 import eu.etaxonomy.cdm.common.CdmUtils;
29 import eu.etaxonomy.cdm.model.agent.AgentBase;
30 import eu.etaxonomy.cdm.model.common.CdmBase;
31 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
32 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
33 import eu.etaxonomy.cdm.model.location.NamedArea;
34 import eu.etaxonomy.cdm.model.location.NamedAreaLevel;
35 import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
36 import eu.etaxonomy.cdm.model.name.NonViralName;
37 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
38 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
39 import eu.etaxonomy.cdm.model.occurrence.Collection;
40 import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
41 import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
42 import eu.etaxonomy.cdm.model.occurrence.Specimen;
43 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
44 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
45 import eu.etaxonomy.cdm.strategy.parser.SpecimenTypeParser;
46 import eu.etaxonomy.cdm.strategy.parser.SpecimenTypeParser.TypeInfo;
47
48 /**
49 * @author a.mueller
50 * @created 30.05.2012
51 *
52 */
53 public class MarkupSpecimenImport extends MarkupImportBase {
54 @SuppressWarnings("unused")
55 private static final Logger logger = Logger.getLogger(MarkupSpecimenImport.class);
56
57 private static final String ALTERNATIVE_COLLECTION_TYPE_STATUS = "alternativeCollectionTypeStatus";
58 private static final String ALTERNATIVE_COLLECTOR = "alternativeCollector";
59 private static final String ALTERNATIVE_FIELD_NUM = "alternativeFieldNum";
60 private static final String COLLECTOR = "collector";
61 private static final String COLLECTION = "collection";
62 private static final String COLLECTION_AND_TYPE = "collectionAndType";
63 private static final String COLLECTION_TYPE_STATUS = "collectionTypeStatus";
64 private static final String DESTROYED = "destroyed";
65 private static final String FIELD_NUM = "fieldNum";
66 private static final String FULL_TYPE = "fullType";
67 private static final String LOCALITY = "locality";
68 private static final String LOST = "lost";
69 private static final String SUB_COLLECTION = "subCollection";
70 private static final String NOT_FOUND = "notFound";
71 private static final String NOT_SEEN = "notSeen";
72 private static final String ORIGINAL_DETERMINATION = "originalDetermination";
73
74 private static final String UNKNOWN = "unknown";
75
76
/**
 * Creates the specimen part of the markup import.
 *
 * @param docImport the document import, passed on unchanged to the super constructor
 */
public MarkupSpecimenImport(MarkupDocumentImport docImport) {
    super(docImport);
}
80
81
82 public void handleSpecimenType(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent,
83 HomotypicalGroup homotypicalGroup) throws XMLStreamException {
84
85 // attributes
86 Map<String, Attribute> attributes = getAttributes(parentEvent);
87 String typeStatus = getAndRemoveAttributeValue(attributes, TYPE_STATUS);
88 String notSeen = getAndRemoveAttributeValue(attributes, NOT_SEEN);
89 String unknown = getAndRemoveAttributeValue(attributes, UNKNOWN);
90 String notFound = getAndRemoveAttributeValue(attributes, NOT_FOUND);
91 String destroyed = getAndRemoveAttributeValue(attributes, DESTROYED);
92 String lost = getAndRemoveAttributeValue(attributes, LOST);
93 checkNoAttributes(attributes, parentEvent);
94 if (StringUtils.isNotEmpty(typeStatus)) {
95 // TODO
96 // currently not needed
97 } else if (StringUtils.isNotEmpty(notSeen)) {
98 handleNotYetImplementedAttribute(attributes, NOT_SEEN);
99 } else if (StringUtils.isNotEmpty(unknown)) {
100 handleNotYetImplementedAttribute(attributes, UNKNOWN);
101 } else if (StringUtils.isNotEmpty(notFound)) {
102 handleNotYetImplementedAttribute(attributes, NOT_FOUND);
103 } else if (StringUtils.isNotEmpty(destroyed)) {
104 handleNotYetImplementedAttribute(attributes, DESTROYED);
105 } else if (StringUtils.isNotEmpty(lost)) {
106 handleNotYetImplementedAttribute(attributes, LOST);
107 }
108
109 NonViralName<?> firstName = null;
110 Set<TaxonNameBase> names = homotypicalGroup.getTypifiedNames();
111 if (names.isEmpty()) {
112 String message = "There is no name in a homotypical group. Can't create the specimen type";
113 fireWarningEvent(message, parentEvent, 8);
114 } else {
115 firstName = CdmBase.deproxy(names.iterator().next(),NonViralName.class);
116 }
117
118 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(DerivedUnitType.Specimen);
119 String text = "";
120 // elements
121 while (reader.hasNext()) {
122 XMLEvent next = readNoWhitespace(reader);
123 if (next.isEndElement()) {
124 if (isMyEndingElement(next, parentEvent)) {
125 makeSpecimenType(state, facade, text, firstName, parentEvent);
126 return;
127 } else {
128 if (isEndingElement(next, FULL_TYPE)) {
129 // NOT YET IMPLEMENTED
130 popUnimplemented(next.asEndElement());
131 } else if (isEndingElement(next, TYPE_STATUS)) {
132 // NOT YET IMPLEMENTED
133 popUnimplemented(next.asEndElement());
134 } else if (isEndingElement(next, ORIGINAL_DETERMINATION)) {
135 // NOT YET IMPLEMENTED
136 popUnimplemented(next.asEndElement());
137 } else if (isEndingElement(next, SPECIMEN_TYPE)) {
138 // NOT YET IMPLEMENTED
139 popUnimplemented(next.asEndElement());
140 } else if (isEndingElement(next, COLLECTION_AND_TYPE)) {
141 // NOT YET IMPLEMENTED
142 popUnimplemented(next.asEndElement());
143 } else if (isEndingElement(next, CITATION)) {
144 // NOT YET IMPLEMENTED
145 popUnimplemented(next.asEndElement());
146 } else if (isEndingElement(next, NOTES)) {
147 // NOT YET IMPLEMENTED
148 popUnimplemented(next.asEndElement());
149 } else if (isEndingElement(next, ANNOTATION)) {
150 // NOT YET IMPLEMENTED
151 popUnimplemented(next.asEndElement());
152 } else {
153 handleUnexpectedEndElement(next.asEndElement());
154 }
155 }
156 } else if (next.isStartElement()) {
157 if (isStartingElement(next, FULL_TYPE)) {
158 handleNotYetImplementedElement(next);
159 // homotypicalGroup = handleNom(state, reader, next, taxon,
160 // homotypicalGroup);
161 } else if (isStartingElement(next, TYPE_STATUS)) {
162 handleNotYetImplementedElement(next);
163 } else if (isStartingElement(next, GATHERING)) {
164 handleGathering(state, reader, next, facade);
165 } else if (isStartingElement(next, ORIGINAL_DETERMINATION)) {
166 handleNotYetImplementedElement(next);
167 } else if (isStartingElement(next, SPECIMEN_TYPE)) {
168 handleNotYetImplementedElement(next);
169 } else if (isStartingElement(next, COLLECTION_AND_TYPE)) {
170 handleNotYetImplementedElement(next);
171 } else if (isStartingElement(next, CITATION)) {
172 handleNotYetImplementedElement(next);
173 } else if (isStartingElement(next, NOTES)) {
174 handleNotYetImplementedElement(next);
175 } else if (isStartingElement(next, ANNOTATION)) {
176 handleNotYetImplementedElement(next);
177 } else {
178 handleUnexpectedStartElement(next);
179 }
180 } else if (next.isCharacters()) {
181 text += next.asCharacters().getData();
182 } else {
183 handleUnexpectedElement(next);
184 }
185 }
186 // TODO handle missing end element
187 throw new IllegalStateException("Specimen type has no closing tag");
188 }
189
190
191
192 private void makeSpecimenType(MarkupImportState state, DerivedUnitFacade facade, String text,
193 NonViralName name, XMLEvent parentEvent) {
194 text = text.trim();
195 // remove brackets
196 if (text.matches("^\\(.*\\)\\.?$")) {
197 text = text.replaceAll("\\.", "");
198 text = text.substring(1, text.length() - 1);
199 }
200 String[] split = text.split("[;,]");
201 for (String str : split) {
202 str = str.trim();
203 boolean addToAllNamesInGroup = true;
204 TypeInfo typeInfo = makeSpecimenTypeTypeInfo(str, parentEvent);
205 SpecimenTypeDesignationStatus typeStatus = typeInfo.status;
206 Collection collection = createCollection(typeInfo.collectionString);
207
208 // TODO improve cache strategy handling
209 DerivedUnitBase typeSpecimen = facade.addDuplicate(collection,
210 null, null, null, null);
211 typeSpecimen.setCacheStrategy(new DerivedUnitFacadeCacheStrategy());
212 name.addSpecimenTypeDesignation((Specimen) typeSpecimen, typeStatus, null, null, null, false, addToAllNamesInGroup);
213 }
214 }
215
216
217 private Collection createCollection(String code) {
218 // TODO deduplicate
219 // TODO code <-> name
220 Collection result = Collection.NewInstance();
221 result.setCode(code);
222 return result;
223 }
224
225
226 private TypeInfo makeSpecimenTypeTypeInfo(String originalString, XMLEvent event) {
227 TypeInfo result = new TypeInfo();
228 String[] split = originalString.split("\\s+");
229 for (String str : split) {
230 if (str.matches(SpecimenTypeParser.typeTypePattern)) {
231 SpecimenTypeDesignationStatus status;
232 try {
233 status = SpecimenTypeParser.parseSpecimenTypeStatus(str);
234 } catch (UnknownCdmTypeException e) {
235 String message = "Specimen type status '%s' not recognized by parser";
236 message = String.format(message, str);
237 fireWarningEvent(message, event, 4);
238 status = null;
239 }
240 result.status = status;
241 } else if (str.matches(SpecimenTypeParser.collectionPattern)) {
242 result.collectionString = str;
243 } else {
244 String message = "Type part '%s' could not be recognized";
245 message = String.format(message, str);
246 fireWarningEvent(message, event, 2);
247 }
248 }
249
250 return result;
251 }
252
253
254 private void handleGathering(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent , DerivedUnitFacade facade) throws XMLStreamException {
255 checkNoAttributes(parentEvent);
256 boolean hasCollector = false;
257 boolean hasFieldNum = false;
258
259 // elements
260 while (reader.hasNext()) {
261 XMLEvent next = readNoWhitespace(reader);
262 if (next.isEndElement()) {
263 if (isMyEndingElement(next, parentEvent)) {
264 checkMandatoryElement(hasCollector,parentEvent.asStartElement(), COLLECTOR);
265 checkMandatoryElement(hasFieldNum,parentEvent.asStartElement(), FIELD_NUM);
266 return;
267 } else {
268 if (isEndingElement(next, ALTERNATIVE_COLLECTOR)) {
269 // NOT YET IMPLEMENTED
270 popUnimplemented(next.asEndElement());
271 } else if (isEndingElement(next, ALTERNATIVE_FIELD_NUM)) {
272 // NOT YET IMPLEMENTED
273 popUnimplemented(next.asEndElement());
274 } else if (isEndingElement(next, COLLECTION_TYPE_STATUS)) {
275 // NOT YET IMPLEMENTED
276 popUnimplemented(next.asEndElement());
277 } else if (isEndingElement(next, COLLECTION_AND_TYPE)) {
278 // NOT YET IMPLEMENTED , does this make sense here?
279 popUnimplemented(next.asEndElement());
280 } else if (isEndingElement(next,
281 ALTERNATIVE_COLLECTION_TYPE_STATUS)) {
282 // NOT YET IMPLEMENTED
283 popUnimplemented(next.asEndElement());
284 } else if (isEndingElement(next, SUB_COLLECTION)) {
285 // NOT YET IMPLEMENTED
286 popUnimplemented(next.asEndElement());
287 } else if (isEndingElement(next, COLLECTION)) {
288 // NOT YET IMPLEMENTED
289 popUnimplemented(next.asEndElement());
290 } else if (isEndingElement(next, DATES)) {
291 // NOT YET IMPLEMENTED
292 popUnimplemented(next.asEndElement());
293 } else if (isEndingElement(next, NOTES)) {
294 // NOT YET IMPLEMENTED
295 popUnimplemented(next.asEndElement());
296 } else {
297 handleUnexpectedEndElement(next.asEndElement());
298 }
299 }
300 } else if (next.isStartElement()) {
301 if (isStartingElement(next, COLLECTOR)) {
302 hasCollector = true;
303 String collectorStr = getCData(state, reader, next);
304 AgentBase<?> collector = createCollector(collectorStr);
305 facade.setCollector(collector);
306 } else if (isStartingElement(next, ALTERNATIVE_COLLECTOR)) {
307 handleNotYetImplementedElement(next);
308 } else if (isStartingElement(next, FIELD_NUM)) {
309 hasFieldNum = true;
310 String fieldNumStr = getCData(state, reader, next);
311 facade.setFieldNumber(fieldNumStr);
312 } else if (isStartingElement(next, ALTERNATIVE_FIELD_NUM)) {
313 handleNotYetImplementedElement(next);
314 } else if (isStartingElement(next, COLLECTION_TYPE_STATUS)) {
315 handleNotYetImplementedElement(next);
316 } else if (isStartingElement(next, COLLECTION_AND_TYPE)) { //does this make sense here?
317 handleNotYetImplementedElement(next);
318 } else if (isStartingElement(next, ALTERNATIVE_COLLECTION_TYPE_STATUS)) {
319 handleNotYetImplementedElement(next);
320 } else if (isStartingElement(next, SUB_COLLECTION)) {
321 handleNotYetImplementedElement(next);
322 } else if (isStartingElement(next, COLLECTION)) {
323 handleNotYetImplementedElement(next);
324 } else if (isStartingElement(next, LOCALITY)) {
325 handleLocality(state, reader, next, facade);
326 } else if (isStartingElement(next, DATES)) {
327 handleNotYetImplementedElement(next);
328 } else if (isStartingElement(next, NOTES)) {
329 handleNotYetImplementedElement(next);
330 } else {
331 handleUnexpectedStartElement(next);
332 }
333 } else {
334 handleUnexpectedElement(next);
335 }
336 }
337 // TODO handle missing end element
338 throw new IllegalStateException("Collection has no closing tag");
339
340 }
341
342
343 private void handleLocality(MarkupImportState state, XMLEventReader reader,XMLEvent parentEvent, DerivedUnitFacade facade)throws XMLStreamException {
344 String classValue = getClassOnlyAttribute(parentEvent);
345 boolean isLocality = false;
346 NamedAreaLevel areaLevel = null;
347 if ("locality".equalsIgnoreCase(classValue)) {
348 isLocality = true;
349 } else {
350 areaLevel = makeNamedAreaLevel(state, classValue, parentEvent);
351 }
352
353 String text = "";
354 // elements
355 while (reader.hasNext()) {
356 XMLEvent next = readNoWhitespace(reader);
357 if (next.isEndElement()) {
358 if (isMyEndingElement(next, parentEvent)) {
359 if (StringUtils.isNotBlank(text)) {
360 text = normalize(text);
361 if (isLocality) {
362 facade.setLocality(text);
363 } else {
364 text = CdmUtils.removeTrailingDot(text);
365 NamedArea area = makeArea(state, text, areaLevel);
366 facade.addCollectingArea(area);
367 }
368 }
369 // TODO
370 return;
371 } else {
372 if (isEndingElement(next, ALTITUDE)) {
373 // NOT YET IMPLEMENTED
374 popUnimplemented(next.asEndElement());
375 } else if (isEndingElement(next, COORDINATES)) {
376 // NOT YET IMPLEMENTED
377 popUnimplemented(next.asEndElement());
378 } else if (isEndingElement(next, ANNOTATION)) {
379 // NOT YET IMPLEMENTED
380 popUnimplemented(next.asEndElement());
381 } else {
382 handleUnexpectedEndElement(next.asEndElement());
383 }
384 }
385 } else if (next.isStartElement()) {
386 if (isStartingElement(next, ALTITUDE)) {
387 handleNotYetImplementedElement(next);
388 // homotypicalGroup = handleNom(state, reader, next, taxon,
389 // homotypicalGroup);
390 } else if (isStartingElement(next, COORDINATES)) {
391 handleNotYetImplementedElement(next);
392 } else if (isStartingElement(next, ANNOTATION)) {
393 handleNotYetImplementedElement(next);
394 } else {
395 handleUnexpectedStartElement(next);
396 }
397 } else if (next.isCharacters()) {
398 text += next.asCharacters().getData();
399 } else {
400 handleUnexpectedElement(next);
401 }
402 }
403 throw new IllegalStateException("<SpecimenType> has no closing tag");
404 }
405
406
407
408 private AgentBase<?> createCollector(String collectorStr) {
409 return createAuthor(collectorStr);
410 }
411
412
413 public List<DescriptionElementBase> handleMaterialsExamined(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
414 List<DescriptionElementBase> result = new ArrayList<DescriptionElementBase>();
415 while (reader.hasNext()) {
416 XMLEvent next = readNoWhitespace(reader);
417 if (isMyEndingElement(next, parentEvent)) {
418 if (result.isEmpty()){
419 fireWarningEvent("Materials examined created empty Individual Associations list", parentEvent, 4);
420 }
421 return result;
422 } else if (isStartingElement(next, SUB_HEADING)) {
423 handleNotYetImplementedElement(next);
424 } else if (isStartingElement(next, BR)) {
425 handleNotYetImplementedElement(next);
426 } else if (isStartingElement(next, GATHERING)) {
427 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(DerivedUnitType.DerivedUnit.DerivedUnit);
428 handleGathering(state, reader, next, facade);
429 SpecimenOrObservationBase<?> specimen;
430 if (facade.innerDerivedUnit() != null){
431 specimen = facade.innerDerivedUnit();
432 }else{
433 specimen = facade.innerFieldObservation();
434 }
435 IndividualsAssociation individualsAssociation = IndividualsAssociation.NewInstance();
436 individualsAssociation.setAssociatedSpecimenOrObservation(specimen);
437 result.add(individualsAssociation);
438 } else {
439 handleUnexpectedElement(next);
440 }
441 }
442 throw new IllegalStateException("<String> has no closing tag");
443
444 }
445
446
447
448 public String handleInLineGathering(MarkupImportState state, XMLEventReader reader, XMLEvent parentEvent) throws XMLStreamException {
449 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(DerivedUnitType.DerivedUnit.FieldObservation);
450 handleGathering(state, reader, parentEvent, facade);
451 FieldObservation fieldObservation = facade.innerFieldObservation();
452 String result = "<cdm:specimen uuid='%s'>%s</specimen>";
453 result = String.format(result, fieldObservation.getUuid(), fieldObservation.getTitleCache());
454 save(fieldObservation, state);
455 return result;
456 }
457
458
459
460
461
462 }