Latest AlgaTerra Import developments
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / algaterra / AlgaTerraEcoFactImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.algaterra;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19
20 import org.apache.commons.lang.StringUtils;
21 import org.apache.log4j.Logger;
22 import org.springframework.stereotype.Component;
23
24 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
25 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
26 import eu.etaxonomy.cdm.io.algaterra.validation.AlgaTerraSpecimenImportValidator;
27 import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator;
28 import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState;
29 import eu.etaxonomy.cdm.io.common.IOValidator;
30 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
32 import eu.etaxonomy.cdm.model.common.CdmBase;
33 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
34 import eu.etaxonomy.cdm.model.common.Language;
35 import eu.etaxonomy.cdm.model.common.Marker;
36 import eu.etaxonomy.cdm.model.common.MarkerType;
37 import eu.etaxonomy.cdm.model.common.TermVocabulary;
38 import eu.etaxonomy.cdm.model.description.CategoricalData;
39 import eu.etaxonomy.cdm.model.description.DescriptionBase;
40 import eu.etaxonomy.cdm.model.description.Feature;
41 import eu.etaxonomy.cdm.model.description.MeasurementUnit;
42 import eu.etaxonomy.cdm.model.description.Modifier;
43 import eu.etaxonomy.cdm.model.description.QuantitativeData;
44 import eu.etaxonomy.cdm.model.description.State;
45 import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
46 import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
47 import eu.etaxonomy.cdm.model.description.TextData;
48 import eu.etaxonomy.cdm.model.occurrence.Collection;
49 import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
50 import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
51 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
52 import eu.etaxonomy.cdm.model.reference.Reference;
53
54
55 /**
56 * @author a.mueller
57 * @created 01.09.2012
58 */
59 @Component
60 public class AlgaTerraEcoFactImport extends AlgaTerraSpecimenImportBase {
private static final Logger logger = Logger.getLogger(AlgaTerraEcoFactImport.class);

/** Number of handled records after which a progress message is written to the log. */
private static final int modCount = 5000;

/** Plural label of the imported records; used in progress messages and passed to the base class. */
private static final String pluralString = "eco facts";

/** Name of the source table, passed to the base class. TODO the original author marked this value as uncertain ("??"). */
private static final String dbTableName = "EcoFact";

/**
 * Creates the import and hands the source table name and the plural label
 * of the imported records to the super constructor.
 */
public AlgaTerraEcoFactImport(){
    super(dbTableName, pluralString);
}
72
73
74
75 /* (non-Javadoc)
76 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
77 */
78 @Override
79 protected String getIdQuery(BerlinModelImportState state) {
80 String result = " SELECT EcoFactId " +
81 " FROM EcoFact " +
82 " ORDER BY EcoFact.DuplicateFk, EcoFact.EcoFactId ";
83 return result;
84 }
85
86 /* (non-Javadoc)
87 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
88 */
89 @Override
90 protected String getRecordQuery(BerlinModelImportConfigurator config) {
91 String strQuery =
92 " SELECT EcoFact.*, EcoFact.EcoFactId as unitId, " +
93 " tg.ID AS GazetteerId, tg.L2Code, tg.L3Code, tg.L4Code, tg.Country, tg.ISOCountry, " +
94 " ec.UUID as climateUuid, eh.UUID as habitatUuid, elf.UUID as lifeFormUuid " +
95 " FROM EcoFact " +
96 " LEFT OUTER JOIN TDWGGazetteer tg ON EcoFact.TDWGGazetteerFk = tg.ID " +
97 " LEFT OUTER JOIN EcoClimate ec ON EcoFact.ClimateFk = ec.ClimateId " +
98 " LEFT OUTER JOIN EcoHabitat eh ON EcoFact.HabitatFk = eh.HabitatId " +
99 " LEFT OUTER JOIN EcoLifeForm elf ON EcoFact.LifeFormFk = elf.LifeFormId " +
100 " WHERE (EcoFact.EcoFactId IN (" + ID_LIST_TOKEN + ") )"
101 + " ORDER BY EcoFact.DuplicateFk, EcoFact.EcoFactId "
102 ;
103 return strQuery;
104 }
105
106 /* (non-Javadoc)
107 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
108 */
109 public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState bmState) {
110 boolean success = true;
111
112 AlgaTerraImportState state = (AlgaTerraImportState)bmState;
113 try {
114 makeVocabulariesAndFeatures(state);
115 } catch (SQLException e1) {
116 logger.warn("Exception occurred when trying to create Ecofact vocabularies: " + e1.getMessage());
117 e1.printStackTrace();
118 }
119 Set<SpecimenOrObservationBase> objectsToSave = new HashSet<SpecimenOrObservationBase>();
120
121 //TODO do we still need this map? EcoFacts are not handled separate from Facts.
122 //However, they have duplicates on derived unit level. Also check duplicateFk.
123 Map<String, FieldObservation> ecoFactFieldObservationMap = (Map<String, FieldObservation>) partitioner.getObjectMap(ECO_FACT_FIELD_OBSERVATION_NAMESPACE);
124
125 ResultSet rs = partitioner.getResultSet();
126
127 try {
128
129 int i = 0;
130
131 //for each reference
132 while (rs.next()){
133
134 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
135
136 int ecoFactId = rs.getInt("EcoFactId");
137 Integer duplicateFk = nullSafeInt(rs, "DuplicateFk");
138
139 //FIXME RecordBasis is in Fact table, which is not part of the query anymore.
140 //Some EcoFacts have multiple RecordBasis types in Fact. Henning will check this.
141 // String recordBasis = rs.getString("RecordBasis");
142 String recordBasis = "PreservedSpecimen";
143
144 try {
145
146 //source ref
147 Reference<?> sourceRef = state.getTransactionalSourceReference();
148
149 //facade
150 DerivedUnitType type = makeDerivedUnitType(recordBasis);
151
152 DerivedUnitFacade facade;
153 //field observation
154 if (duplicateFk == null){
155 facade = DerivedUnitFacade.NewInstance(type);
156 handleFieldObservationSpecimen(rs, facade, state, partitioner);
157 handleEcoFactSpecificFieldObservation(rs,facade, state);
158 FieldObservation fieldObservation = facade.getFieldObservation(true);
159 ecoFactFieldObservationMap.put(String.valueOf(ecoFactId), fieldObservation);
160 }else{
161 FieldObservation fieldObservation = ecoFactFieldObservationMap.get(String.valueOf(duplicateFk));
162 facade = DerivedUnitFacade.NewInstance(type, fieldObservation);
163 }
164
165 handleFirstDerivedSpecimen(rs, facade, state, partitioner);
166 handleEcoFactSpecificDerivedUnit(rs,facade, state);
167
168
169 DerivedUnitBase<?> objectToSave = facade.innerDerivedUnit();
170 objectsToSave.add(objectToSave);
171
172
173 } catch (Exception e) {
174 logger.warn("Exception in ecoFact: ecoFactId " + ecoFactId + ". " + e.getMessage());
175 e.printStackTrace();
176 }
177
178 }
179
180 // logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
181
182 logger.warn("Taxa to save: " + objectsToSave.size());
183 getOccurrenceService().save(objectsToSave);
184
185 return success;
186 } catch (SQLException e) {
187 logger.error("SQLException:" + e);
188 return false;
189 }
190 }
191
192 protected String getDerivedUnitNameSpace(){
193 return ECO_FACT_DERIVED_UNIT_NAMESPACE;
194 }
195
196 protected String getFieldObservationNameSpace(){
197 return ECO_FACT_FIELD_OBSERVATION_NAMESPACE;
198 }
199
200
201
202 private void handleEcoFactSpecificFieldObservation(ResultSet rs, DerivedUnitFacade facade, AlgaTerraImportState state) throws SQLException {
203
204 Object alkalinityFlag = rs.getBoolean("AlkalinityFlag");
205
206 //alkalinity marker
207 if (alkalinityFlag != null){
208 MarkerType alkalinityMarkerType = getMarkerType(state, uuidMarkerAlkalinity, "Alkalinity", "Alkalinity", null);
209 boolean alkFlag = Boolean.valueOf(alkalinityFlag.toString());
210 Marker alkalinityMarker = Marker.NewInstance(alkalinityMarkerType, alkFlag);
211 facade.getFieldObservation(true).addMarker(alkalinityMarker);
212 }
213
214
215 DescriptionBase<?> fieldDescription = getFieldObservationDescription(facade);
216
217 //habitat, ecology, community, etc.
218 String habitat = rs.getString("HabitatExplanation");
219
220 if (isNotBlank(habitat)){
221 Feature habitatExplanation = getFeature(state, uuidFeatureHabitatExplanation, "Habitat Explanation", "HabitatExplanation", null, null);
222 TextData textData = TextData.NewInstance(habitatExplanation);
223 textData.putText(Language.DEFAULT(), habitat);
224 fieldDescription.addElement(textData);
225 }
226
227 String community = rs.getString("Comunity");
228 if (isNotBlank(community)){
229 Feature communityFeature = getFeature(state, uuidFeatureSpecimenCommunity, "Community", "The community of a specimen (e.g. other algae in the same sample)", null, null);
230 TextData textData = TextData.NewInstance(communityFeature);
231 textData.putText(Language.DEFAULT(), community);
232 fieldDescription.addElement(textData);
233 }
234
235 String additionalData = rs.getString("AdditionalData");
236 if (isNotBlank(additionalData)){ //or handle it as Annotation ??
237 Feature additionalDataFeature = getFeature(state, uuidFeatureAdditionalData, "Additional Data", "Additional Data", null, null);
238 TextData textData = TextData.NewInstance(additionalDataFeature);
239 textData.putText(Language.DEFAULT(), additionalData);
240 fieldDescription.addElement(textData);
241 }
242
243 String climateUuid = rs.getString("climateUuid");
244 String habitatUuid = rs.getString("habitatUuid");
245 String lifeFormUuid = rs.getString("lifeFormUuid");
246
247 addCategoricalValue(state, fieldDescription, climateUuid, uuidFeatureAlgaTerraClimate);
248 addCategoricalValue(state, fieldDescription, habitatUuid, Feature.HABITAT().getUuid());
249 addCategoricalValue(state, fieldDescription, lifeFormUuid, uuidFeatureAlgaTerraLifeForm);
250
251
252
253 //parameters
254 makeParameter(state, rs, getFieldObservationDescription(facade));
255
256 }
257
258 private void handleEcoFactSpecificDerivedUnit(ResultSet rs, DerivedUnitFacade facade, AlgaTerraImportState state) throws SQLException {
259 //collection
260 String voucher = rs.getString("Voucher");
261 if (StringUtils.isNotBlank(voucher)){
262 facade.setAccessionNumber(voucher);
263 }
264 }
265
266
267
268
269
270 private void addCategoricalValue(AlgaTerraImportState importState, DescriptionBase description, String uuidTerm, UUID featureUuid) {
271 if (uuidTerm != null){
272 State state = this.getStateTerm(importState, UUID.fromString(uuidTerm));
273 Feature feature = getFeature(importState, featureUuid);
274 CategoricalData categoricalData = CategoricalData.NewInstance(state, feature);
275 description.addElement(categoricalData);
276 }
277 }
278
279 private void makeParameter(AlgaTerraImportState state, ResultSet rs, DescriptionBase<?> descriptionBase) throws SQLException {
280 for (int i = 1; i <= 10; i++){
281 String valueStr = rs.getString(String.format("P%dValue", i));
282 String unitStr = rs.getString(String.format("P%dUnit", i));
283 String parameter = rs.getString(String.format("P%dParameter", i));
284 String method = rs.getString(String.format("P%dMethod", i));
285
286 //method
287 if (StringUtils.isNotBlank(method)){
288 logger.warn("Methods not yet handled: " + method);
289 }
290 //parameter
291 TermVocabulary<Feature> vocParameter = getVocabulary(uuidVocParameter, "Feature vocabulary for AlgaTerra measurement parameters", "Parameters", null, null, false, Feature.COMMON_NAME());
292 if (StringUtils.isNotBlank(parameter)){
293 UUID featureUuid = getParameterFeatureUuid(state, parameter);
294 Feature feature = getFeature(state, featureUuid, parameter, parameter, null, vocParameter);
295 QuantitativeData quantData = QuantitativeData.NewInstance(feature);
296
297 //unit
298 MeasurementUnit unit = getMeasurementUnit(state, unitStr);
299 quantData.setUnit(unit);
300 try {
301
302 Set<Modifier> valueModifier = new HashSet<Modifier>();
303 valueStr = normalizeAndModifyValue(state, valueStr, valueModifier);
304 //value
305 Float valueFlt = Float.valueOf(valueStr); //TODO maybe change model to Double ??
306
307 StatisticalMeasure measureSingleValue = getStatisticalMeasure(state, uuidStatMeasureSingleValue, "Value", "Single measurement value", null, null);
308 StatisticalMeasurementValue value = StatisticalMeasurementValue.NewInstance(measureSingleValue, valueFlt);
309 quantData.addStatisticalValue(value);
310 descriptionBase.addElement(quantData);
311
312 } catch (NumberFormatException e) {
313 logger.warn(String.format("Value '%s' can't be converted to double. Parameter %s not imported.", valueStr, parameter));
314 }
315 }else if (isNotBlank(valueStr) || isNotBlank(unitStr) ){
316 logger.warn("There is value or unit without parameter: " + i);
317 }
318
319
320 }
321
322 }
323
324 private String normalizeAndModifyValue(AlgaTerraImportState state, String valueStr, Set<Modifier> valueModifier) {
325 valueStr = valueStr.replace(",", ".");
326 if (valueStr.startsWith("<")){
327 TermVocabulary<Modifier> measurementValueModifierVocabulary = getVocabulary(uuidMeasurementValueModifier, "Measurement value modifier", "Measurement value modifier", null, null, false, Modifier.NewInstance());
328 Modifier modifier = getModifier(state, uuidModifierLowerThan, "Lower", "Lower than the given measurement value", "<", measurementValueModifierVocabulary);
329 valueModifier.add(modifier);
330 valueStr = valueStr.replace("<", "");
331 }
332 if (valueStr.startsWith(">")){
333 TermVocabulary<Modifier> measurementValueModifierVocabulary = getVocabulary(uuidMeasurementValueModifier, "Measurement value modifier", "Measurement value modifier", null, null, false, Modifier.NewInstance());
334 Modifier modifier = getModifier(state, uuidModifierGreaterThan, "Lower", "Lower than the given measurement value", "<", measurementValueModifierVocabulary);
335 valueModifier.add(modifier);
336 valueStr = valueStr.replace(">", "");
337 }
338 return valueStr;
339 }
340
341
342
343 private UUID getParameterFeatureUuid(AlgaTerraImportState state, String key) {
344 try {
345 return AlgaTerraImportTransformer.getFeatureUuid(key);
346 } catch (UndefinedTransformerMethodException e) {
347 throw new RuntimeException(e);
348 }
349 }
350
351
352
353 /**
354 * TODO move to InputTransformerBase
355 * @param state
356 * @param unitStr
357 * @return
358 */
359 private MeasurementUnit getMeasurementUnit(AlgaTerraImportState state, String unitStr) {
360 if (StringUtils.isNotBlank(unitStr)){
361 UUID uuid = AlgaTerraImportTransformer.getMeasurementUnitUuid(unitStr);
362 if (uuid != null){
363 return getMeasurementUnit(state, uuid, unitStr, unitStr, unitStr, null);
364 }else{
365 logger.warn("MeasurementUnit was not recognized");
366 return null;
367 }
368 }else{
369 return null;
370 }
371 }
372
373 private Feature makeFeature(DerivedUnitType type) {
374 if (type.equals(DerivedUnitType.DerivedUnit)){
375 return Feature.INDIVIDUALS_ASSOCIATION();
376 }else if (type.equals(DerivedUnitType.FieldObservation) || type.equals(DerivedUnitType.Observation) ){
377 return Feature.OBSERVATION();
378 }else if (type.equals(DerivedUnitType.Fossil) || type.equals(DerivedUnitType.LivingBeing) || type.equals(DerivedUnitType.Specimen )){
379 return Feature.SPECIMEN();
380 }
381 logger.warn("No feature defined for derived unit type: " + type);
382 return null;
383 }
384
385
386 private DerivedUnitType makeDerivedUnitType(String recordBasis) {
387 DerivedUnitType result = null;
388 if (StringUtils.isBlank(recordBasis)){
389 result = DerivedUnitType.DerivedUnit;
390 } else if (recordBasis.equalsIgnoreCase("FossileSpecimen")){
391 result = DerivedUnitType.Fossil;
392 }else if (recordBasis.equalsIgnoreCase("HumanObservation")){
393 result = DerivedUnitType.Observation;
394 }else if (recordBasis.equalsIgnoreCase("Literature")){
395 logger.warn("Literature record basis not yet supported");
396 result = DerivedUnitType.DerivedUnit;
397 }else if (recordBasis.equalsIgnoreCase("LivingSpecimen")){
398 result = DerivedUnitType.LivingBeing;
399 }else if (recordBasis.equalsIgnoreCase("MachineObservation")){
400 logger.warn("MachineObservation record basis not yet supported");
401 result = DerivedUnitType.Observation;
402 }else if (recordBasis.equalsIgnoreCase("PreservedSpecimen")){
403 result = DerivedUnitType.Specimen;
404 }
405 return result;
406 }
407
408 /* (non-Javadoc)
409 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
410 */
411 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
412 String nameSpace;
413 Class cdmClass;
414 Set<String> idSet;
415 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
416
417 try{
418 Set<String> fieldObservationIdSet = new HashSet<String>();
419 Set<String> termsIdSet = new HashSet<String>();
420 Set<String> collectionIdSet = new HashSet<String>();
421
422 while (rs.next()){
423 handleForeignKey(rs, fieldObservationIdSet, "DuplicateFk");
424 handleForeignKey(rs, termsIdSet, "ClimateFk");
425 handleForeignKey(rs, termsIdSet, "HabitatFk");
426 handleForeignKey(rs, termsIdSet, "LifeFormFk");
427 handleForeignKey(rs, collectionIdSet, "CollectionFk");
428 }
429
430 //field observation map for duplicates
431 nameSpace = AlgaTerraEcoFactImport.ECO_FACT_FIELD_OBSERVATION_NAMESPACE;
432 cdmClass = FieldObservation.class;
433 idSet = fieldObservationIdSet;
434 Map<String, FieldObservation> fieldObservationMap = (Map<String, FieldObservation>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
435 result.put(nameSpace, fieldObservationMap);
436
437 //collections
438 nameSpace = AlgaTerraCollectionImport.NAMESPACE_COLLECTION;
439 cdmClass = Collection.class;
440 idSet = collectionIdSet;
441 Map<String, Collection> collectionMap = (Map<String, Collection>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
442 result.put(nameSpace, collectionMap);
443
444 //sub-collections
445 nameSpace = AlgaTerraCollectionImport.NAMESPACE_SUBCOLLECTION;
446 cdmClass = Collection.class;
447 idSet = collectionIdSet;
448 Map<String, Collection> subCollectionMap = (Map<String, Collection>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
449 result.put(nameSpace, subCollectionMap);
450
451 //terms
452 nameSpace = AlgaTerraEcoFactImport.TERMS_NAMESPACE;
453 cdmClass = FieldObservation.class;
454 idSet = termsIdSet;
455 Map<String, DefinedTermBase> termMap = (Map<String, DefinedTermBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
456 result.put(nameSpace, termMap);
457
458 } catch (SQLException e) {
459 throw new RuntimeException(e);
460 }
461 return result;
462 }
463
464
465
466 /* (non-Javadoc)
467 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
468 */
469 @Override
470 protected boolean doCheck(BerlinModelImportState state){
471 IOValidator<BerlinModelImportState> validator = new AlgaTerraSpecimenImportValidator();
472 return validator.validate(state);
473 }
474
475
476 /* (non-Javadoc)
477 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
478 */
479 protected boolean isIgnore(BerlinModelImportState state){
480 return ! ((AlgaTerraImportState)state).getAlgaTerraConfigurator().isDoEcoFacts();
481 }
482
483 }