hide term vocabulary constructors and create factory methods instead
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelFactsImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.io.IOException;
13 import java.net.MalformedURLException;
14 import java.net.URI;
15 import java.net.URISyntaxException;
16 import java.net.URL;
17 import java.sql.ResultSet;
18 import java.sql.SQLException;
19 import java.util.Collection;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.Map;
23 import java.util.Set;
24
25 import org.apache.http.HttpException;
26 import org.apache.log4j.Logger;
27 import org.springframework.stereotype.Component;
28
29 import eu.etaxonomy.cdm.common.CdmUtils;
30 import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;
31 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
32 import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelFactsImportValidator;
33 import eu.etaxonomy.cdm.io.common.IOValidator;
34 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
35 import eu.etaxonomy.cdm.io.common.Source;
36 import eu.etaxonomy.cdm.model.common.Annotation;
37 import eu.etaxonomy.cdm.model.common.CdmBase;
38 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
39 import eu.etaxonomy.cdm.model.common.Language;
40 import eu.etaxonomy.cdm.model.common.Marker;
41 import eu.etaxonomy.cdm.model.common.MarkerType;
42 import eu.etaxonomy.cdm.model.common.TermVocabulary;
43 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
44 import eu.etaxonomy.cdm.model.description.Feature;
45 import eu.etaxonomy.cdm.model.description.TaxonDescription;
46 import eu.etaxonomy.cdm.model.description.TextData;
47 import eu.etaxonomy.cdm.model.media.ImageFile;
48 import eu.etaxonomy.cdm.model.media.Media;
49 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
50 import eu.etaxonomy.cdm.model.reference.Reference;
51 import eu.etaxonomy.cdm.model.taxon.Taxon;
52 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
53 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
54
55 /**
56 * @author a.mueller
57 * @created 20.03.2008
58 * @version 1.0
59 */
60 @Component
61 public class BerlinModelFactsImport extends BerlinModelImportBase {
62 private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
63
64 public static final String NAMESPACE = "Fact";
65
66 public static final String SEQUENCE_PREFIX = "ORDER: ";
67
68 private int modCount = 10000;
69 private static final String pluralString = "facts";
70 private static final String dbTableName = "Fact";
71
72 //FIXME don't use as class variable
73 private Map<Integer, Feature> featureMap;
74
75 public BerlinModelFactsImport(){
76 super();
77 }
78
79
80 private TermVocabulary<Feature> getFeatureVocabulary(){
81 try {
82 //TODO work around until service method works
83 TermVocabulary<Feature> featureVocabulary = BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
84 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
85 return featureVocabulary;
86 } catch (UnknownCdmTypeException e) {
87 logger.error("Feature vocabulary not available. New vocabulary created");
88 return TermVocabulary.NewInstance("User Defined Feature Vocabulary", "User Defined Feature Vocabulary", null, null);
89 }
90 }
91
92 private Map<Integer, Feature> invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
93
94 Map<Integer, Feature> result = bmiConfig.getFeatureMap();
95 Source source = bmiConfig.getSource();
96
97 try {
98 //get data from database
99 String strQuery =
100 " SELECT FactCategory.* " +
101 " FROM FactCategory "+
102 " WHERE (1=1)";
103 ResultSet rs = source.getResultSet(strQuery) ;
104
105
106 TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
107 int i = 0;
108 //for each reference
109 while (rs.next()){
110
111 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
112
113 int factCategoryId = rs.getInt("factCategoryId");
114 String factCategory = rs.getString("factCategory");
115
116
117 Feature feature;
118 try {
119 feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
120 } catch (UnknownCdmTypeException e) {
121 logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
122 feature = Feature.NewInstance(factCategory, factCategory, null);
123 featureVocabulary.addTerm(feature);
124 feature.setSupportsTextData(true);
125 //TODO
126 // MaxFactNumber int Checked
127 // ExtensionTableName varchar(100) Checked
128 // Description nvarchar(1000) Checked
129 // locExtensionFormName nvarchar(80) Checked
130 // RankRestrictionFk int Checked
131 }
132
133 result.put(factCategoryId, feature);
134 }
135 Collection<Feature> col = result.values();
136 getTermService().save((Collection)col);
137 return result;
138 } catch (SQLException e) {
139 logger.error("SQLException:" + e);
140 return null;
141 }
142
143 }
144
145 /* (non-Javadoc)
146 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
147 */
148 @Override
149 protected boolean doInvoke(BerlinModelImportState state) {
150 featureMap = invokeFactCategories(state.getConfig());
151 return super.doInvoke(state);
152 }
153
154
155 /* (non-Javadoc)
156 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
157 */
158 @Override
159 protected String getRecordQuery(BerlinModelImportConfigurator config) {
160 String strQuery =
161 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
162 " FROM Fact " +
163 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
164 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
165 " WHERE (FactId IN (" + ID_LIST_TOKEN + "))" +
166 " ORDER By Sequence";
167 return strQuery;
168 }
169
170
171 /* (non-Javadoc)
172 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
173 */
174 public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
175 boolean success = true ;
176 BerlinModelImportConfigurator config = state.getConfig();
177 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
178 Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
179 Map<String, Reference> biblioRefMap = (Map<String, Reference>) partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);
180 Map<String, Reference> nomRefMap = (Map<String, Reference>) partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);
181
182 ResultSet rs = partitioner.getResultSet();
183
184 Reference<?> sourceRef = state.getConfig().getSourceReference();
185
186 try{
187 int i = 0;
188 //for each fact
189 while (rs.next()){
190 try{
191 if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
192
193 int factId = rs.getInt("factId");
194 Object taxonIdObj = rs.getObject("taxonId");
195 long taxonId = rs.getLong("taxonId");
196 Object factRefFkObj = rs.getObject("factRefFk");
197 Object categoryFkObj = rs.getObject("factCategoryFk");
198 Integer categoryFk = rs.getInt("factCategoryFk");
199 String details = rs.getString("Details");
200 String fact = CdmUtils.Nz(rs.getString("Fact"));
201 String notes = CdmUtils.Nz(rs.getString("notes"));
202 Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
203 Boolean publishFlag = rs.getBoolean("publishFlag");
204
205 TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
206 Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
207
208 if (taxonBase == null){
209 logger.warn("Taxon for Fact " + factId + " does not exist in store");
210 success = false;
211 }else{
212 Taxon taxon;
213 if ( taxonBase instanceof Taxon ) {
214 taxon = (Taxon) taxonBase;
215 }else{
216 logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
217 success = false;
218 continue;
219 }
220
221 TaxonDescription taxonDescription = null;
222 Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
223
224 boolean isImage = false;
225 Media media = null;
226 //for diptera images
227 if (categoryFk == 51){ //TODO check also FactCategory string
228 isImage = true;
229 media = Media.NewInstance();
230 taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
231 if (taxonDescription == null){
232 continue;
233 }
234 }
235 //all others (no image)
236 else{
237 for (TaxonDescription desc: descriptionSet){
238 if (! desc.isImageGallery()){
239 taxonDescription = desc;
240 }
241 }
242 if (taxonDescription == null){
243 taxonDescription = TaxonDescription.NewInstance();
244 taxonDescription.setTitleCache(sourceRef == null ? null : sourceRef.getTitleCache(), true);
245 taxon.addDescription(taxonDescription);
246 }
247 }
248
249 //textData
250 TextData textData = null;
251 boolean newTextData = true;
252
253 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
254 // description element append the fact text to the existing TextData
255 if(categoryFk == 31) {
256 Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
257 for (DescriptionElementBase descriptionElement : descriptionElements) {
258 String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
259 if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
260 textData = (TextData)descriptionElement;
261 String factTextStr = textData.getText(Language.DEFAULT());
262 // FIXME: Removing newlines doesn't work
263 if (factTextStr.contains("\\r\\n")) {
264 factTextStr = factTextStr.replaceAll("\\r\\n","");
265 }
266 StringBuilder factText = new StringBuilder(factTextStr);
267 factText.append(fact);
268 fact = factText.toString();
269 newTextData = false;
270 break;
271 }
272 }
273 }
274
275 if(newTextData == true) {
276 textData = TextData.NewInstance();
277 }
278
279 //for diptera database
280 if (categoryFk == 99 && notes.contains("<OriginalName>")){
281 // notes = notes.replaceAll("<OriginalName>", "");
282 // notes = notes.replaceAll("</OriginalName>", "");
283 fact = notes + ": " + fact ;
284 }
285 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
286 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
287 if (isImage){
288 textData.addMedia(media);
289 textData.setFeature(Feature.IMAGE());
290 }else{
291 textData.putText(Language.DEFAULT(), fact);
292 textData.setFeature(feature);
293 }
294
295 //reference
296 Reference citation = null;
297 String factRefFk = String.valueOf(factRefFkObj);
298 if (factRefFkObj != null){
299 citation = getReferenceOnlyFromMaps(
300 biblioRefMap, nomRefMap, factRefFk);
301 }
302 if (citation == null && (factRefFkObj != null)){
303 logger.warn("Citation not found in referenceMap: " + factRefFk);
304 success = false;
305 }
306 if (citation != null || CdmUtils.isNotEmpty(details)){
307 DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
308 originalSource.setCitation(citation);
309 originalSource.setCitationMicroReference(details);
310 textData.addSource(originalSource);
311 }
312 taxonDescription.addElement(textData);
313 //doubtfulFlag
314 if (doubtfulFlag){
315 textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
316 }
317 //publisheFlag
318 textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
319 //Sequence
320 Integer sequence = rs.getInt("Sequence");
321 if (sequence != null && sequence != 999){
322 String strSequence = String.valueOf(sequence);
323 strSequence = SEQUENCE_PREFIX + strSequence;
324 //TODO make it an Extension when possible
325 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
326 Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
327 textData.addAnnotation(annotation);
328 }
329
330 // if (categoryFkObj == FACT_DESCRIPTION){
331 // //;
332 // }else if (categoryFkObj == FACT_OBSERVATION){
333 // //;
334 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
335 // //
336 // }else {
337 // //TODO
338 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
339 // }
340
341 //notes
342 doCreatedUpdatedNotes(state, textData, rs);
343
344 //TODO
345 //Designation References -> unclear how to map to CDM
346 //factId -> OriginalSource for descriptionElements not yet implemented
347
348 //sequence -> textData is not an identifiable entity therefore extensions are not possible
349 //fact category better
350
351 taxaToSave.add(taxon);
352 }
353 } catch (Exception re){
354 logger.error("An exception occurred during the facts import");
355 re.printStackTrace();
356 success = false;
357 }
358 //put
359 }
360 logger.info("Facts handled: " + (i-1));
361 logger.info("Taxa to save: " + taxaToSave.size());
362 getTaxonService().save(taxaToSave);
363 }catch(SQLException e){
364 throw new RuntimeException(e);
365 }
366 return success;
367 }
368
369 /* (non-Javadoc)
370 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
371 */
372 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
373 String nameSpace;
374 Class cdmClass;
375 Set<String> idSet;
376 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
377
378 try{
379 Set<String> taxonIdSet = new HashSet<String>();
380 Set<String> referenceIdSet = new HashSet<String>();
381 Set<String> refDetailIdSet = new HashSet<String>();
382 while (rs.next()){
383 handleForeignKey(rs, taxonIdSet, "taxonId");
384 handleForeignKey(rs, referenceIdSet, "FactRefFk");
385 handleForeignKey(rs, referenceIdSet, "PTDesignationRefFk");
386 handleForeignKey(rs, refDetailIdSet, "FactRefDetailFk");
387 handleForeignKey(rs, refDetailIdSet, "PTDesignationRefDetailFk");
388 }
389
390 //taxon map
391 nameSpace = BerlinModelTaxonImport.NAMESPACE;
392 cdmClass = TaxonBase.class;
393 idSet = taxonIdSet;
394 Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
395 result.put(nameSpace, taxonMap);
396
397
398 //nom reference map
399 nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
400 cdmClass = Reference.class;
401 idSet = referenceIdSet;
402 Map<String, Reference> nomReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
403 result.put(nameSpace, nomReferenceMap);
404
405 //biblio reference map
406 nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
407 cdmClass = Reference.class;
408 idSet = referenceIdSet;
409 Map<String, Reference> biblioReferenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
410 result.put(nameSpace, biblioReferenceMap);
411
412 //nom refDetail map
413 nameSpace = BerlinModelRefDetailImport.NOM_REFDETAIL_NAMESPACE;
414 cdmClass = Reference.class;
415 idSet = refDetailIdSet;
416 Map<String, Reference> nomRefDetailMap= (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
417 result.put(nameSpace, nomRefDetailMap);
418
419 //biblio refDetail map
420 nameSpace = BerlinModelRefDetailImport.BIBLIO_REFDETAIL_NAMESPACE;
421 cdmClass = Reference.class;
422 idSet = refDetailIdSet;
423 Map<String, Reference> biblioRefDetailMap= (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
424 result.put(nameSpace, biblioRefDetailMap);
425
426 } catch (SQLException e) {
427 throw new RuntimeException(e);
428 }
429 return result;
430 }
431
432
433 /**
434 * @param state
435 * @param media
436 * @param media
437 * @param descriptionSet
438 *
439 */
440 private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
441 TaxonDescription taxonDescription = null;
442 Reference sourceRef = state.getConfig().getSourceReference();
443 Integer size = null;
444 ImageMetaData imageMetaData = ImageMetaData.newInstance();
445 URI uri;
446 try {
447 uri = new URI(fact.trim());
448 } catch (URISyntaxException e) {
449 logger.warn("URISyntaxException. Image could not be imported: " + fact);
450 return null;
451 }
452 try {
453 imageMetaData.readMetaData(uri, 0);
454 } catch (IOException e) {
455 logger.error("IOError reading image metadata." , e);
456 } catch (HttpException e) {
457 logger.error("HttpException reading image metadata." , e);
458 }
459 MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
460 media.addRepresentation(mediaRepresentation);
461 ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
462 mediaRepresentation.addRepresentationPart(image);
463
464 taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
465
466 return taxonDescription;
467 }
468
469 private TaxonBase getTaxon(Map<String, TaxonBase> taxonMap, Object taxonIdObj, Long taxonId){
470 if (taxonIdObj != null){
471 return taxonMap.get(String.valueOf(taxonId));
472 }else{
473 return null;
474 }
475
476 }
477
478 private Feature getFeature(Map<Integer, Feature> featureMap, Object categoryFkObj, Integer categoryFk){
479 if (categoryFkObj != null){
480 return featureMap.get(categoryFk);
481 }else{
482 return null;
483 }
484
485 }
486
487
488 /* (non-Javadoc)
489 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
490 */
491 @Override
492 protected boolean doCheck(BerlinModelImportState state){
493 IOValidator<BerlinModelImportState> validator = new BerlinModelFactsImportValidator();
494 return validator.validate(state);
495 }
496
497 /* (non-Javadoc)
498 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
499 */
500 @Override
501 protected String getTableName() {
502 return dbTableName;
503 }
504
505 /* (non-Javadoc)
506 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
507 */
508 @Override
509 public String getPluralString() {
510 return pluralString;
511 }
512
513 /* (non-Javadoc)
514 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
515 */
516 protected boolean isIgnore(BerlinModelImportState state){
517 return ! state.getConfig().isDoFacts();
518 }
519
520
521 }