Merging cdmlib-io; pesi. Merge rev 8153-8536.
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelFactsImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.net.MalformedURLException;
13 import java.net.URISyntaxException;
14 import java.net.URL;
15 import java.sql.ResultSet;
16 import java.sql.SQLException;
17 import java.util.Collection;
18 import java.util.HashMap;
19 import java.util.HashSet;
20 import java.util.Map;
21 import java.util.Set;
22
23 import org.apache.log4j.Logger;
24 import org.springframework.stereotype.Component;
25
26 import eu.etaxonomy.cdm.common.CdmUtils;
27 import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;
28 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
29 import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelFactsImportValidator;
30 import eu.etaxonomy.cdm.io.common.IOValidator;
31 import eu.etaxonomy.cdm.io.common.MapWrapper;
32 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33 import eu.etaxonomy.cdm.io.common.Source;
34 import eu.etaxonomy.cdm.model.common.Annotation;
35 import eu.etaxonomy.cdm.model.common.CdmBase;
36 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
37 import eu.etaxonomy.cdm.model.common.Language;
38 import eu.etaxonomy.cdm.model.common.Marker;
39 import eu.etaxonomy.cdm.model.common.MarkerType;
40 import eu.etaxonomy.cdm.model.common.TermVocabulary;
41 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
42 import eu.etaxonomy.cdm.model.description.Feature;
43 import eu.etaxonomy.cdm.model.description.TaxonDescription;
44 import eu.etaxonomy.cdm.model.description.TextData;
45 import eu.etaxonomy.cdm.model.media.ImageFile;
46 import eu.etaxonomy.cdm.model.media.Media;
47 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
48 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
49 import eu.etaxonomy.cdm.model.taxon.Taxon;
50 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
51 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
52
53 /**
54 * @author a.mueller
55 * @created 20.03.2008
56 * @version 1.0
57 */
58 @Component
59 public class BerlinModelFactsImport extends BerlinModelImportBase {
60 private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
61
62 public static final String NAMESPACE = "Fact";
63
64 public static final String SEQUENCE_PREFIX = "ORDER: ";
65
66 private int modCount = 10000;
67 private static final String pluralString = "facts";
68 private static final String dbTableName = "Fact";
69
70 //FIXME don't use as class variable
71 private MapWrapper<Feature> featureMap;
72
/**
 * Creates a new facts import. Nothing is initialised here beyond what the
 * superclass constructor does.
 */
public BerlinModelFactsImport() {
    super();
}
76
77
78 private TermVocabulary<Feature> getFeatureVocabulary(){
79 try {
80 //TODO work around until service method works
81 TermVocabulary<Feature> featureVocabulary = BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
82 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
83 return featureVocabulary;
84 } catch (UnknownCdmTypeException e) {
85 logger.error("Feature vocabulary not available. New vocabulary created");
86 return new TermVocabulary<Feature>() ;
87 }
88 }
89
90 private MapWrapper<Feature> invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
91
92 MapWrapper<Feature> result = bmiConfig.getFeatureMap();
93 Source source = bmiConfig.getSource();
94
95 try {
96 //get data from database
97 String strQuery =
98 " SELECT FactCategory.* " +
99 " FROM FactCategory "+
100 " WHERE (1=1)";
101 ResultSet rs = source.getResultSet(strQuery) ;
102
103
104 TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
105 int i = 0;
106 //for each reference
107 while (rs.next()){
108
109 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
110
111 int factCategoryId = rs.getInt("factCategoryId");
112 String factCategory = rs.getString("factCategory");
113
114
115 Feature feature;
116 try {
117 feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
118 } catch (UnknownCdmTypeException e) {
119 logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
120 feature = Feature.NewInstance(factCategory, factCategory, null);
121 feature.setVocabulary(featureVocabulary);
122 feature.setSupportsTextData(true);
123 //TODO
124 // MaxFactNumber int Checked
125 // ExtensionTableName varchar(100) Checked
126 // Description nvarchar(1000) Checked
127 // locExtensionFormName nvarchar(80) Checked
128 // RankRestrictionFk int Checked
129 }
130
131 result.put(factCategoryId, feature);
132 }
133 Collection<Feature> col = result.getAllValues();
134 getTermService().save((Collection)col);
135 return result;
136 } catch (SQLException e) {
137 logger.error("SQLException:" + e);
138 return null;
139 }
140
141 }
142
143 /* (non-Javadoc)
144 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
145 */
146 @Override
147 protected boolean doInvoke(BerlinModelImportState state) {
148 featureMap = invokeFactCategories(state.getConfig());
149 return super.doInvoke(state);
150 }
151
152
153 /* (non-Javadoc)
154 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
155 */
156 @Override
157 protected String getRecordQuery(BerlinModelImportConfigurator config) {
158 String strQuery =
159 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
160 " FROM Fact " +
161 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
162 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
163 " WHERE (FactId IN (" + ID_LIST_TOKEN + "))" +
164 " ORDER By Sequence";
165 return strQuery;
166 }
167
168
169 /* (non-Javadoc)
170 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
171 */
172 public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) {
173 boolean success = true ;
174 BerlinModelImportConfigurator config = state.getConfig();
175 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
176 Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>) partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
177 Map<String, ReferenceBase> biblioRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE);
178 Map<String, ReferenceBase> nomRefMap = (Map<String, ReferenceBase>) partitioner.getObjectMap(BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE);
179
180 ResultSet rs = partitioner.getResultSet();
181
182 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
183
184 try{
185 int i = 0;
186 //for each fact
187 while (rs.next()){
188 try{
189 if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
190
191 int factId = rs.getInt("factId");
192 Object taxonIdObj = rs.getObject("taxonId");
193 int taxonId = rs.getInt("taxonId");
194 Object factRefFkObj = rs.getObject("factRefFk");
195 Object categoryFkObj = rs.getObject("factCategoryFk");
196 Integer categoryFk = rs.getInt("factCategoryFk");
197 String details = rs.getString("Details");
198 String fact = CdmUtils.Nz(rs.getString("Fact"));
199 String notes = CdmUtils.Nz(rs.getString("notes"));
200 Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
201 Boolean publishFlag = rs.getBoolean("publishFlag");
202
203 TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
204 Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
205
206 if (taxonBase == null){
207 logger.warn("Taxon for Fact " + factId + " does not exist in store");
208 success = false;
209 }else{
210 Taxon taxon;
211 if ( taxonBase instanceof Taxon ) {
212 taxon = (Taxon) taxonBase;
213 }else{
214 logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
215 success = false;
216 continue;
217 }
218
219 TaxonDescription taxonDescription = null;
220 Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
221
222 boolean isImage = false;
223 Media media = null;
224 //for diptera images
225 if (categoryFk == 51){ //TODO check also FactCategory string
226 isImage = true;
227 media = Media.NewInstance();
228 taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
229 if (taxonDescription == null){
230 continue;
231 }
232 }
233 //all others (no image)
234 else{
235 for (TaxonDescription desc: descriptionSet){
236 if (! desc.isImageGallery()){
237 taxonDescription = desc;
238 }
239 }
240 if (taxonDescription == null){
241 taxonDescription = TaxonDescription.NewInstance();
242 taxonDescription.setTitleCache(sourceRef == null ? null : sourceRef.getTitleCache());
243 taxon.addDescription(taxonDescription);
244 }
245 }
246
247 //textData
248 TextData textData = null;
249 boolean newTextData = true;
250
251 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
252 // description element append the fact text to the existing TextData
253 if(categoryFk == 31) {
254 Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
255 for (DescriptionElementBase descriptionElement : descriptionElements) {
256 String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
257 if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
258 textData = (TextData)descriptionElement;
259 String factTextStr = textData.getText(Language.DEFAULT());
260 // FIXME: Removing newlines doesn't work
261 if (factTextStr.contains("\\r\\n")) {
262 factTextStr = factTextStr.replaceAll("\\r\\n","");
263 }
264 StringBuilder factText = new StringBuilder(factTextStr);
265 factText.append(fact);
266 fact = factText.toString();
267 newTextData = false;
268 break;
269 }
270 }
271 }
272
273 if(newTextData == true) {
274 textData = TextData.NewInstance();
275 }
276
277 //for diptera database
278 if (categoryFk == 99 && notes.contains("<OriginalName>")){
279 notes = notes.replaceAll("<OriginalName>", "");
280 notes = notes.replaceAll("</OriginalName>", "");
281 fact = notes + ": " + fact ;
282 }
283 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
284 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
285 if (isImage){
286 textData.addMedia(media);
287 textData.setType(Feature.IMAGE());
288 }else{
289 textData.putText(fact, Language.DEFAULT());
290 textData.setType(feature);
291 }
292
293 //reference
294 ReferenceBase citation = null;
295 String factRefFk = String.valueOf(factRefFkObj);
296 if (factRefFkObj != null){
297 citation = getReferenceOnlyFromMaps(
298 biblioRefMap, nomRefMap, factRefFk);
299 }
300 if (citation == null && (factRefFkObj != null)){
301 logger.warn("Citation not found in referenceMap: " + factRefFk);
302 success = false;
303 }
304 if (citation != null || CdmUtils.isNotEmpty(details)){
305 DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
306 originalSource.setCitation(citation);
307 originalSource.setCitationMicroReference(details);
308 textData.addSource(originalSource);
309 }
310 taxonDescription.addElement(textData);
311 //doubtfulFlag
312 if (doubtfulFlag){
313 textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
314 }
315 //publisheFlag
316 textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
317 //Sequence
318 Integer sequence = rs.getInt("Sequence");
319 if (sequence != null && sequence != 999){
320 String strSequence = String.valueOf(sequence);
321 strSequence = SEQUENCE_PREFIX + strSequence;
322 //TODO make it an Extension when possible
323 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
324 Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
325 textData.addAnnotation(annotation);
326 }
327
328 // if (categoryFkObj == FACT_DESCRIPTION){
329 // //;
330 // }else if (categoryFkObj == FACT_OBSERVATION){
331 // //;
332 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
333 // //
334 // }else {
335 // //TODO
336 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
337 // }
338
339 //notes
340 doCreatedUpdatedNotes(state, textData, rs);
341
342 //TODO
343 //Designation References -> unclear how to map to CDM
344 //factId -> OriginalSource for descriptionElements not yet implemented
345
346 //sequence -> textData is not an identifiable entity therefore extensions are not possible
347 //fact category better
348
349 taxaToSave.add(taxon);
350 }
351 } catch (Exception re){
352 logger.error("An exception occurred during the facts import");
353 re.printStackTrace();
354 success = false;
355 }
356 //put
357 }
358 logger.info("Facts handled: " + (i-1));
359 logger.info("Taxa to save: " + taxaToSave.size());
360 getTaxonService().save(taxaToSave);
361 }catch(SQLException e){
362 throw new RuntimeException(e);
363 }
364 return success;
365 }
366
367 /* (non-Javadoc)
368 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
369 */
370 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
371 String nameSpace;
372 Class cdmClass;
373 Set<String> idSet;
374 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
375
376 try{
377 Set<String> taxonIdSet = new HashSet<String>();
378 Set<String> referenceIdSet = new HashSet<String>();
379 Set<String> refDetailIdSet = new HashSet<String>();
380 while (rs.next()){
381 handleForeignKey(rs, taxonIdSet, "taxonId");
382 handleForeignKey(rs, referenceIdSet, "FactRefFk");
383 handleForeignKey(rs, referenceIdSet, "PTDesignationRefFk");
384 handleForeignKey(rs, refDetailIdSet, "FactRefDetailFk");
385 handleForeignKey(rs, refDetailIdSet, "PTDesignationRefDetailFk");
386 }
387
388 //taxon map
389 nameSpace = BerlinModelTaxonImport.NAMESPACE;
390 cdmClass = TaxonBase.class;
391 idSet = taxonIdSet;
392 Map<String, TaxonBase> taxonMap = (Map<String, TaxonBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
393 result.put(nameSpace, taxonMap);
394
395
396 //nom reference map
397 nameSpace = BerlinModelReferenceImport.NOM_REFERENCE_NAMESPACE;
398 cdmClass = ReferenceBase.class;
399 idSet = referenceIdSet;
400 Map<String, ReferenceBase> nomReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
401 result.put(nameSpace, nomReferenceMap);
402
403 //biblio reference map
404 nameSpace = BerlinModelReferenceImport.BIBLIO_REFERENCE_NAMESPACE;
405 cdmClass = ReferenceBase.class;
406 idSet = referenceIdSet;
407 Map<String, ReferenceBase> biblioReferenceMap = (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
408 result.put(nameSpace, biblioReferenceMap);
409
410 //nom refDetail map
411 nameSpace = BerlinModelRefDetailImport.NOM_REFDETAIL_NAMESPACE;
412 cdmClass = ReferenceBase.class;
413 idSet = refDetailIdSet;
414 Map<String, ReferenceBase> nomRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
415 result.put(nameSpace, nomRefDetailMap);
416
417 //biblio refDetail map
418 nameSpace = BerlinModelRefDetailImport.BIBLIO_REFDETAIL_NAMESPACE;
419 cdmClass = ReferenceBase.class;
420 idSet = refDetailIdSet;
421 Map<String, ReferenceBase> biblioRefDetailMap= (Map<String, ReferenceBase>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
422 result.put(nameSpace, biblioRefDetailMap);
423
424 } catch (SQLException e) {
425 throw new RuntimeException(e);
426 }
427 return result;
428 }
429
430
431 /**
432 * @param state
433 * @param media
434 * @param media
435 * @param descriptionSet
436 *
437 */
438 private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
439 TaxonDescription taxonDescription = null;
440 ReferenceBase sourceRef = state.getConfig().getSourceReference();
441 String uri = fact;
442 Integer size = null;
443 ImageMetaData imageMetaData = ImageMetaData.newInstance();
444 URL url;
445 try {
446 url = new URL(fact.trim());
447 } catch (MalformedURLException e) {
448 logger.warn("Malformed URL. Image could not be imported: " + CdmUtils.Nz(uri));
449 return null;
450 }
451 try {
452 imageMetaData.readMetaData(url.toURI(), 0);
453 }
454 catch(URISyntaxException e){
455 e.printStackTrace();
456 }
457 MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
458 media.addRepresentation(mediaRepresentation);
459 ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
460 mediaRepresentation.addRepresentationPart(image);
461
462 taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
463
464 return taxonDescription;
465 }
466
467 private TaxonBase getTaxon(Map<String, TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){
468 if (taxonIdObj != null){
469 return taxonMap.get(String.valueOf(taxonId));
470 }else{
471 return null;
472 }
473
474 }
475
476 private Feature getFeature(MapWrapper<Feature> featureMap, Object categoryFkObj, Integer categoryFk){
477 if (categoryFkObj != null){
478 return featureMap.get(categoryFk);
479 }else{
480 return null;
481 }
482
483 }
484
485
486 /* (non-Javadoc)
487 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
488 */
489 @Override
490 protected boolean doCheck(BerlinModelImportState state){
491 IOValidator<BerlinModelImportState> validator = new BerlinModelFactsImportValidator();
492 return validator.validate(state);
493 }
494
495 /* (non-Javadoc)
496 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
497 */
498 @Override
499 protected String getTableName() {
500 return dbTableName;
501 }
502
503 /* (non-Javadoc)
504 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
505 */
506 @Override
507 public String getPluralString() {
508 return pluralString;
509 }
510
511 /* (non-Javadoc)
512 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
513 */
514 protected boolean isIgnore(BerlinModelImportState state){
515 return ! state.getConfig().isDoFacts();
516 }
517
518
519 }