(no commit message)
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelFactsImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.net.MalformedURLException;
13 import java.net.URISyntaxException;
14 import java.net.URL;
15 import java.sql.ResultSet;
16 import java.sql.SQLException;
17 import java.util.Collection;
18 import java.util.HashSet;
19 import java.util.Set;
20
21 import org.apache.log4j.Logger;
22 import org.springframework.stereotype.Component;
23
24 import eu.etaxonomy.cdm.common.CdmUtils;
25 import eu.etaxonomy.cdm.common.mediaMetaData.ImageMetaData;
26 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
27 import eu.etaxonomy.cdm.io.common.ICdmIO;
28 import eu.etaxonomy.cdm.io.common.MapWrapper;
29 import eu.etaxonomy.cdm.io.common.Source;
30 import eu.etaxonomy.cdm.model.common.Annotation;
31 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
32 import eu.etaxonomy.cdm.model.common.Language;
33 import eu.etaxonomy.cdm.model.common.Marker;
34 import eu.etaxonomy.cdm.model.common.MarkerType;
35 import eu.etaxonomy.cdm.model.common.TermVocabulary;
36 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
37 import eu.etaxonomy.cdm.model.description.Feature;
38 import eu.etaxonomy.cdm.model.description.TaxonDescription;
39 import eu.etaxonomy.cdm.model.description.TextData;
40 import eu.etaxonomy.cdm.model.media.ImageFile;
41 import eu.etaxonomy.cdm.model.media.Media;
42 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
43 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
44 import eu.etaxonomy.cdm.model.taxon.Taxon;
45 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
46 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
47
48 /**
49 * @author a.mueller
50 * @created 20.03.2008
51 * @version 1.0
52 */
53 @Component
54 public class BerlinModelFactsImport extends BerlinModelImportBase {
55 private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
56
57 public static final String SEQUENCE_PREFIX = "ORDER: ";
58
59 private int modCount = 10000;
60
61 public BerlinModelFactsImport(){
62 super();
63 }
64
65 /* (non-Javadoc)
66 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
67 */
68 @Override
69 protected boolean doCheck(BerlinModelImportState state){
70 boolean result = true;
71 BerlinModelImportConfigurator bmiConfig = state.getConfig();
72 logger.warn("Checking for Facts not yet fully implemented");
73 result &= checkDesignationRefsExist(bmiConfig);
74 return result;
75 }
76
77 private TermVocabulary<Feature> getFeatureVocabulary(){
78 try {
79 //TODO work around until service method works
80 TermVocabulary<Feature> featureVocabulary = BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
81 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
82 return featureVocabulary;
83 } catch (UnknownCdmTypeException e) {
84 logger.error("Feature vocabulary not available. New vocabulary created");
85 return new TermVocabulary<Feature>() ;
86 }
87 }
88
89 private MapWrapper<Feature> invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
90
91 MapWrapper<Feature> result = bmiConfig.getFeatureMap();
92 Source source = bmiConfig.getSource();
93
94 try {
95 //get data from database
96 String strQuery =
97 " SELECT FactCategory.* " +
98 " FROM FactCategory "+
99 " WHERE (1=1)";
100 ResultSet rs = source.getResultSet(strQuery) ;
101
102
103 TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
104 int i = 0;
105 //for each reference
106 while (rs.next()){
107
108 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
109
110 int factCategoryId = rs.getInt("factCategoryId");
111 String factCategory = rs.getString("factCategory");
112
113
114 Feature feature;
115 try {
116 feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
117 } catch (UnknownCdmTypeException e) {
118 logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
119 feature = Feature.NewInstance(factCategory, factCategory, null);
120 feature.setVocabulary(featureVocabulary);
121 feature.setSupportsTextData(true);
122 //TODO
123 // MaxFactNumber int Checked
124 // ExtensionTableName varchar(100) Checked
125 // Description nvarchar(1000) Checked
126 // locExtensionFormName nvarchar(80) Checked
127 // RankRestrictionFk int Checked
128 }
129
130 // featureMap.put(factCategoryId, feature);
131 result.put(factCategoryId, feature);
132
133 }
134 Collection<Feature> col = result.getAllValues();
135 getTermService().save((Collection)col);
136 return result;
137 } catch (SQLException e) {
138 logger.error("SQLException:" + e);
139 return null;
140 }
141
142 }
143
144
145 /* (non-Javadoc)
146 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
147 */
148 @Override
149 protected boolean doInvoke(BerlinModelImportState state) {
150 boolean result = true;
151
152 MapWrapper<TaxonBase> taxonMap = (MapWrapper<TaxonBase>)state.getStore(ICdmIO.TAXON_STORE);
153 MapWrapper<ReferenceBase> referenceMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.REFERENCE_STORE);
154 MapWrapper<ReferenceBase> nomRefMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.NOMREF_STORE);
155
156 Set<TaxonBase> taxonStore = new HashSet<TaxonBase>();
157
158 BerlinModelImportConfigurator config = state.getConfig();
159 Source source = config.getSource();
160
161 logger.info("start makeFacts ...");
162
163 MapWrapper<Feature> featureMap = invokeFactCategories(config);
164
165 try {
166 //get data from database
167 String strQuery =
168 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
169 " FROM Fact " +
170 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
171 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
172 " WHERE (1=1)" +
173 " ORDER By Sequence";
174 ResultSet rs = source.getResultSet(strQuery) ;
175 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
176
177 int i = 0;
178 //for each fact
179 while (rs.next()){
180 try{
181 if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
182
183 int factId = rs.getInt("factId");
184 Object taxonIdObj = rs.getObject("taxonId");
185 int taxonId = rs.getInt("taxonId");
186 Object factRefFkObj = rs.getObject("factRefFk");
187 int factRefFk = rs.getInt("factRefFk");
188 Object categoryFkObj = rs.getObject("factCategoryFk");
189 Integer categoryFk = rs.getInt("factCategoryFk");
190 String details = rs.getString("Details");
191 String fact = CdmUtils.Nz(rs.getString("Fact"));
192 String notes = CdmUtils.Nz(rs.getString("notes"));
193 Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
194 Boolean publishFlag = rs.getBoolean("publishFlag");
195
196 TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
197 Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
198
199 if (taxonBase == null){
200 logger.warn("Taxon for Fact " + factId + " does not exist in store");
201 result = false;
202 }else{
203 Taxon taxon;
204 if ( taxonBase instanceof Taxon ) {
205 taxon = (Taxon) taxonBase;
206 }else{
207 logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
208 result = false;
209 continue;
210 }
211
212 TaxonDescription taxonDescription = null;
213 Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
214
215 boolean isImage = false;
216 Media media = null;
217 //for diptera images
218 if (categoryFk == 51){ //TODO check also FactCategory string
219 isImage = true;
220 media = Media.NewInstance();
221 taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
222 if (taxonDescription == null){
223 continue;
224 }
225 }
226 //all others (no image)
227 else{
228 for (TaxonDescription desc: descriptionSet){
229 if (! desc.isImageGallery()){
230 taxonDescription = desc;
231 }
232 }
233 if (taxonDescription == null){
234 taxonDescription = TaxonDescription.NewInstance();
235 taxonDescription.setTitleCache(sourceRef == null ? null:sourceRef.getTitleCache());
236 taxon.addDescription(taxonDescription);
237 }
238 }
239
240 //textData
241 TextData textData = null;
242 boolean newTextData = true;
243
244 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
245 // description element append the fact text to the existing TextData
246 if(categoryFk == 31) {
247 Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
248 for (DescriptionElementBase descriptionElement : descriptionElements) {
249 String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
250 if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
251 textData = (TextData)descriptionElement;
252 String factTextStr = textData.getText(Language.DEFAULT());
253 // FIXME: Removing newlines doesn't work
254 if (factTextStr.contains("\\r\\n")) {
255 factTextStr = factTextStr.replaceAll("\\r\\n","");
256 }
257 StringBuilder factText = new StringBuilder(factTextStr);
258 factText.append(fact);
259 fact = factText.toString();
260 newTextData = false;
261 break;
262 }
263 }
264 }
265
266 if(newTextData == true) { textData = TextData.NewInstance(); }
267
268
269
270 //for diptera database
271 if (categoryFk == 99 && notes.contains("<OriginalName>")){
272 notes = notes.replaceAll("<OriginalName>", "");
273 notes = notes.replaceAll("</OriginalName>", "");
274 fact = notes + ": " + fact ;
275 }
276 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
277 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
278 if (isImage){
279 textData.addMedia(media);
280 textData.setType(Feature.IMAGE());
281 }else{
282 textData.putText(fact, Language.DEFAULT());
283 textData.setType(feature);
284 }
285
286 //
287 ReferenceBase citation;
288 if (factRefFkObj != null){
289 citation = referenceMap.get(factRefFk);
290 if (citation == null){
291 citation = nomRefMap.get(factRefFk);
292 }
293 if (citation == null && (factRefFk != 0)){
294 logger.warn("Citation not found in referenceMap: " + factRefFk);
295 result = false;
296 }
297 }else{
298 citation = null;
299 }
300
301 if (citation != null || CdmUtils.isNotEmpty(details)){
302 DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
303 originalSource.setCitation(citation);
304 originalSource.setCitationMicroReference(details);
305 textData.addSource(originalSource);
306 }
307 taxonDescription.addElement(textData);
308 //doubtfulFlag
309 if (doubtfulFlag){
310 textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
311 }
312 //publisheFlag
313 textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
314 //Sequence
315 Integer sequence = rs.getInt("Sequence");
316 if (sequence != null && sequence != 999){
317 String strSequence = String.valueOf(sequence);
318 strSequence = SEQUENCE_PREFIX + strSequence;
319 //TODO make it an Extension when possible
320 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
321 Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
322 textData.addAnnotation(annotation);
323 }
324
325 // if (categoryFkObj == FACT_DESCRIPTION){
326 // //;
327 // }else if (categoryFkObj == FACT_OBSERVATION){
328 // //;
329 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
330 // //
331 // }else {
332 // //TODO
333 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
334 // }
335
336 //notes
337 doCreatedUpdatedNotes(state, textData, rs, "Fact");
338
339 //TODO
340 //Designation References -> unclear how to map to CDM
341 //factId -> OriginalSource for descriptionElements not yet implemented
342
343 //sequence -> textData is not an identifiable entity therefore extensions are not possible
344 //fact category better
345
346 taxonStore.add(taxon);
347 }
348 } catch (Exception re){
349 logger.error("An exception occurred during the facts import");
350 result = false;
351 }
352 //put
353 }
354 logger.info("Facts handled: " + (i-1));
355 logger.info("Taxa to save: " + taxonStore.size());
356 getTaxonService().save(taxonStore);
357
358 logger.info("end makeFacts ..." + getSuccessString(result));
359 return result;
360 } catch (SQLException e) {
361 logger.error("SQLException:" + e);
362 return false;
363 }
364
365 }
366
367 /**
368 * @param state
369 * @param media
370 * @param media
371 * @param descriptionSet
372 *
373 */
374 private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
375 TaxonDescription taxonDescription = null;
376 ReferenceBase sourceRef = state.getConfig().getSourceReference();
377 String uri = fact;
378 Integer size = null;
379 ImageMetaData imageMetaData = ImageMetaData.newInstance();
380 URL url;
381 try {
382 url = new URL(fact.trim());
383 } catch (MalformedURLException e) {
384 logger.warn("Malformed URL. Image could not be imported: " + CdmUtils.Nz(uri));
385 return null;
386 }
387 try {
388 imageMetaData.readMetaData(url.toURI(), 0);
389 }
390 catch(URISyntaxException e){
391 e.printStackTrace();
392 }
393 MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
394 media.addRepresentation(mediaRepresentation);
395 ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
396 mediaRepresentation.addRepresentationPart(image);
397
398 taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
399
400 return taxonDescription;
401 }
402
403 private TaxonBase getTaxon(MapWrapper<TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){
404 if (taxonIdObj != null){
405 return taxonMap.get(taxonId);
406 }else{
407 return null;
408 }
409
410 }
411
412 private Feature getFeature(MapWrapper<Feature> featureMap, Object categoryFkObj, Integer categoryFk){
413 if (categoryFkObj != null){
414 return featureMap.get(categoryFk);
415 }else{
416 return null;
417 }
418
419 }
420
421 private boolean checkDesignationRefsExist(BerlinModelImportConfigurator config){
422 try {
423 boolean result = true;
424 Source source = config.getSource();
425 String strQueryArticlesWithoutJournal = "SELECT Count(*) as n " +
426 " FROM Fact " +
427 " WHERE (NOT (PTDesignationRefFk IS NULL) ) OR " +
428 " (NOT (PTDesignationRefDetailFk IS NULL) )";
429 ResultSet rs = source.getResultSet(strQueryArticlesWithoutJournal);
430 rs.next();
431 int count = rs.getInt("n");
432 if (count > 0){
433 System.out.println("========================================================");
434 logger.warn("There are "+count+" Facts with not empty designation references. Designation references are not imported.");
435
436 System.out.println("========================================================");
437 }
438 return result;
439 } catch (SQLException e) {
440 e.printStackTrace();
441 return false;
442 }
443
444 }
445
446 /* (non-Javadoc)
447 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
448 */
449 protected boolean isIgnore(BerlinModelImportState state){
450 return ! state.getConfig().isDoFacts();
451 }
452
453 }