c18df3040525154ea33283732846ac4a4441ddd2
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelFactsImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.net.MalformedURLException;
13 import java.net.URL;
14 import java.sql.ResultSet;
15 import java.sql.SQLException;
16 import java.util.Collection;
17 import java.util.HashSet;
18 import java.util.Set;
19
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.common.MediaMetaData.ImageMetaData;
25 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
26 import eu.etaxonomy.cdm.io.common.ICdmIO;
27 import eu.etaxonomy.cdm.io.common.MapWrapper;
28 import eu.etaxonomy.cdm.io.common.Source;
29 import eu.etaxonomy.cdm.model.common.Annotation;
30 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
31 import eu.etaxonomy.cdm.model.common.Language;
32 import eu.etaxonomy.cdm.model.common.Marker;
33 import eu.etaxonomy.cdm.model.common.MarkerType;
34 import eu.etaxonomy.cdm.model.common.TermVocabulary;
35 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
36 import eu.etaxonomy.cdm.model.description.Feature;
37 import eu.etaxonomy.cdm.model.description.TaxonDescription;
38 import eu.etaxonomy.cdm.model.description.TextData;
39 import eu.etaxonomy.cdm.model.media.ImageFile;
40 import eu.etaxonomy.cdm.model.media.Media;
41 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
42 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
43 import eu.etaxonomy.cdm.model.taxon.Taxon;
44 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
45 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
46
47 /**
48 * @author a.mueller
49 * @created 20.03.2008
50 * @version 1.0
51 */
52 @Component
53 public class BerlinModelFactsImport extends BerlinModelImportBase {
/** Prefix of the annotation text in which the sequence number of a fact is stored. */
public static final String SEQUENCE_PREFIX = "ORDER: ";

/** Logger of this import class. */
private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);

/** Number of handled records after which a progress message is logged. */
private int modCount = 10000;

/**
 * Creates a new facts import. Only delegates to the constructor of the base class.
 */
public BerlinModelFactsImport() {
    super();
}
63
64 /* (non-Javadoc)
65 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
66 */
67 @Override
68 protected boolean doCheck(BerlinModelImportState state){
69 boolean result = true;
70 BerlinModelImportConfigurator bmiConfig = state.getConfig();
71 logger.warn("Checking for Facts not yet fully implemented");
72 result &= checkDesignationRefsExist(bmiConfig);
73 return result;
74 }
75
76 private TermVocabulary<Feature> getFeatureVocabulary(){
77 try {
78 //TODO work around until service method works
79 TermVocabulary<Feature> featureVocabulary = BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
80 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
81 return featureVocabulary;
82 } catch (UnknownCdmTypeException e) {
83 logger.error("Feature vocabulary not available. New vocabulary created");
84 return new TermVocabulary<Feature>() ;
85 }
86 }
87
88 private MapWrapper<Feature> invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
89
90 MapWrapper<Feature> result = bmiConfig.getFeatureMap();
91 Source source = bmiConfig.getSource();
92
93 try {
94 //get data from database
95 String strQuery =
96 " SELECT FactCategory.* " +
97 " FROM FactCategory "+
98 " WHERE (1=1)";
99 ResultSet rs = source.getResultSet(strQuery) ;
100
101
102 TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
103 int i = 0;
104 //for each reference
105 while (rs.next()){
106
107 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
108
109 int factCategoryId = rs.getInt("factCategoryId");
110 String factCategory = rs.getString("factCategory");
111
112
113 Feature feature;
114 try {
115 feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
116 } catch (UnknownCdmTypeException e) {
117 logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
118 feature = Feature.NewInstance(factCategory, factCategory, null);
119 feature.setVocabulary(featureVocabulary);
120 feature.setSupportsTextData(true);
121 //TODO
122 // MaxFactNumber int Checked
123 // ExtensionTableName varchar(100) Checked
124 // Description nvarchar(1000) Checked
125 // locExtensionFormName nvarchar(80) Checked
126 // RankRestrictionFk int Checked
127 }
128
129 // featureMap.put(factCategoryId, feature);
130 result.put(factCategoryId, feature);
131
132 }
133 Collection<Feature> col = result.getAllValues();
134 getTermService().saveTermsAll(col);
135 return result;
136 } catch (SQLException e) {
137 logger.error("SQLException:" + e);
138 return null;
139 }
140
141 }
142
143
144 /* (non-Javadoc)
145 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
146 */
147 @Override
148 protected boolean doInvoke(BerlinModelImportState state) {
149 boolean result = true;
150
151 MapWrapper<TaxonBase> taxonMap = (MapWrapper<TaxonBase>)state.getStore(ICdmIO.TAXON_STORE);
152 MapWrapper<ReferenceBase> referenceMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.REFERENCE_STORE);
153 MapWrapper<ReferenceBase> nomRefMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.NOMREF_STORE);
154
155 Set<TaxonBase> taxonStore = new HashSet<TaxonBase>();
156
157 BerlinModelImportConfigurator config = state.getConfig();
158 Source source = config.getSource();
159
160 logger.info("start makeFacts ...");
161
162 MapWrapper<Feature> featureMap = invokeFactCategories(config);
163
164 try {
165 //get data from database
166 String strQuery =
167 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
168 " FROM Fact " +
169 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
170 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
171 " WHERE (1=1)" +
172 " ORDER By Sequence";
173 ResultSet rs = source.getResultSet(strQuery) ;
174 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
175
176 int i = 0;
177 //for each fact
178 while (rs.next()){
179 try{
180 if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
181
182 int factId = rs.getInt("factId");
183 Object taxonIdObj = rs.getObject("taxonId");
184 int taxonId = rs.getInt("taxonId");
185 Object factRefFkObj = rs.getObject("factRefFk");
186 int factRefFk = rs.getInt("factRefFk");
187 Object categoryFkObj = rs.getObject("factCategoryFk");
188 Integer categoryFk = rs.getInt("factCategoryFk");
189 String details = rs.getString("Details");
190 String fact = CdmUtils.Nz(rs.getString("Fact"));
191 String notes = CdmUtils.Nz(rs.getString("notes"));
192 Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
193 Boolean publishFlag = rs.getBoolean("publishFlag");
194
195 TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
196 Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
197
198 if (taxonBase == null){
199 logger.warn("Taxon for Fact " + factId + " does not exist in store");
200 result = false;
201 }else{
202 Taxon taxon;
203 if ( taxonBase instanceof Taxon ) {
204 taxon = (Taxon) taxonBase;
205 }else{
206 logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
207 result = false;
208 continue;
209 }
210
211 TaxonDescription taxonDescription = null;
212 Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
213
214 boolean isImage = false;
215 Media media = null;
216 //for diptera images
217 if (categoryFk == 51){ //TODO check also FactCategory string
218 isImage = true;
219 String uri = fact;
220 Integer size = null;
221 ImageMetaData imageMetaData = new ImageMetaData();
222 URL url;
223 try {
224 url = new URL(fact.trim());
225 } catch (MalformedURLException e) {
226 logger.warn("Malformed URL. Image could not be imported: " + CdmUtils.Nz(uri));
227 continue;
228 }
229 imageMetaData.readFrom(url);
230 media = Media.NewInstance();
231 MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
232 media.addRepresentation(mediaRepresentation);
233 ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
234 mediaRepresentation.addRepresentationPart(image);
235 for (TaxonDescription desc: descriptionSet){
236 if (desc.isImageGallery()){
237 taxonDescription = desc;
238 }
239 }
240 if (taxonDescription == null){
241 taxonDescription = TaxonDescription.NewInstance();
242 taxonDescription.setTitleCache(sourceRef == null ? "Image Galery":sourceRef.getTitleCache()+"-Image Galery");
243 taxon.addDescription(taxonDescription);
244 taxonDescription.setImageGallery(true);
245 }
246 }
247 //all others (no image)
248 else{
249 for (TaxonDescription desc: descriptionSet){
250 if (! desc.isImageGallery()){
251 taxonDescription = desc;
252 }
253 }
254 if (taxonDescription == null){
255 taxonDescription = TaxonDescription.NewInstance();
256 taxonDescription.setTitleCache(sourceRef == null ? null:sourceRef.getTitleCache());
257 taxon.addDescription(taxonDescription);
258 }
259 }
260
261 //textData
262 TextData textData = null;
263 boolean newTextData = true;
264
265 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
266 // description element append the fact text to the existing TextData
267 if(categoryFk == 31) {
268 Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
269 for (DescriptionElementBase descriptionElement : descriptionElements) {
270 String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
271 if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
272 textData = (TextData)descriptionElement;
273 String factTextStr = textData.getText(Language.DEFAULT());
274 // FIXME: Removing newlines doesn't work
275 if (factTextStr.contains("\\r\\n")) {
276 factTextStr = factTextStr.replaceAll("\\r\\n","");
277 }
278 StringBuilder factText = new StringBuilder(factTextStr);
279 factText.append(fact);
280 fact = factText.toString();
281 newTextData = false;
282 break;
283 }
284 }
285 }
286
287 if(newTextData == true) { textData = TextData.NewInstance(); }
288
289
290
291 //for diptera database
292 if (categoryFk == 99 && notes.contains("<OriginalName>")){
293 notes = notes.replaceAll("<OriginalName>", "");
294 notes = notes.replaceAll("</OriginalName>", "");
295 fact = notes + ": " + fact ;
296 }
297 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
298 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
299 if (isImage){
300 textData.addMedia(media);
301 textData.setType(Feature.IMAGE());
302 }else{
303 textData.putText(fact, Language.DEFAULT());
304 textData.setType(feature);
305 }
306
307 //
308 ReferenceBase citation;
309 if (factRefFkObj != null){
310 citation = referenceMap.get(factRefFk);
311 if (citation == null){
312 citation = nomRefMap.get(factRefFk);
313 }
314 if (citation == null && (factRefFk != 0)){
315 logger.warn("Citation not found in referenceMap: " + factRefFk);
316 result = false;
317 }
318 }else{
319 citation = null;
320 }
321
322 if (citation != null || CdmUtils.isNotEmpty(details)){
323 DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
324 originalSource.setCitation(citation);
325 originalSource.setCitationMicroReference(details);
326 textData.addSource(originalSource);
327 }
328 taxonDescription.addElement(textData);
329 //doubtfulFlag
330 if (doubtfulFlag){
331 textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
332 }
333 //publisheFlag
334 textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
335 //Sequence
336 Integer sequence = rs.getInt("Sequence");
337 if (sequence != null && sequence != 999){
338 String strSequence = String.valueOf(sequence);
339 strSequence = SEQUENCE_PREFIX + strSequence;
340 //TODO make it an Extension when possible
341 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
342 Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
343 textData.addAnnotation(annotation);
344 }
345
346 // if (categoryFkObj == FACT_DESCRIPTION){
347 // //;
348 // }else if (categoryFkObj == FACT_OBSERVATION){
349 // //;
350 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
351 // //
352 // }else {
353 // //TODO
354 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
355 // }
356
357 //notes
358 doCreatedUpdatedNotes(state, textData, rs, "Fact");
359
360 //TODO
361 //Designation References -> unclear how to map to CDM
362 //factId -> OriginalSource for descriptionElements not yet implemented
363 //sequence -> textData is not an identifiable entity therefore extensions are not possible
364 //fact category better
365
366 taxonStore.add(taxon);
367 }
368 } catch (Exception re){
369 logger.error("An exception occurred during the facts import");
370 result = false;
371 }
372 //put
373 }
374 logger.info("Facts handled: " + (i-1));
375 logger.info("Taxa to save: " + taxonStore.size());
376 getTaxonService().saveTaxonAll(taxonStore);
377
378 logger.info("end makeFacts ..." + getSuccessString(result));
379 return result;
380 } catch (SQLException e) {
381 logger.error("SQLException:" + e);
382 return false;
383 }
384
385 }
386
387 private TaxonBase getTaxon(MapWrapper<TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){
388 if (taxonIdObj != null){
389 return taxonMap.get(taxonId);
390 }else{
391 return null;
392 }
393
394 }
395
396 private Feature getFeature(MapWrapper<Feature> featureMap, Object categoryFkObj, Integer categoryFk){
397 if (categoryFkObj != null){
398 return featureMap.get(categoryFk);
399 }else{
400 return null;
401 }
402
403 }
404
405 private boolean checkDesignationRefsExist(BerlinModelImportConfigurator config){
406 try {
407 boolean result = true;
408 Source source = config.getSource();
409 String strQueryArticlesWithoutJournal = "SELECT Count(*) as n " +
410 " FROM Fact " +
411 " WHERE (NOT (PTDesignationRefFk IS NULL) ) OR " +
412 " (NOT (PTDesignationRefDetailFk IS NULL) )";
413 ResultSet rs = source.getResultSet(strQueryArticlesWithoutJournal);
414 rs.next();
415 int count = rs.getInt("n");
416 if (count > 0){
417 System.out.println("========================================================");
418 logger.warn("There are "+count+" Facts with not empty designation references. Designation references are not imported.");
419
420 System.out.println("========================================================");
421 }
422 return result;
423 } catch (SQLException e) {
424 e.printStackTrace();
425 return false;
426 }
427
428 }
429
430 /* (non-Javadoc)
431 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
432 */
433 protected boolean isIgnore(BerlinModelImportState state){
434 return ! state.getConfig().isDoFacts();
435 }
436
437 }