bugfix BerlinModel export
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelFactsImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.net.MalformedURLException;
13 import java.net.URL;
14 import java.sql.ResultSet;
15 import java.sql.SQLException;
16 import java.util.Collection;
17 import java.util.HashSet;
18 import java.util.Set;
19
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.common.MediaMetaData.ImageMetaData;
25 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
26 import eu.etaxonomy.cdm.io.common.ICdmIO;
27 import eu.etaxonomy.cdm.io.common.MapWrapper;
28 import eu.etaxonomy.cdm.io.common.Source;
29 import eu.etaxonomy.cdm.model.common.Annotation;
30 import eu.etaxonomy.cdm.model.common.DescriptionElementSource;
31 import eu.etaxonomy.cdm.model.common.Language;
32 import eu.etaxonomy.cdm.model.common.Marker;
33 import eu.etaxonomy.cdm.model.common.MarkerType;
34 import eu.etaxonomy.cdm.model.common.TermVocabulary;
35 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
36 import eu.etaxonomy.cdm.model.description.Feature;
37 import eu.etaxonomy.cdm.model.description.TaxonDescription;
38 import eu.etaxonomy.cdm.model.description.TextData;
39 import eu.etaxonomy.cdm.model.media.ImageFile;
40 import eu.etaxonomy.cdm.model.media.Media;
41 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
42 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
43 import eu.etaxonomy.cdm.model.taxon.Taxon;
44 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
45 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
46
47 /**
48 * @author a.mueller
49 * @created 20.03.2008
50 * @version 1.0
51 */
52 @Component
53 public class BerlinModelFactsImport extends BerlinModelImportBase {
54 private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
55
56 public static final String SEQUENCE_PREFIX = "ORDER: ";
57
58 private int modCount = 10000;
59
60 public BerlinModelFactsImport(){
61 super();
62 }
63
64 /* (non-Javadoc)
65 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
66 */
67 @Override
68 protected boolean doCheck(BerlinModelImportState state){
69 boolean result = true;
70 BerlinModelImportConfigurator bmiConfig = state.getConfig();
71 logger.warn("Checking for Facts not yet fully implemented");
72 result &= checkDesignationRefsExist(bmiConfig);
73 return result;
74 }
75
76 private TermVocabulary<Feature> getFeatureVocabulary(){
77 try {
78 //TODO work around until service method works
79 TermVocabulary<Feature> featureVocabulary = BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
80 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
81 return featureVocabulary;
82 } catch (UnknownCdmTypeException e) {
83 logger.error("Feature vocabulary not available. New vocabulary created");
84 return new TermVocabulary<Feature>() ;
85 }
86 }
87
88 private MapWrapper<Feature> invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
89
90 MapWrapper<Feature> result = bmiConfig.getFeatureMap();
91 Source source = bmiConfig.getSource();
92
93 try {
94 //get data from database
95 String strQuery =
96 " SELECT FactCategory.* " +
97 " FROM FactCategory "+
98 " WHERE (1=1)";
99 ResultSet rs = source.getResultSet(strQuery) ;
100
101
102 TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
103 int i = 0;
104 //for each reference
105 while (rs.next()){
106
107 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
108
109 int factCategoryId = rs.getInt("factCategoryId");
110 String factCategory = rs.getString("factCategory");
111
112
113 Feature feature;
114 try {
115 feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
116 } catch (UnknownCdmTypeException e) {
117 logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
118 feature = Feature.NewInstance(factCategory, factCategory, null);
119 feature.setVocabulary(featureVocabulary);
120 feature.setSupportsTextData(true);
121 //TODO
122 // MaxFactNumber int Checked
123 // ExtensionTableName varchar(100) Checked
124 // Description nvarchar(1000) Checked
125 // locExtensionFormName nvarchar(80) Checked
126 // RankRestrictionFk int Checked
127 }
128
129 // featureMap.put(factCategoryId, feature);
130 result.put(factCategoryId, feature);
131
132 }
133 Collection<Feature> col = result.getAllValues();
134 getTermService().saveTermsAll(col);
135 return result;
136 } catch (SQLException e) {
137 logger.error("SQLException:" + e);
138 return null;
139 }
140
141 }
142
143
144 /* (non-Javadoc)
145 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
146 */
147 @Override
148 protected boolean doInvoke(BerlinModelImportState state) {
149 boolean result = true;
150
151 MapWrapper<TaxonBase> taxonMap = (MapWrapper<TaxonBase>)state.getStore(ICdmIO.TAXON_STORE);
152 MapWrapper<ReferenceBase> referenceMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.REFERENCE_STORE);
153 MapWrapper<ReferenceBase> nomRefMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.NOMREF_STORE);
154
155 Set<TaxonBase> taxonStore = new HashSet<TaxonBase>();
156
157 BerlinModelImportConfigurator config = state.getConfig();
158 Source source = config.getSource();
159
160 logger.info("start makeFacts ...");
161
162 MapWrapper<Feature> featureMap = invokeFactCategories(config);
163
164 try {
165 //get data from database
166 String strQuery =
167 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
168 " FROM Fact " +
169 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
170 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
171 " WHERE (1=1)" +
172 " ORDER By Sequence";
173 ResultSet rs = source.getResultSet(strQuery) ;
174 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
175
176 int i = 0;
177 //for each fact
178 while (rs.next()){
179 try{
180 if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
181
182 int factId = rs.getInt("factId");
183 Object taxonIdObj = rs.getObject("taxonId");
184 int taxonId = rs.getInt("taxonId");
185 Object factRefFkObj = rs.getObject("factRefFk");
186 int factRefFk = rs.getInt("factRefFk");
187 Object categoryFkObj = rs.getObject("factCategoryFk");
188 Integer categoryFk = rs.getInt("factCategoryFk");
189 String details = rs.getString("Details");
190 String fact = CdmUtils.Nz(rs.getString("Fact"));
191 String notes = CdmUtils.Nz(rs.getString("notes"));
192 Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
193 Boolean publishFlag = rs.getBoolean("publishFlag");
194
195 TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
196 Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
197
198 if (taxonBase == null){
199 logger.warn("Taxon for Fact " + factId + " does not exist in store");
200 result = false;
201 }else{
202 Taxon taxon;
203 if ( taxonBase instanceof Taxon ) {
204 taxon = (Taxon) taxonBase;
205 }else{
206 logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
207 result = false;
208 continue;
209 }
210
211 TaxonDescription taxonDescription = null;
212 Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
213
214 boolean isImage = false;
215 Media media = null;
216 //for diptera images
217 if (categoryFk == 51){ //TODO check also FactCategory string
218 isImage = true;
219 media = Media.NewInstance();
220 taxonDescription = makeImage(state, fact, media, descriptionSet, taxon);
221 if (taxonDescription == null){
222 continue;
223 }
224 }
225 //all others (no image)
226 else{
227 for (TaxonDescription desc: descriptionSet){
228 if (! desc.isImageGallery()){
229 taxonDescription = desc;
230 }
231 }
232 if (taxonDescription == null){
233 taxonDescription = TaxonDescription.NewInstance();
234 taxonDescription.setTitleCache(sourceRef == null ? null:sourceRef.getTitleCache());
235 taxon.addDescription(taxonDescription);
236 }
237 }
238
239 //textData
240 TextData textData = null;
241 boolean newTextData = true;
242
243 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
244 // description element append the fact text to the existing TextData
245 if(categoryFk == 31) {
246 Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
247 for (DescriptionElementBase descriptionElement : descriptionElements) {
248 String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
249 if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
250 textData = (TextData)descriptionElement;
251 String factTextStr = textData.getText(Language.DEFAULT());
252 // FIXME: Removing newlines doesn't work
253 if (factTextStr.contains("\\r\\n")) {
254 factTextStr = factTextStr.replaceAll("\\r\\n","");
255 }
256 StringBuilder factText = new StringBuilder(factTextStr);
257 factText.append(fact);
258 fact = factText.toString();
259 newTextData = false;
260 break;
261 }
262 }
263 }
264
265 if(newTextData == true) { textData = TextData.NewInstance(); }
266
267
268
269 //for diptera database
270 if (categoryFk == 99 && notes.contains("<OriginalName>")){
271 notes = notes.replaceAll("<OriginalName>", "");
272 notes = notes.replaceAll("</OriginalName>", "");
273 fact = notes + ": " + fact ;
274 }
275 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
276 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
277 if (isImage){
278 textData.addMedia(media);
279 textData.setType(Feature.IMAGE());
280 }else{
281 textData.putText(fact, Language.DEFAULT());
282 textData.setType(feature);
283 }
284
285 //
286 ReferenceBase citation;
287 if (factRefFkObj != null){
288 citation = referenceMap.get(factRefFk);
289 if (citation == null){
290 citation = nomRefMap.get(factRefFk);
291 }
292 if (citation == null && (factRefFk != 0)){
293 logger.warn("Citation not found in referenceMap: " + factRefFk);
294 result = false;
295 }
296 }else{
297 citation = null;
298 }
299
300 if (citation != null || CdmUtils.isNotEmpty(details)){
301 DescriptionElementSource originalSource = DescriptionElementSource.NewInstance();
302 originalSource.setCitation(citation);
303 originalSource.setCitationMicroReference(details);
304 textData.addSource(originalSource);
305 }
306 taxonDescription.addElement(textData);
307 //doubtfulFlag
308 if (doubtfulFlag){
309 textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
310 }
311 //publisheFlag
312 textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
313 //Sequence
314 Integer sequence = rs.getInt("Sequence");
315 if (sequence != null && sequence != 999){
316 String strSequence = String.valueOf(sequence);
317 strSequence = SEQUENCE_PREFIX + strSequence;
318 //TODO make it an Extension when possible
319 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
320 Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
321 textData.addAnnotation(annotation);
322 }
323
324 // if (categoryFkObj == FACT_DESCRIPTION){
325 // //;
326 // }else if (categoryFkObj == FACT_OBSERVATION){
327 // //;
328 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
329 // //
330 // }else {
331 // //TODO
332 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
333 // }
334
335 //notes
336 doCreatedUpdatedNotes(state, textData, rs, "Fact");
337
338 //TODO
339 //Designation References -> unclear how to map to CDM
340 //factId -> OriginalSource for descriptionElements not yet implemented
341
342 //sequence -> textData is not an identifiable entity therefore extensions are not possible
343 //fact category better
344
345 taxonStore.add(taxon);
346 }
347 } catch (Exception re){
348 logger.error("An exception occurred during the facts import");
349 result = false;
350 }
351 //put
352 }
353 logger.info("Facts handled: " + (i-1));
354 logger.info("Taxa to save: " + taxonStore.size());
355 getTaxonService().saveTaxonAll(taxonStore);
356
357 logger.info("end makeFacts ..." + getSuccessString(result));
358 return result;
359 } catch (SQLException e) {
360 logger.error("SQLException:" + e);
361 return false;
362 }
363
364 }
365
366 /**
367 * @param state
368 * @param media
369 * @param media
370 * @param descriptionSet
371 *
372 */
373 private TaxonDescription makeImage(BerlinModelImportState state, String fact, Media media, Set<TaxonDescription> descriptionSet, Taxon taxon) {
374 TaxonDescription taxonDescription = null;
375 ReferenceBase sourceRef = state.getConfig().getSourceReference();
376 String uri = fact;
377 Integer size = null;
378 ImageMetaData imageMetaData = new ImageMetaData();
379 URL url;
380 try {
381 url = new URL(fact.trim());
382 } catch (MalformedURLException e) {
383 logger.warn("Malformed URL. Image could not be imported: " + CdmUtils.Nz(uri));
384 return null;
385 }
386 imageMetaData.readFrom(url);
387 MediaRepresentation mediaRepresentation = MediaRepresentation.NewInstance(imageMetaData.getMimeType(), null);
388 media.addRepresentation(mediaRepresentation);
389 ImageFile image = ImageFile.NewInstance(uri, size, imageMetaData);
390 mediaRepresentation.addRepresentationPart(image);
391
392 taxonDescription = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
393
394 return taxonDescription;
395 }
396
397 private TaxonBase getTaxon(MapWrapper<TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){
398 if (taxonIdObj != null){
399 return taxonMap.get(taxonId);
400 }else{
401 return null;
402 }
403
404 }
405
406 private Feature getFeature(MapWrapper<Feature> featureMap, Object categoryFkObj, Integer categoryFk){
407 if (categoryFkObj != null){
408 return featureMap.get(categoryFk);
409 }else{
410 return null;
411 }
412
413 }
414
415 private boolean checkDesignationRefsExist(BerlinModelImportConfigurator config){
416 try {
417 boolean result = true;
418 Source source = config.getSource();
419 String strQueryArticlesWithoutJournal = "SELECT Count(*) as n " +
420 " FROM Fact " +
421 " WHERE (NOT (PTDesignationRefFk IS NULL) ) OR " +
422 " (NOT (PTDesignationRefDetailFk IS NULL) )";
423 ResultSet rs = source.getResultSet(strQueryArticlesWithoutJournal);
424 rs.next();
425 int count = rs.getInt("n");
426 if (count > 0){
427 System.out.println("========================================================");
428 logger.warn("There are "+count+" Facts with not empty designation references. Designation references are not imported.");
429
430 System.out.println("========================================================");
431 }
432 return result;
433 } catch (SQLException e) {
434 e.printStackTrace();
435 return false;
436 }
437
438 }
439
440 /* (non-Javadoc)
441 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
442 */
443 protected boolean isIgnore(BerlinModelImportState state){
444 return ! state.getConfig().isDoFacts();
445 }
446
447 }