title for descriptions in BerlinModel import
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelFactsImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.Collection;
15 import java.util.HashSet;
16 import java.util.Set;
17
18 import org.apache.log4j.Logger;
19 import org.springframework.stereotype.Component;
20
21 import eu.etaxonomy.cdm.common.CdmUtils;
22 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
23 import eu.etaxonomy.cdm.io.common.ICdmIO;
24 import eu.etaxonomy.cdm.io.common.MapWrapper;
25 import eu.etaxonomy.cdm.io.common.Source;
26 import eu.etaxonomy.cdm.model.common.Annotation;
27 import eu.etaxonomy.cdm.model.common.Language;
28 import eu.etaxonomy.cdm.model.common.Marker;
29 import eu.etaxonomy.cdm.model.common.MarkerType;
30 import eu.etaxonomy.cdm.model.common.TermVocabulary;
31 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
32 import eu.etaxonomy.cdm.model.description.Feature;
33 import eu.etaxonomy.cdm.model.description.TaxonDescription;
34 import eu.etaxonomy.cdm.model.description.TextData;
35 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
36 import eu.etaxonomy.cdm.model.taxon.Taxon;
37 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
38 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
39
40 /**
41 * @author a.mueller
42 * @created 20.03.2008
43 * @version 1.0
44 */
45 @Component
46 public class BerlinModelFactsImport extends BerlinModelImportBase {
47 private static final Logger logger = Logger.getLogger(BerlinModelFactsImport.class);
48
49 public static final String SEQUENCE_PREFIX = "ORDER: ";
50
51 private int modCount = 10000;
52
53 public BerlinModelFactsImport(){
54 super();
55 }
56
57 /* (non-Javadoc)
58 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
59 */
60 @Override
61 protected boolean doCheck(BerlinModelImportState state){
62 boolean result = true;
63 BerlinModelImportConfigurator bmiConfig = state.getConfig();
64 logger.warn("Checking for Facts not yet fully implemented");
65 result &= checkDesignationRefsExist(bmiConfig);
66 return result;
67 }
68
69 private TermVocabulary<Feature> getFeatureVocabulary(){
70 try {
71 //TODO work around until service method works
72 TermVocabulary<Feature> featureVocabulary = BerlinModelTransformer.factCategory2Feature(1).getVocabulary();
73 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
74 return featureVocabulary;
75 } catch (UnknownCdmTypeException e) {
76 logger.error("Feature vocabulary not available. New vocabulary created");
77 return new TermVocabulary<Feature>() ;
78 }
79 }
80
81 private MapWrapper<Feature> invokeFactCategories(BerlinModelImportConfigurator bmiConfig){
82
83 MapWrapper<Feature> result = bmiConfig.getFeatureMap();
84 Source source = bmiConfig.getSource();
85
86 try {
87 //get data from database
88 String strQuery =
89 " SELECT FactCategory.* " +
90 " FROM FactCategory "+
91 " WHERE (1=1)";
92 ResultSet rs = source.getResultSet(strQuery) ;
93
94
95 TermVocabulary<Feature> featureVocabulary = getFeatureVocabulary();
96 int i = 0;
97 //for each reference
98 while (rs.next()){
99
100 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("FactCategories handled: " + (i-1));}
101
102 int factCategoryId = rs.getInt("factCategoryId");
103 String factCategory = rs.getString("factCategory");
104
105
106 Feature feature;
107 try {
108 feature = BerlinModelTransformer.factCategory2Feature(factCategoryId);
109 } catch (UnknownCdmTypeException e) {
110 logger.warn("New Feature (FactCategoryId: " + factCategoryId + ")");
111 feature = Feature.NewInstance(factCategory, factCategory, null);
112 feature.setVocabulary(featureVocabulary);
113 feature.setSupportsTextData(true);
114 //TODO
115 // MaxFactNumber int Checked
116 // ExtensionTableName varchar(100) Checked
117 // Description nvarchar(1000) Checked
118 // locExtensionFormName nvarchar(80) Checked
119 // RankRestrictionFk int Checked
120 }
121
122 // featureMap.put(factCategoryId, feature);
123 result.put(factCategoryId, feature);
124
125 }
126 Collection<Feature> col = result.getAllValues();
127 getTermService().saveTermsAll(col);
128 return result;
129 } catch (SQLException e) {
130 logger.error("SQLException:" + e);
131 return null;
132 }
133
134 }
135
136
137 /* (non-Javadoc)
138 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
139 */
140 @Override
141 protected boolean doInvoke(BerlinModelImportState state){
142 boolean result = true;
143
144 MapWrapper<TaxonBase> taxonMap = (MapWrapper<TaxonBase>)state.getStore(ICdmIO.TAXON_STORE);
145 MapWrapper<ReferenceBase> referenceMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.REFERENCE_STORE);
146 MapWrapper<ReferenceBase> nomRefMap = (MapWrapper<ReferenceBase>)state.getStore(ICdmIO.NOMREF_STORE);
147
148 Set<TaxonBase> taxonStore = new HashSet<TaxonBase>();
149
150 BerlinModelImportConfigurator config = state.getConfig();
151 Source source = config.getSource();
152
153 logger.info("start makeFacts ...");
154
155 MapWrapper<Feature> featureMap = invokeFactCategories(config);
156
157 try {
158 //get data from database
159 String strQuery =
160 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
161 " FROM Fact " +
162 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
163 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
164 " WHERE (1=1)" +
165 " ORDER By Sequence";
166 ResultSet rs = source.getResultSet(strQuery) ;
167 ReferenceBase<?> sourceRef = state.getConfig().getSourceReference();
168
169 int i = 0;
170 //for each fact
171 while (rs.next()){
172 try{
173 if ((i++ % modCount) == 0){ logger.info("Facts handled: " + (i-1));}
174
175 int factId = rs.getInt("factId");
176 Object taxonIdObj = rs.getObject("taxonId");
177 int taxonId = rs.getInt("taxonId");
178 Object factRefFkObj = rs.getObject("factRefFk");
179 int factRefFk = rs.getInt("factRefFk");
180 Object categoryFkObj = rs.getObject("factCategoryFk");
181 Integer categoryFk = rs.getInt("factCategoryFk");
182 String details = rs.getString("Details");
183 String fact = CdmUtils.Nz(rs.getString("Fact"));
184 String notes = CdmUtils.Nz(rs.getString("notes"));
185 Boolean doubtfulFlag = rs.getBoolean("DoubtfulFlag");
186 Boolean publishFlag = rs.getBoolean("publishFlag");
187
188 TaxonBase taxonBase = getTaxon(taxonMap, taxonIdObj, taxonId);
189 Feature feature = getFeature(featureMap, categoryFkObj, categoryFk) ;
190
191 if (taxonBase == null){
192 logger.warn("Taxon for Fact " + factId + " does not exist in store");
193 result = false;
194 }else{
195 Taxon taxon;
196 if ( taxonBase instanceof Taxon ) {
197 taxon = (Taxon) taxonBase;
198 }else{
199 logger.warn("TaxonBase " + (taxonIdObj==null?"(null)":taxonIdObj) + " for Fact " + factId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
200 result = false;
201 continue;
202 }
203
204 TaxonDescription taxonDescription;
205 Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
206 if (descriptionSet.size() > 0) {
207 taxonDescription = descriptionSet.iterator().next();
208 }else{
209 taxonDescription = TaxonDescription.NewInstance();
210 taxonDescription.setTitleCache(sourceRef == null ? null:sourceRef.getTitleCache());
211 taxon.addDescription(taxonDescription);
212 }
213
214
215 //textData
216 TextData textData = null;
217 boolean newTextData = true;
218
219 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
220 // description element append the fact text to the existing TextData
221 if(categoryFk == 31) {
222 Set<DescriptionElementBase> descriptionElements = taxonDescription.getElements();
223 for (DescriptionElementBase descriptionElement : descriptionElements) {
224 String featureString = descriptionElement.getFeature().getRepresentation(Language.DEFAULT()).getLabel();
225 if (descriptionElement instanceof TextData && featureString.equals("Systematics")) { // TODO: test
226 textData = (TextData)descriptionElement;
227 String factTextStr = textData.getText(Language.DEFAULT());
228 // FIXME: Removing newlines doesn't work
229 if (factTextStr.contains("\\r\\n")) {
230 factTextStr = factTextStr.replaceAll("\\r\\n","");
231 }
232 StringBuilder factText = new StringBuilder(factTextStr);
233 factText.append(fact);
234 fact = factText.toString();
235 newTextData = false;
236 break;
237 }
238 }
239 }
240
241 if(newTextData == true) { textData = TextData.NewInstance(); }
242
243
244 //for diptera database
245 if (categoryFk == 99 && notes.contains("<OriginalName>")){
246 notes = notes.replaceAll("<OriginalName>", "");
247 notes = notes.replaceAll("</OriginalName>", "");
248 fact = notes + ": " + fact ;
249 }
250 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
251 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
252 textData.putText(fact, Language.DEFAULT());
253 textData.setType(feature);
254
255 //
256 ReferenceBase citation;
257 if (factRefFkObj != null){
258 citation = referenceMap.get(factRefFk);
259 if (citation == null){
260 citation = nomRefMap.get(factRefFk);
261 }
262 if (citation == null && (factRefFk != 0)){
263 logger.warn("Citation not found in referenceMap: " + factRefFk);
264 result = false;
265 }
266 }else{
267 citation = null;
268 }
269
270
271 textData.setCitation(citation);
272 textData.setCitationMicroReference(details);
273 taxonDescription.addElement(textData);
274 //doubtfulFlag
275 if (doubtfulFlag){
276 textData.addMarker(Marker.NewInstance(MarkerType.IS_DOUBTFUL(), true));
277 }
278 //publisheFlag
279 textData.addMarker(Marker.NewInstance(MarkerType.PUBLISH(), publishFlag));
280 //Sequence
281 Integer sequence = rs.getInt("Sequence");
282 if (sequence != null && sequence != 999){
283 String strSequence = String.valueOf(sequence);
284 strSequence = SEQUENCE_PREFIX + strSequence;
285 //TODO make it an Extension when possible
286 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
287 Annotation annotation = Annotation.NewInstance(strSequence, Language.DEFAULT());
288 textData.addAnnotation(annotation);
289 }
290
291 // if (categoryFkObj == FACT_DESCRIPTION){
292 // //;
293 // }else if (categoryFkObj == FACT_OBSERVATION){
294 // //;
295 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
296 // //
297 // }else {
298 // //TODO
299 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
300 // }
301
302 //notes
303 doCreatedUpdatedNotes(state, textData, rs, "Fact");
304
305 //TODO
306 //Designation References -> unclear how to map to CDM
307 //factId -> OriginalSource for descriptionElements not yet implemented
308 //sequence -> textData is not an identifiable entity therefore extensions are not possible
309 //fact category better
310
311 taxonStore.add(taxon);
312 }
313 } catch (RuntimeException re){
314 logger.error("A runtime exception occurred during the facts import");
315 result = false;
316 throw re;
317 }
318 //put
319 }
320 logger.info("Facts handled: " + (i-1));
321 logger.info("Taxa to save: " + taxonStore.size());
322 getTaxonService().saveTaxonAll(taxonStore);
323
324 logger.info("end makeFacts ..." + getSuccessString(result));
325 return result;
326 } catch (SQLException e) {
327 logger.error("SQLException:" + e);
328 return false;
329 }
330
331 }
332
333 private TaxonBase getTaxon(MapWrapper<TaxonBase> taxonMap, Object taxonIdObj, Integer taxonId){
334 if (taxonIdObj != null){
335 return taxonMap.get(taxonId);
336 }else{
337 return null;
338 }
339
340 }
341
342 private Feature getFeature(MapWrapper<Feature> featureMap, Object categoryFkObj, Integer categoryFk){
343 if (categoryFkObj != null){
344 return featureMap.get(categoryFk);
345 }else{
346 return null;
347 }
348
349 }
350
351 private boolean checkDesignationRefsExist(BerlinModelImportConfigurator config){
352 try {
353 boolean result = true;
354 Source source = config.getSource();
355 String strQueryArticlesWithoutJournal = "SELECT Count(*) as n " +
356 " FROM Fact " +
357 " WHERE (NOT (PTDesignationRefFk IS NULL) ) OR " +
358 " (NOT (PTDesignationRefDetailFk IS NULL) )";
359 ResultSet rs = source.getResultSet(strQueryArticlesWithoutJournal);
360 rs.next();
361 int count = rs.getInt("n");
362 if (count > 0){
363 System.out.println("========================================================");
364 logger.warn("There are "+count+" Facts with not empty designation references. Designation references are not imported.");
365
366 System.out.println("========================================================");
367 }
368 return result;
369 } catch (SQLException e) {
370 e.printStackTrace();
371 return false;
372 }
373
374 }
375
376 /* (non-Javadoc)
377 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
378 */
379 protected boolean isIgnore(BerlinModelImportState state){
380 return ! state.getConfig().isDoFacts();
381 }
382
383 }