2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.berlinModel
.in
;
12 import java
.io
.IOException
;
13 import java
.net
.MalformedURLException
;
15 import java
.net
.URISyntaxException
;
17 import java
.sql
.ResultSet
;
18 import java
.sql
.SQLException
;
19 import java
.util
.Collection
;
20 import java
.util
.HashMap
;
21 import java
.util
.HashSet
;
25 import org
.apache
.http
.HttpException
;
26 import org
.apache
.log4j
.Logger
;
27 import org
.springframework
.stereotype
.Component
;
29 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
30 import eu
.etaxonomy
.cdm
.common
.mediaMetaData
.ImageMetaData
;
31 import eu
.etaxonomy
.cdm
.io
.berlinModel
.BerlinModelTransformer
;
32 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.validation
.BerlinModelFactsImportValidator
;
33 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
34 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
35 import eu
.etaxonomy
.cdm
.io
.common
.Source
;
36 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
37 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
38 import eu
.etaxonomy
.cdm
.model
.common
.DescriptionElementSource
;
39 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
40 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
41 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
42 import eu
.etaxonomy
.cdm
.model
.common
.TermVocabulary
;
43 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
44 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
45 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
46 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
47 import eu
.etaxonomy
.cdm
.model
.media
.ImageFile
;
48 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
49 import eu
.etaxonomy
.cdm
.model
.media
.MediaRepresentation
;
50 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
51 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
52 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
53 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
/**
 * Import step for the Berlin Model fact data (database table "Fact", see dbTableName).
 * The visible methods load the fact categories as Features and turn fact records into
 * TextData elements that are added to a TaxonDescription of the related Taxon.
 */
61 public class BerlinModelFactsImport
extends BerlinModelImportBase
{
62 private static final Logger logger
= Logger
.getLogger(BerlinModelFactsImport
.class);
// NOTE(review): not referenced in the visible code; presumably the id namespace of imported facts - confirm
64 public static final String NAMESPACE
= "Fact";
// prefix put in front of the "Sequence" column value when doPartition stores it as an annotation text
66 public static final String SEQUENCE_PREFIX
= "ORDER: ";
// progress logging interval: an info line is logged when the record counter modulo modCount is 0
68 private int modCount
= 10000;
// NOTE(review): presumably returned by getPluralString() / getTableName(); their bodies are not visible here - confirm
69 private static final String pluralString
= "facts";
70 private static final String dbTableName
= "Fact";
72 //FIXME don't use as class variable
// fact category id -> Feature; assigned in doInvoke from invokeFactCategories and read in doPartition
73 private Map
<Integer
, Feature
> featureMap
;
// No-argument constructor. NOTE(review): the constructor body is not part of the visible listing.
75 public BerlinModelFactsImport(){
/**
 * Returns the vocabulary of the Feature that BerlinModelTransformer maps fact category 1 to.
 * If the transformer throws an UnknownCdmTypeException, an error is logged and a newly
 * created TermVocabulary is returned instead.
 *
 * @return the feature vocabulary, or a new vocabulary if the lookup fails
 */
80 private TermVocabulary
<Feature
> getFeatureVocabulary(){
82 //TODO work around until service method works
83 TermVocabulary
<Feature
> featureVocabulary
= BerlinModelTransformer
.factCategory2Feature(1).getVocabulary();
84 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
85 return featureVocabulary
;
86 } catch (UnknownCdmTypeException e
) {
// fallback: the vocabulary could not be derived from the transformer
87 logger
.error("Feature vocabulary not available. New vocabulary created");
88 return new TermVocabulary
<Feature
>() ;
/**
 * Reads the FactCategory records from the source database and maps every factCategoryId
 * to a Feature. Categories known to BerlinModelTransformer use the transformer's feature;
 * for unknown categories (UnknownCdmTypeException) a new Feature is created from the
 * factCategory text, added to the feature vocabulary and marked as supporting text data.
 * The features are stored in the map obtained from the configurator and the map values
 * are saved through the term service. An SQLException is only logged.
 *
 * @param bmiConfig the configurator providing the feature map and the database source
 * @return NOTE(review): the return statement is not visible here; presumably the filled map - confirm
 */
92 private Map
<Integer
, Feature
> invokeFactCategories(BerlinModelImportConfigurator bmiConfig
){
// the map is owned by the configurator; entries are added to it below
94 Map
<Integer
, Feature
> result
= bmiConfig
.getFeatureMap();
95 Source source
= bmiConfig
.getSource();
98 //get data from database
100 " SELECT FactCategory.* " +
101 " FROM FactCategory "+
103 ResultSet rs
= source
.getResultSet(strQuery
) ;
106 TermVocabulary
<Feature
> featureVocabulary
= getFeatureVocabulary();
// progress logging every modCount records; the "i != 1" part suppresses the line for the first record
111 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info("FactCategories handled: " + (i
-1));}
113 int factCategoryId
= rs
.getInt("factCategoryId");
114 String factCategory
= rs
.getString("factCategory");
// known categories are resolved by the transformer
119 feature
= BerlinModelTransformer
.factCategory2Feature(factCategoryId
);
120 } catch (UnknownCdmTypeException e
) {
// unknown category: create a new Feature using the category text for both text arguments
121 logger
.warn("New Feature (FactCategoryId: " + factCategoryId
+ ")");
122 feature
= Feature
.NewInstance(factCategory
, factCategory
, null);
123 featureVocabulary
.addTerm(feature
);
124 feature
.setSupportsTextData(true);
126 // MaxFactNumber int Checked
127 // ExtensionTableName varchar(100) Checked
128 // Description nvarchar(1000) Checked
129 // locExtensionFormName nvarchar(80) Checked
130 // RankRestrictionFk int Checked
133 result
.put(factCategoryId
, feature
);
// save all features of the map; the raw Collection cast drops the generic element type
135 Collection
<Feature
> col
= result
.values();
136 getTermService().save((Collection
)col
);
138 } catch (SQLException e
) {
// the exception is logged (string concatenation, no stack trace) and not rethrown here
139 logger
.error("SQLException:" + e
);
146 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
/**
 * Loads the fact categories into featureMap and then delegates to the superclass
 * implementation of doInvoke.
 *
 * @param state the import state; its configurator is passed to invokeFactCategories
 * @return the result of super.doInvoke(state)
 */
149 protected boolean doInvoke(BerlinModelImportState state
) {
// featureMap must be filled before the records are processed, doPartition reads it
150 featureMap
= invokeFactCategories(state
.getConfig());
151 return super.doInvoke(state
);
156 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
/**
 * Builds the SQL query for the fact records of one partition. The visible fragments select
 * all Fact columns plus PTaxon.RIdentifier (as taxonId) and RefDetail.Details, restrict the
 * result to the FactIds given by ID_LIST_TOKEN and order it by Sequence.
 * NOTE(review): the FROM clause and the return statement are not visible in this listing.
 *
 * @param config the import configurator (not used in the visible fragments)
 * @return presumably the assembled query string - confirm
 */
159 protected String
getRecordQuery(BerlinModelImportConfigurator config
) {
161 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
163 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
164 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
// ID_LIST_TOKEN is a placeholder; presumably replaced by the id list of the partition elsewhere - confirm
165 " WHERE (FactId IN (" + ID_LIST_TOKEN
+ "))" +
166 " ORDER By Sequence";
172 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
/**
 * Imports one partition of fact records. For each record the related taxon is looked up,
 * a TaxonDescription is chosen or created, and a TextData element carrying the fact text
 * (or the image media for fact category 51) is added to it, where applicable together with
 * a source, markers and a sequence annotation. The collected taxa are saved through the
 * taxon service at the end.
 * NOTE(review): several lines (loop header, some conditions, return) are not visible in
 * this listing; comments below only describe what is visible.
 *
 * @param partitioner provides the result set and the maps of related objects
 * @param state the import state; its configurator supplies the source reference
 * @return presumably the success flag - the return statement is not visible here
 * @throws RuntimeException wrapping an SQLException
 */
174 public boolean doPartition(ResultSetPartitioner partitioner
, BerlinModelImportState state
) {
175 boolean success
= true ;
176 BerlinModelImportConfigurator config
= state
.getConfig();
177 Set
<TaxonBase
> taxaToSave
= new HashSet
<TaxonBase
>();
// related objects of this partition, keyed by their id as String
178 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>) partitioner
.getObjectMap(BerlinModelTaxonImport
.NAMESPACE
);
179 Map
<String
, Reference
> biblioRefMap
= (Map
<String
, Reference
>) partitioner
.getObjectMap(BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
);
180 Map
<String
, Reference
> nomRefMap
= (Map
<String
, Reference
>) partitioner
.getObjectMap(BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
);
182 ResultSet rs
= partitioner
.getResultSet();
184 Reference
<?
> sourceRef
= state
.getConfig().getSourceReference();
// progress logging every modCount records
191 if ((i
++ % modCount
) == 0){ logger
.info("Facts handled: " + (i
-1));}
// read the columns of the current fact record
193 int factId
= rs
.getInt("factId");
// taxonId and factCategoryFk are read both as Object and as number; the Object values are
// used for the null checks in getTaxon / getFeature
194 Object taxonIdObj
= rs
.getObject("taxonId");
195 int taxonId
= rs
.getInt("taxonId");
196 Object factRefFkObj
= rs
.getObject("factRefFk");
197 Object categoryFkObj
= rs
.getObject("factCategoryFk");
198 Integer categoryFk
= rs
.getInt("factCategoryFk");
199 String details
= rs
.getString("Details");
200 String fact
= CdmUtils
.Nz(rs
.getString("Fact"));
201 String notes
= CdmUtils
.Nz(rs
.getString("notes"));
202 Boolean doubtfulFlag
= rs
.getBoolean("DoubtfulFlag");
203 Boolean publishFlag
= rs
.getBoolean("publishFlag");
205 TaxonBase taxonBase
= getTaxon(taxonMap
, taxonIdObj
, taxonId
);
206 Feature feature
= getFeature(featureMap
, categoryFkObj
, categoryFk
) ;
208 if (taxonBase
== null){
209 logger
.warn("Taxon for Fact " + factId
+ " does not exist in store");
// facts are only attached to accepted taxa (class Taxon); other TaxonBase types are reported
213 if ( taxonBase
instanceof Taxon
) {
214 taxon
= (Taxon
) taxonBase
;
216 logger
.warn("TaxonBase " + (taxonIdObj
==null?
"(null)":taxonIdObj
) + " for Fact " + factId
+ " was not of type Taxon but: " + taxonBase
.getClass().getSimpleName());
221 TaxonDescription taxonDescription
= null;
222 Set
<TaxonDescription
> descriptionSet
= taxon
.getDescriptions();
224 boolean isImage
= false;
// fact category 51: the fact text is handled as an image URI by makeImage
227 if (categoryFk
== 51){ //TODO check also FactCategory string
229 media
= Media
.NewInstance();
230 taxonDescription
= makeImage(state
, fact
, media
, descriptionSet
, taxon
);
231 if (taxonDescription
== null){
235 //all others (no image)
// reuse an existing description of the taxon that is not an image gallery
237 for (TaxonDescription desc
: descriptionSet
){
238 if (! desc
.isImageGallery()){
239 taxonDescription
= desc
;
// none found: create a description titled with the source reference's title cache and attach it to the taxon
242 if (taxonDescription
== null){
243 taxonDescription
= TaxonDescription
.NewInstance();
244 taxonDescription
.setTitleCache(sourceRef
== null ?
null : sourceRef
.getTitleCache(), true);
245 taxon
.addDescription(taxonDescription
);
250 TextData textData
= null;
251 boolean newTextData
= true;
253 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
254 // description element append the fact text to the existing TextData
255 if(categoryFk
== 31) {
256 Set
<DescriptionElementBase
> descriptionElements
= taxonDescription
.getElements();
257 for (DescriptionElementBase descriptionElement
: descriptionElements
) {
258 String featureString
= descriptionElement
.getFeature().getRepresentation(Language
.DEFAULT()).getLabel();
259 if (descriptionElement
instanceof TextData
&& featureString
.equals("Systematics")) { // TODO: test
260 textData
= (TextData
)descriptionElement
;
261 String factTextStr
= textData
.getText(Language
.DEFAULT());
262 // FIXME: Removing newlines doesn't work
// NOTE(review): contains() looks for the literal characters backslash-r-backslash-n, whereas
// replaceAll() interprets the same string as a regular expression matching CR LF. The guard
// and the replacement therefore do not test for the same thing, which probably explains the FIXME.
263 if (factTextStr
.contains("\\r\\n")) {
264 factTextStr
= factTextStr
.replaceAll("\\r\\n","");
// append the new fact text to the text of the existing element
266 StringBuilder factText
= new StringBuilder(factTextStr
);
267 factText
.append(fact
);
268 fact
= factText
.toString();
// create a fresh TextData when newTextData is still true
275 if(newTextData
== true) {
276 textData
= TextData
.NewInstance();
279 //for diptera database
280 if (categoryFk
== 99 && notes
.contains("<OriginalName>")){
281 // notes = notes.replaceAll("<OriginalName>", "");
282 // notes = notes.replaceAll("</OriginalName>", "");
283 fact
= notes
+ ": " + fact
;
285 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
286 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
// NOTE(review): the condition separating the media branch (IMAGE feature) from the text
// branch (category feature) is not visible here; presumably isImage - confirm
288 textData
.addMedia(media
);
289 textData
.setFeature(Feature
.IMAGE());
291 textData
.putText(fact
, Language
.DEFAULT());
292 textData
.setFeature(feature
);
// resolve the cited reference by factRefFk from the two reference maps
296 Reference citation
= null;
297 String factRefFk
= String
.valueOf(factRefFkObj
);
298 if (factRefFkObj
!= null){
299 citation
= getReferenceOnlyFromMaps(
300 biblioRefMap
, nomRefMap
, factRefFk
);
302 if (citation
== null && (factRefFkObj
!= null)){
303 logger
.warn("Citation not found in referenceMap: " + factRefFk
);
// a source is added when a citation was found or a detail text exists
306 if (citation
!= null || CdmUtils
.isNotEmpty(details
)){
307 DescriptionElementSource originalSource
= DescriptionElementSource
.NewInstance();
308 originalSource
.setCitation(citation
);
309 originalSource
.setCitationMicroReference(details
);
310 textData
.addSource(originalSource
);
312 taxonDescription
.addElement(textData
);
// NOTE(review): presumably guarded by doubtfulFlag; the condition is not visible here - confirm
315 textData
.addMarker(Marker
.NewInstance(MarkerType
.IS_DOUBTFUL(), true));
318 textData
.addMarker(Marker
.NewInstance(MarkerType
.PUBLISH(), publishFlag
));
// NOTE(review): ResultSet.getInt returns a primitive int (0 for SQL NULL), so the boxed
// value is never null and the null check below cannot fail
320 Integer sequence
= rs
.getInt("Sequence");
// a Sequence other than 999 is stored as an annotation with text SEQUENCE_PREFIX + value
321 if (sequence
!= null && sequence
!= 999){
322 String strSequence
= String
.valueOf(sequence
);
323 strSequence
= SEQUENCE_PREFIX
+ strSequence
;
324 //TODO make it an Extension when possible
325 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
326 Annotation annotation
= Annotation
.NewInstance(strSequence
, Language
.DEFAULT());
327 textData
.addAnnotation(annotation
);
330 // if (categoryFkObj == FACT_DESCRIPTION){
332 // }else if (categoryFkObj == FACT_OBSERVATION){
334 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
338 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
342 doCreatedUpdatedNotes(state
, textData
, rs
);
345 //Designation References -> unclear how to map to CDM
346 //factId -> OriginalSource for descriptionElements not yet implemented
348 //sequence -> textData is not an identifiable entity therefore extensions are not possible
349 //fact category better
// remember the taxon so that it is saved with its new description elements
351 taxaToSave
.add(taxon
);
353 } catch (Exception re
){
// exceptions caught here are logged without details; the stack trace goes to stderr via printStackTrace()
354 logger
.error("An exception occurred during the facts import");
355 re
.printStackTrace();
360 logger
.info("Facts handled: " + (i
-1));
361 logger
.info("Taxa to save: " + taxaToSave
.size());
362 getTaxonService().save(taxaToSave
);
363 }catch(SQLException e
){
// SQL errors are rethrown unchecked with the original exception as cause
364 throw new RuntimeException(e
);
370 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
/**
 * Collects the foreign keys found in the given result set and loads the objects they refer
 * to, grouped by namespace. Taxa, nomenclatural and bibliographic references as well as
 * nomenclatural and bibliographic reference details are fetched through
 * getCommonService().getSourcedObjectsByIdInSource(...) and put into the result map under
 * their namespace.
 * NOTE(review): the loop over the result set, some local declarations and the return
 * statement are not visible in this listing.
 *
 * @param rs the result set whose foreign key columns are evaluated
 * @return presumably the map built here (namespace -> id -> object) - confirm
 * @throws RuntimeException wrapping an SQLException
 */
372 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
376 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
// id sets filled from the foreign key columns of the result set
379 Set
<String
> taxonIdSet
= new HashSet
<String
>();
380 Set
<String
> referenceIdSet
= new HashSet
<String
>();
381 Set
<String
> refDetailIdSet
= new HashSet
<String
>();
// both reference keys go into referenceIdSet, both refDetail keys into refDetailIdSet
383 handleForeignKey(rs
, taxonIdSet
, "taxonId");
384 handleForeignKey(rs
, referenceIdSet
, "FactRefFk");
385 handleForeignKey(rs
, referenceIdSet
, "PTDesignationRefFk");
386 handleForeignKey(rs
, refDetailIdSet
, "FactRefDetailFk");
387 handleForeignKey(rs
, refDetailIdSet
, "PTDesignationRefDetailFk");
// taxon map (NOTE(review): the assignment of idSet for this section is not visible here)
391 nameSpace
= BerlinModelTaxonImport
.NAMESPACE
;
392 cdmClass
= TaxonBase
.class;
394 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
395 result
.put(nameSpace
, taxonMap
);
// nomenclatural reference map; uses the same id set as the bibliographic reference map below
399 nameSpace
= BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
;
400 cdmClass
= Reference
.class;
401 idSet
= referenceIdSet
;
402 Map
<String
, Reference
> nomReferenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
403 result
.put(nameSpace
, nomReferenceMap
);
405 //biblio reference map
406 nameSpace
= BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
;
407 cdmClass
= Reference
.class;
408 idSet
= referenceIdSet
;
409 Map
<String
, Reference
> biblioReferenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
410 result
.put(nameSpace
, biblioReferenceMap
);
// nomenclatural refDetail map; uses the same id set as the bibliographic refDetail map below
413 nameSpace
= BerlinModelRefDetailImport
.NOM_REFDETAIL_NAMESPACE
;
414 cdmClass
= Reference
.class;
415 idSet
= refDetailIdSet
;
416 Map
<String
, Reference
> nomRefDetailMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
417 result
.put(nameSpace
, nomRefDetailMap
);
419 //biblio refDetail map
420 nameSpace
= BerlinModelRefDetailImport
.BIBLIO_REFDETAIL_NAMESPACE
;
421 cdmClass
= Reference
.class;
422 idSet
= refDetailIdSet
;
423 Map
<String
, Reference
> biblioRefDetailMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
424 result
.put(nameSpace
, biblioRefDetailMap
);
426 } catch (SQLException e
) {
// SQL errors are rethrown unchecked with the original exception as cause
427 throw new RuntimeException(e
);
437 * @param descriptionSet
/**
 * Builds the image representation for an image fact: the trimmed fact text is parsed as a
 * URI, image metadata is read from that URI, a MediaRepresentation with an ImageFile part
 * is added to the given media, and the image gallery of the taxon is fetched or created.
 * NOTE(review): the caller treats a null result as failure; the statements that follow the
 * logged exceptions (e.g. an early return) are not visible in this listing.
 *
 * @param state the import state; the source reference's title cache is passed to getOrCreateImageGallery
 * @param fact the fact text, expected to contain the image URI
 * @param media the media object that receives the new representation
 * @param descriptionSet not used in the visible code
 * @param taxon the taxon whose image gallery is requested
 * @return the taxonDescription variable, which is assigned the taxon's image gallery
 */
440 private TaxonDescription
makeImage(BerlinModelImportState state
, String fact
, Media media
, Set
<TaxonDescription
> descriptionSet
, Taxon taxon
) {
441 TaxonDescription taxonDescription
= null;
442 Reference sourceRef
= state
.getConfig().getSourceReference();
444 ImageMetaData imageMetaData
= ImageMetaData
.newInstance();
// the fact text is interpreted as the location of the image
447 uri
= new URI(fact
.trim());
448 } catch (URISyntaxException e
) {
449 logger
.warn("URISyntaxException. Image could not be imported: " + fact
);
// metadata problems are logged with the exception attached
453 imageMetaData
.readMetaData(uri
, 0);
454 } catch (IOException e
) {
455 logger
.error("IOError reading image metadata." , e
);
456 } catch (HttpException e
) {
457 logger
.error("HttpException reading image metadata." , e
);
// one representation with the mime type from the metadata and a single image part
459 MediaRepresentation mediaRepresentation
= MediaRepresentation
.NewInstance(imageMetaData
.getMimeType(), null);
460 media
.addRepresentation(mediaRepresentation
);
461 ImageFile image
= ImageFile
.NewInstance(uri
, size
, imageMetaData
);
462 mediaRepresentation
.addRepresentationPart(image
);
// the gallery title is the source reference's title cache, or null without source reference
464 taxonDescription
= taxon
.getOrCreateImageGallery(sourceRef
== null ?
null :sourceRef
.getTitleCache());
466 return taxonDescription
;
/**
 * Looks up the taxon for a fact record. If taxonIdObj is not null, the taxon stored in
 * taxonMap under String.valueOf(taxonId) is returned.
 * NOTE(review): the branch for a null taxonIdObj is not visible here; presumably null is returned.
 *
 * @param taxonMap taxa keyed by their id as String
 * @param taxonIdObj the taxon id as read with getObject, only used for the null check
 * @param taxonId the taxon id used to build the map key
 * @return the matching taxon, or null if the map has no entry for the key
 */
469 private TaxonBase
getTaxon(Map
<String
, TaxonBase
> taxonMap
, Object taxonIdObj
, Integer taxonId
){
470 if (taxonIdObj
!= null){
471 return taxonMap
.get(String
.valueOf(taxonId
));
/**
 * Looks up the feature for a fact category. If categoryFkObj is not null, the feature
 * stored in featureMap under categoryFk is returned.
 * NOTE(review): the branch for a null categoryFkObj is not visible here; presumably null is returned.
 *
 * @param featureMap features keyed by fact category id
 * @param categoryFkObj the category id as read with getObject, only used for the null check
 * @param categoryFk the category id used as map key
 * @return the matching feature, or null if the map has no entry for the key
 */
478 private Feature
getFeature(Map
<Integer
, Feature
> featureMap
, Object categoryFkObj
, Integer categoryFk
){
479 if (categoryFkObj
!= null){
480 return featureMap
.get(categoryFk
);
489 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
/**
 * Validates the import state with a BerlinModelFactsImportValidator.
 *
 * @param state the import state to validate
 * @return the result of validator.validate(state)
 */
492 protected boolean doCheck(BerlinModelImportState state
){
493 IOValidator
<BerlinModelImportState
> validator
= new BerlinModelFactsImportValidator();
494 return validator
.validate(state
);
498 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
// Returns the name of the database table handled by this import.
// NOTE(review): the method body is not visible in this listing; presumably it returns dbTableName - confirm
501 protected String
getTableName() {
506 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
// Returns the plural label for the imported objects.
// NOTE(review): the method body is not visible in this listing; presumably it returns pluralString - confirm
509 public String
getPluralString() {
514 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
/**
 * Tells whether this import step is to be skipped.
 *
 * @param state the import state whose configurator is asked via isDoFacts()
 * @return true if the configurator's isDoFacts() is false, false otherwise
 */
516 protected boolean isIgnore(BerlinModelImportState state
){
517 return ! state
.getConfig().isDoFacts();