2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.berlinModel
.in
;
12 import java
.net
.MalformedURLException
;
13 import java
.net
.URISyntaxException
;
15 import java
.sql
.ResultSet
;
16 import java
.sql
.SQLException
;
17 import java
.util
.Collection
;
18 import java
.util
.HashMap
;
19 import java
.util
.HashSet
;
23 import org
.apache
.log4j
.Logger
;
24 import org
.springframework
.stereotype
.Component
;
26 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
27 import eu
.etaxonomy
.cdm
.common
.mediaMetaData
.ImageMetaData
;
28 import eu
.etaxonomy
.cdm
.io
.berlinModel
.BerlinModelTransformer
;
29 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.validation
.BerlinModelFactsImportValidator
;
30 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
31 import eu
.etaxonomy
.cdm
.io
.common
.MapWrapper
;
32 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
33 import eu
.etaxonomy
.cdm
.io
.common
.Source
;
34 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
35 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
36 import eu
.etaxonomy
.cdm
.model
.common
.DescriptionElementSource
;
37 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
38 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
39 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
40 import eu
.etaxonomy
.cdm
.model
.common
.TermVocabulary
;
41 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
42 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
43 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
44 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
45 import eu
.etaxonomy
.cdm
.model
.media
.ImageFile
;
46 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
47 import eu
.etaxonomy
.cdm
.model
.media
.MediaRepresentation
;
48 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceBase
;
49 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
50 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
51 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
59 public class BerlinModelFactsImport
extends BerlinModelImportBase
{
60 private static final Logger logger
= Logger
.getLogger(BerlinModelFactsImport
.class);
62 public static final String NAMESPACE
= "Fact";
64 public static final String SEQUENCE_PREFIX
= "ORDER: ";
66 private int modCount
= 10000;
67 private static final String pluralString
= "facts";
68 private static final String dbTableName
= "Fact";
70 //FIXME don't use as class variable
71 private MapWrapper
<Feature
> featureMap
;
73 public BerlinModelFactsImport(){
78 private TermVocabulary
<Feature
> getFeatureVocabulary(){
80 //TODO work around until service method works
81 TermVocabulary
<Feature
> featureVocabulary
= BerlinModelTransformer
.factCategory2Feature(1).getVocabulary();
82 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
83 return featureVocabulary
;
84 } catch (UnknownCdmTypeException e
) {
85 logger
.error("Feature vocabulary not available. New vocabulary created");
86 return new TermVocabulary
<Feature
>() ;
90 private MapWrapper
<Feature
> invokeFactCategories(BerlinModelImportConfigurator bmiConfig
){
92 MapWrapper
<Feature
> result
= bmiConfig
.getFeatureMap();
93 Source source
= bmiConfig
.getSource();
96 //get data from database
98 " SELECT FactCategory.* " +
99 " FROM FactCategory "+
101 ResultSet rs
= source
.getResultSet(strQuery
) ;
104 TermVocabulary
<Feature
> featureVocabulary
= getFeatureVocabulary();
109 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info("FactCategories handled: " + (i
-1));}
111 int factCategoryId
= rs
.getInt("factCategoryId");
112 String factCategory
= rs
.getString("factCategory");
117 feature
= BerlinModelTransformer
.factCategory2Feature(factCategoryId
);
118 } catch (UnknownCdmTypeException e
) {
119 logger
.warn("New Feature (FactCategoryId: " + factCategoryId
+ ")");
120 feature
= Feature
.NewInstance(factCategory
, factCategory
, null);
121 feature
.setVocabulary(featureVocabulary
);
122 feature
.setSupportsTextData(true);
124 // MaxFactNumber int Checked
125 // ExtensionTableName varchar(100) Checked
126 // Description nvarchar(1000) Checked
127 // locExtensionFormName nvarchar(80) Checked
128 // RankRestrictionFk int Checked
131 result
.put(factCategoryId
, feature
);
133 Collection
<Feature
> col
= result
.getAllValues();
134 getTermService().save((Collection
)col
);
136 } catch (SQLException e
) {
137 logger
.error("SQLException:" + e
);
144 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
147 protected boolean doInvoke(BerlinModelImportState state
) {
148 featureMap
= invokeFactCategories(state
.getConfig());
149 return super.doInvoke(state
);
154 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
157 protected String
getRecordQuery(BerlinModelImportConfigurator config
) {
159 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
161 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
162 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
163 " WHERE (FactId IN (" + ID_LIST_TOKEN
+ "))" +
164 " ORDER By Sequence";
170 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
172 public boolean doPartition(ResultSetPartitioner partitioner
, BerlinModelImportState state
) {
173 boolean success
= true ;
174 BerlinModelImportConfigurator config
= state
.getConfig();
175 Set
<TaxonBase
> taxaToSave
= new HashSet
<TaxonBase
>();
176 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>) partitioner
.getObjectMap(BerlinModelTaxonImport
.NAMESPACE
);
177 Map
<String
, ReferenceBase
> biblioRefMap
= (Map
<String
, ReferenceBase
>) partitioner
.getObjectMap(BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
);
178 Map
<String
, ReferenceBase
> nomRefMap
= (Map
<String
, ReferenceBase
>) partitioner
.getObjectMap(BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
);
180 ResultSet rs
= partitioner
.getResultSet();
182 ReferenceBase
<?
> sourceRef
= state
.getConfig().getSourceReference();
189 if ((i
++ % modCount
) == 0){ logger
.info("Facts handled: " + (i
-1));}
191 int factId
= rs
.getInt("factId");
192 Object taxonIdObj
= rs
.getObject("taxonId");
193 int taxonId
= rs
.getInt("taxonId");
194 Object factRefFkObj
= rs
.getObject("factRefFk");
195 Object categoryFkObj
= rs
.getObject("factCategoryFk");
196 Integer categoryFk
= rs
.getInt("factCategoryFk");
197 String details
= rs
.getString("Details");
198 String fact
= CdmUtils
.Nz(rs
.getString("Fact"));
199 String notes
= CdmUtils
.Nz(rs
.getString("notes"));
200 Boolean doubtfulFlag
= rs
.getBoolean("DoubtfulFlag");
201 Boolean publishFlag
= rs
.getBoolean("publishFlag");
203 TaxonBase taxonBase
= getTaxon(taxonMap
, taxonIdObj
, taxonId
);
204 Feature feature
= getFeature(featureMap
, categoryFkObj
, categoryFk
) ;
206 if (taxonBase
== null){
207 logger
.warn("Taxon for Fact " + factId
+ " does not exist in store");
211 if ( taxonBase
instanceof Taxon
) {
212 taxon
= (Taxon
) taxonBase
;
214 logger
.warn("TaxonBase " + (taxonIdObj
==null?
"(null)":taxonIdObj
) + " for Fact " + factId
+ " was not of type Taxon but: " + taxonBase
.getClass().getSimpleName());
219 TaxonDescription taxonDescription
= null;
220 Set
<TaxonDescription
> descriptionSet
= taxon
.getDescriptions();
222 boolean isImage
= false;
225 if (categoryFk
== 51){ //TODO check also FactCategory string
227 media
= Media
.NewInstance();
228 taxonDescription
= makeImage(state
, fact
, media
, descriptionSet
, taxon
);
229 if (taxonDescription
== null){
233 //all others (no image)
235 for (TaxonDescription desc
: descriptionSet
){
236 if (! desc
.isImageGallery()){
237 taxonDescription
= desc
;
240 if (taxonDescription
== null){
241 taxonDescription
= TaxonDescription
.NewInstance();
242 taxonDescription
.setTitleCache(sourceRef
== null ?
null : sourceRef
.getTitleCache());
243 taxon
.addDescription(taxonDescription
);
248 TextData textData
= null;
249 boolean newTextData
= true;
251 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
252 // description element append the fact text to the existing TextData
253 if(categoryFk
== 31) {
254 Set
<DescriptionElementBase
> descriptionElements
= taxonDescription
.getElements();
255 for (DescriptionElementBase descriptionElement
: descriptionElements
) {
256 String featureString
= descriptionElement
.getFeature().getRepresentation(Language
.DEFAULT()).getLabel();
257 if (descriptionElement
instanceof TextData
&& featureString
.equals("Systematics")) { // TODO: test
258 textData
= (TextData
)descriptionElement
;
259 String factTextStr
= textData
.getText(Language
.DEFAULT());
260 // FIXME: Removing newlines doesn't work
261 if (factTextStr
.contains("\\r\\n")) {
262 factTextStr
= factTextStr
.replaceAll("\\r\\n","");
264 StringBuilder factText
= new StringBuilder(factTextStr
);
265 factText
.append(fact
);
266 fact
= factText
.toString();
273 if(newTextData
== true) {
274 textData
= TextData
.NewInstance();
277 //for diptera database
278 if (categoryFk
== 99 && notes
.contains("<OriginalName>")){
279 notes
= notes
.replaceAll("<OriginalName>", "");
280 notes
= notes
.replaceAll("</OriginalName>", "");
281 fact
= notes
+ ": " + fact
;
283 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
284 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
286 textData
.addMedia(media
);
287 textData
.setType(Feature
.IMAGE());
289 textData
.putText(fact
, Language
.DEFAULT());
290 textData
.setType(feature
);
294 ReferenceBase citation
= null;
295 String factRefFk
= String
.valueOf(factRefFkObj
);
296 if (factRefFkObj
!= null){
297 citation
= getReferenceOnlyFromMaps(
298 biblioRefMap
, nomRefMap
, factRefFk
);
300 if (citation
== null && (factRefFkObj
!= null)){
301 logger
.warn("Citation not found in referenceMap: " + factRefFk
);
304 if (citation
!= null || CdmUtils
.isNotEmpty(details
)){
305 DescriptionElementSource originalSource
= DescriptionElementSource
.NewInstance();
306 originalSource
.setCitation(citation
);
307 originalSource
.setCitationMicroReference(details
);
308 textData
.addSource(originalSource
);
310 taxonDescription
.addElement(textData
);
313 textData
.addMarker(Marker
.NewInstance(MarkerType
.IS_DOUBTFUL(), true));
316 textData
.addMarker(Marker
.NewInstance(MarkerType
.PUBLISH(), publishFlag
));
318 Integer sequence
= rs
.getInt("Sequence");
319 if (sequence
!= null && sequence
!= 999){
320 String strSequence
= String
.valueOf(sequence
);
321 strSequence
= SEQUENCE_PREFIX
+ strSequence
;
322 //TODO make it an Extension when possible
323 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
324 Annotation annotation
= Annotation
.NewInstance(strSequence
, Language
.DEFAULT());
325 textData
.addAnnotation(annotation
);
328 // if (categoryFkObj == FACT_DESCRIPTION){
330 // }else if (categoryFkObj == FACT_OBSERVATION){
332 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
336 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
340 doCreatedUpdatedNotes(state
, textData
, rs
);
343 //Designation References -> unclear how to map to CDM
344 //factId -> OriginalSource for descriptionElements not yet implemented
346 //sequence -> textData is not an identifiable entity therefore extensions are not possible
347 //fact category better
349 taxaToSave
.add(taxon
);
351 } catch (Exception re
){
352 logger
.error("An exception occurred during the facts import");
353 re
.printStackTrace();
358 logger
.info("Facts handled: " + (i
-1));
359 logger
.info("Taxa to save: " + taxaToSave
.size());
360 getTaxonService().save(taxaToSave
);
361 }catch(SQLException e
){
362 throw new RuntimeException(e
);
368 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
370 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
374 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
377 Set
<String
> taxonIdSet
= new HashSet
<String
>();
378 Set
<String
> referenceIdSet
= new HashSet
<String
>();
379 Set
<String
> refDetailIdSet
= new HashSet
<String
>();
381 handleForeignKey(rs
, taxonIdSet
, "taxonId");
382 handleForeignKey(rs
, referenceIdSet
, "FactRefFk");
383 handleForeignKey(rs
, referenceIdSet
, "PTDesignationRefFk");
384 handleForeignKey(rs
, refDetailIdSet
, "FactRefDetailFk");
385 handleForeignKey(rs
, refDetailIdSet
, "PTDesignationRefDetailFk");
389 nameSpace
= BerlinModelTaxonImport
.NAMESPACE
;
390 cdmClass
= TaxonBase
.class;
392 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
393 result
.put(nameSpace
, taxonMap
);
397 nameSpace
= BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
;
398 cdmClass
= ReferenceBase
.class;
399 idSet
= referenceIdSet
;
400 Map
<String
, ReferenceBase
> nomReferenceMap
= (Map
<String
, ReferenceBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
401 result
.put(nameSpace
, nomReferenceMap
);
403 //biblio reference map
404 nameSpace
= BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
;
405 cdmClass
= ReferenceBase
.class;
406 idSet
= referenceIdSet
;
407 Map
<String
, ReferenceBase
> biblioReferenceMap
= (Map
<String
, ReferenceBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
408 result
.put(nameSpace
, biblioReferenceMap
);
411 nameSpace
= BerlinModelRefDetailImport
.NOM_REFDETAIL_NAMESPACE
;
412 cdmClass
= ReferenceBase
.class;
413 idSet
= refDetailIdSet
;
414 Map
<String
, ReferenceBase
> nomRefDetailMap
= (Map
<String
, ReferenceBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
415 result
.put(nameSpace
, nomRefDetailMap
);
417 //biblio refDetail map
418 nameSpace
= BerlinModelRefDetailImport
.BIBLIO_REFDETAIL_NAMESPACE
;
419 cdmClass
= ReferenceBase
.class;
420 idSet
= refDetailIdSet
;
421 Map
<String
, ReferenceBase
> biblioRefDetailMap
= (Map
<String
, ReferenceBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
422 result
.put(nameSpace
, biblioRefDetailMap
);
424 } catch (SQLException e
) {
425 throw new RuntimeException(e
);
435 * @param descriptionSet
438 private TaxonDescription
makeImage(BerlinModelImportState state
, String fact
, Media media
, Set
<TaxonDescription
> descriptionSet
, Taxon taxon
) {
439 TaxonDescription taxonDescription
= null;
440 ReferenceBase sourceRef
= state
.getConfig().getSourceReference();
443 ImageMetaData imageMetaData
= ImageMetaData
.newInstance();
446 url
= new URL(fact
.trim());
447 } catch (MalformedURLException e
) {
448 logger
.warn("Malformed URL. Image could not be imported: " + CdmUtils
.Nz(uri
));
452 imageMetaData
.readMetaData(url
.toURI(), 0);
454 catch(URISyntaxException e
){
457 MediaRepresentation mediaRepresentation
= MediaRepresentation
.NewInstance(imageMetaData
.getMimeType(), null);
458 media
.addRepresentation(mediaRepresentation
);
459 ImageFile image
= ImageFile
.NewInstance(uri
, size
, imageMetaData
);
460 mediaRepresentation
.addRepresentationPart(image
);
462 taxonDescription
= taxon
.getOrCreateImageGallery(sourceRef
== null ?
null :sourceRef
.getTitleCache());
464 return taxonDescription
;
467 private TaxonBase
getTaxon(Map
<String
, TaxonBase
> taxonMap
, Object taxonIdObj
, Integer taxonId
){
468 if (taxonIdObj
!= null){
469 return taxonMap
.get(String
.valueOf(taxonId
));
476 private Feature
getFeature(MapWrapper
<Feature
> featureMap
, Object categoryFkObj
, Integer categoryFk
){
477 if (categoryFkObj
!= null){
478 return featureMap
.get(categoryFk
);
487 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
490 protected boolean doCheck(BerlinModelImportState state
){
491 IOValidator
<BerlinModelImportState
> validator
= new BerlinModelFactsImportValidator();
492 return validator
.validate(state
);
496 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
499 protected String
getTableName() {
504 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
507 public String
getPluralString() {
512 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
514 protected boolean isIgnore(BerlinModelImportState state
){
515 return ! state
.getConfig().isDoFacts();