2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.berlinModel
.in
;
12 import java
.io
.IOException
;
14 import java
.net
.URISyntaxException
;
15 import java
.sql
.ResultSet
;
16 import java
.sql
.SQLException
;
17 import java
.util
.HashMap
;
18 import java
.util
.HashSet
;
21 import java
.util
.UUID
;
23 import javax
.mail
.MethodNotSupportedException
;
25 import org
.apache
.commons
.lang
.StringUtils
;
26 import org
.apache
.http
.HttpException
;
27 import org
.apache
.log4j
.Logger
;
28 import org
.springframework
.stereotype
.Component
;
30 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
31 import eu
.etaxonomy
.cdm
.common
.media
.ImageInfo
;
32 import eu
.etaxonomy
.cdm
.io
.berlinModel
.BerlinModelTransformer
;
33 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.validation
.BerlinModelFactsImportValidator
;
34 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
35 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
36 import eu
.etaxonomy
.cdm
.io
.common
.Source
;
37 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
38 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
39 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
40 import eu
.etaxonomy
.cdm
.model
.common
.DescriptionElementSource
;
41 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
42 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
43 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
44 import eu
.etaxonomy
.cdm
.model
.common
.TermVocabulary
;
45 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
46 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
47 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
48 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
49 import eu
.etaxonomy
.cdm
.model
.media
.ImageFile
;
50 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
51 import eu
.etaxonomy
.cdm
.model
.media
.MediaRepresentation
;
52 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
53 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
54 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
55 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
63 public class BerlinModelFactsImport
extends BerlinModelImportBase
{
64 private static final Logger logger
= Logger
.getLogger(BerlinModelFactsImport
.class);
66 public static final String NAMESPACE
= "Fact";
68 public static final String SEQUENCE_PREFIX
= "ORDER: ";
70 private int modCount
= 10000;
71 private static final String pluralString
= "facts";
72 private static final String dbTableName
= "Fact";
74 //FIXME don't use as class variable
75 private Map
<Integer
, Feature
> featureMap
;
/**
 * Creates a new facts import. Table name and plural label are provided by
 * {@link #getTableName()} and {@link #getPluralString()}.
 */
public BerlinModelFactsImport() {
    super();
}
82 private TermVocabulary
<Feature
> getFeatureVocabulary(){
84 //TODO work around until service method works
85 TermVocabulary
<Feature
> featureVocabulary
= BerlinModelTransformer
.factCategory2Feature(1).getVocabulary();
86 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
87 return featureVocabulary
;
88 } catch (UnknownCdmTypeException e
) {
89 logger
.error("Feature vocabulary not available. New vocabulary created");
90 return TermVocabulary
.NewInstance("User Defined Feature Vocabulary", "User Defined Feature Vocabulary", null, null);
94 private Map
<Integer
, Feature
> invokeFactCategories(BerlinModelImportState state
){
96 Map
<Integer
, Feature
> result
= state
.getConfig().getFeatureMap();
97 Source source
= state
.getConfig().getSource();
100 //get data from database
102 " SELECT FactCategory.* " +
103 " FROM FactCategory "+
105 ResultSet rs
= source
.getResultSet(strQuery
) ;
108 TermVocabulary
<Feature
> featureVocabulary
= getFeatureVocabulary();
113 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info("FactCategories handled: " + (i
-1));}
115 int factCategoryId
= rs
.getInt("factCategoryId");
116 String factCategory
= rs
.getString("factCategory");
120 feature
= BerlinModelTransformer
.factCategory2Feature(factCategoryId
);
121 } catch (UnknownCdmTypeException e
) {
122 UUID featureUuid
= null;
123 featureUuid
= BerlinModelTransformer
.getFeatureUuid(String
.valueOf(factCategoryId
+"-"+factCategory
));
124 if (featureUuid
== null){
125 logger
.warn("New Feature (FactCategoryId: " + factCategoryId
+ ")");
126 featureUuid
= UUID
.randomUUID();
128 feature
= getFeature(state
, featureUuid
, factCategory
, factCategory
, null, featureVocabulary
);
131 // MaxFactNumber int Checked
132 // ExtensionTableName varchar(100) Checked
133 // Description nvarchar(1000) Checked
134 // locExtensionFormName nvarchar(80) Checked
135 // RankRestrictionFk int Checked
138 result
.put(factCategoryId
, feature
);
141 } catch (SQLException e
) {
142 logger
.error("SQLException:" + e
);
144 } catch (UndefinedTransformerMethodException e1
) {
145 logger
.error("UndefinedTransformerMethodException:" + e1
);
146 e1
.printStackTrace();
153 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
156 protected void doInvoke(BerlinModelImportState state
) {
157 featureMap
= invokeFactCategories(state
);
158 super.doInvoke(state
);
165 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
168 protected String
getIdQuery(BerlinModelImportState state
) {
169 String result
= super.getIdQuery(state
);
170 if (StringUtils
.isNotBlank(state
.getConfig().getFactFilter())){
171 result
+= " WHERE " + state
.getConfig().getFactFilter();
173 result
= super.getIdQuery(state
);
175 result
+= getOrderBy(state
.getConfig());
182 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
185 protected String
getRecordQuery(BerlinModelImportConfigurator config
) {
187 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
189 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
190 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
191 " WHERE (FactId IN (" + ID_LIST_TOKEN
+ "))";
192 strQuery
+= getOrderBy(config
);
198 private String
getOrderBy(BerlinModelImportConfigurator config
) {
201 if (config
.getSource().checkColumnExists("Fact", "Sequence")){
202 result
= " ORDER By Fact.Sequence, Fact.FactId";
204 result
= " ORDER By Fact.FactId";
206 } catch (MethodNotSupportedException e
) {
207 logger
.info("checkColumnExists not supported");
208 result
= " ORDER By Fact.FactId";
215 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
217 public boolean doPartition(ResultSetPartitioner partitioner
, BerlinModelImportState state
) {
218 boolean success
= true ;
219 BerlinModelImportConfigurator config
= state
.getConfig();
220 Set
<TaxonBase
> taxaToSave
= new HashSet
<TaxonBase
>();
221 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>) partitioner
.getObjectMap(BerlinModelTaxonImport
.NAMESPACE
);
222 Map
<String
, Reference
> biblioRefMap
= (Map
<String
, Reference
>) partitioner
.getObjectMap(BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
);
223 Map
<String
, Reference
> nomRefMap
= (Map
<String
, Reference
>) partitioner
.getObjectMap(BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
);
225 ResultSet rs
= partitioner
.getResultSet();
227 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
234 if ((i
++ % modCount
) == 0){ logger
.info("Facts handled: " + (i
-1));}
236 int factId
= rs
.getInt("factId");
237 Object taxonIdObj
= rs
.getObject("taxonId");
238 long taxonId
= rs
.getLong("taxonId");
239 Object factRefFkObj
= rs
.getObject("factRefFk");
240 Object categoryFkObj
= rs
.getObject("factCategoryFk");
241 Integer categoryFk
= rs
.getInt("factCategoryFk");
242 String details
= rs
.getString("Details");
243 String fact
= CdmUtils
.Nz(rs
.getString("Fact"));
244 String notes
= CdmUtils
.Nz(rs
.getString("notes"));
245 Boolean doubtfulFlag
= rs
.getBoolean("DoubtfulFlag");
247 TaxonBase
<?
> taxonBase
= getTaxon(taxonMap
, taxonIdObj
, taxonId
);
248 Feature feature
= getFeature(featureMap
, categoryFkObj
, categoryFk
) ;
250 if (taxonBase
== null){
251 logger
.warn("Taxon for Fact " + factId
+ " does not exist in store");
254 TaxonDescription taxonDescription
;
255 if ( (taxonDescription
= getMyTaxonDescripion(taxonBase
, state
, categoryFk
, taxonIdObj
, taxonId
, factId
, fact
, sourceRef
)) == null){
261 TextData textData
= null;
262 boolean newTextData
= true;
264 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
265 // description element append the fact text to the existing TextData
266 if(categoryFk
== 31) {
267 Set
<DescriptionElementBase
> descriptionElements
= taxonDescription
.getElements();
268 for (DescriptionElementBase descriptionElement
: descriptionElements
) {
269 String featureString
= descriptionElement
.getFeature().getRepresentation(Language
.DEFAULT()).getLabel();
270 if (descriptionElement
instanceof TextData
&& featureString
.equals("Systematics")) { // TODO: test
271 textData
= (TextData
)descriptionElement
;
272 String factTextStr
= textData
.getText(Language
.DEFAULT());
273 // FIXME: Removing newlines doesn't work
274 if (factTextStr
.contains("\\r\\n")) {
275 factTextStr
= factTextStr
.replaceAll("\\r\\n","");
277 StringBuilder factText
= new StringBuilder(factTextStr
);
278 factText
.append(fact
);
279 fact
= factText
.toString();
286 if(newTextData
== true) {
287 textData
= TextData
.NewInstance();
290 //for diptera database
291 if (categoryFk
== 99 && notes
.contains("<OriginalName>")){
292 // notes = notes.replaceAll("<OriginalName>", "");
293 // notes = notes.replaceAll("</OriginalName>", "");
294 fact
= notes
+ ": " + fact
;
297 if (categoryFk
== 14 && state
.getConfig().isRemoveHttpMapsAnchor() && fact
.contains("<a href")){
298 //example <a href="http://euromed.luomus.fi/euromed_map.php?taxon=280629&size=medium">distribution</a>
299 fact
= fact
.replace("<a href=\"", "").replace("\">distribution</a>", "");
302 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
303 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
304 if (! taxonDescription
.isImageGallery()){
305 textData
.putText(Language
.DEFAULT(), fact
);
306 textData
.setFeature(feature
);
310 Reference citation
= null;
311 String factRefFk
= String
.valueOf(factRefFkObj
);
312 if (factRefFkObj
!= null){
313 citation
= getReferenceOnlyFromMaps(biblioRefMap
, nomRefMap
, factRefFk
);
315 if (citation
== null && (factRefFkObj
!= null)){
316 logger
.warn("Citation not found in referenceMap: " + factRefFk
);
319 if (citation
!= null || CdmUtils
.isNotEmpty(details
)){
320 DescriptionElementSource originalSource
= DescriptionElementSource
.NewInstance();
321 originalSource
.setCitation(citation
);
322 originalSource
.setCitationMicroReference(details
);
323 textData
.addSource(originalSource
);
325 taxonDescription
.addElement(textData
);
328 textData
.addMarker(Marker
.NewInstance(MarkerType
.IS_DOUBTFUL(), true));
331 String strPublishFlag
= "publishFlag";
332 boolean publishFlagExists
= state
.getConfig().getSource().checkColumnExists(dbTableName
, strPublishFlag
);
333 if (publishFlagExists
){
334 Boolean publishFlag
= rs
.getBoolean(strPublishFlag
);
335 textData
.addMarker(Marker
.NewInstance(MarkerType
.PUBLISH(), publishFlag
));
339 Integer sequence
= rs
.getInt("Sequence");
340 if (sequence
!= null && sequence
!= 999){
341 String strSequence
= String
.valueOf(sequence
);
342 strSequence
= SEQUENCE_PREFIX
+ strSequence
;
343 //TODO make it an Extension when possible
344 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
345 Annotation annotation
= Annotation
.NewInstance(strSequence
, Language
.DEFAULT());
346 textData
.addAnnotation(annotation
);
349 // if (categoryFkObj == FACT_DESCRIPTION){
351 // }else if (categoryFkObj == FACT_OBSERVATION){
353 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
357 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
361 doCreatedUpdatedNotes(state
, textData
, rs
);
364 //Designation References -> unclear how to map to CDM
365 //factId -> OriginalSource for descriptionElements not yet implemented
367 //sequence -> textData is not an identifiable entity therefore extensions are not possible
368 //fact category better
370 taxaToSave
.add(taxonBase
);
372 } catch (Exception re
){
373 logger
.error("An exception occurred during the facts import");
374 re
.printStackTrace();
379 logger
.info("Facts handled: " + (i
-1));
380 logger
.info("Taxa to save: " + taxaToSave
.size());
381 getTaxonService().save(taxaToSave
);
382 }catch(SQLException e
){
383 throw new RuntimeException(e
);
388 private TaxonDescription
getMyTaxonDescripion(TaxonBase taxonBase
, BerlinModelImportState state
, Integer categoryFk
, Object taxonIdObj
, long taxonId
, int factId
, String fact
, Reference
<?
> sourceRef
) {
390 if ( taxonBase
instanceof Taxon
) {
391 taxon
= (Taxon
) taxonBase
;
393 logger
.warn("TaxonBase " + (taxonIdObj
==null?
"(null)":taxonIdObj
) + " for Fact " + factId
+ " was not of type Taxon but: " + taxonBase
.getClass().getSimpleName());
397 TaxonDescription taxonDescription
= null;
398 Set
<TaxonDescription
> descriptionSet
= taxon
.getDescriptions();
400 boolean isImage
= false;
403 if (categoryFk
== 51){ //TODO check also FactCategory string
405 media
= Media
.NewInstance();
406 taxonDescription
= makeImage(state
, fact
, media
, descriptionSet
, taxon
);
410 if (taxonDescription
== null){
414 TextData textData
= null;
415 for (DescriptionElementBase el
: taxonDescription
.getElements()){
416 if (el
.isInstanceOf(TextData
.class)){
417 textData
= CdmBase
.deproxy(el
, TextData
.class);
420 if (textData
== null){
421 textData
= TextData
.NewInstance(Feature
.IMAGE());
422 taxonDescription
.addElement(textData
);
424 textData
.addMedia(media
);
426 //all others (no image) -> getDescription
428 for (TaxonDescription desc
: descriptionSet
){
429 if (! desc
.isImageGallery()){
430 taxonDescription
= desc
;
433 if (taxonDescription
== null){
434 taxonDescription
= TaxonDescription
.NewInstance();
435 taxonDescription
.setTitleCache(sourceRef
== null ?
null : sourceRef
.getTitleCache(), true);
436 taxon
.addDescription(taxonDescription
);
439 return taxonDescription
;
444 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
446 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
450 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
453 Set
<String
> taxonIdSet
= new HashSet
<String
>();
454 Set
<String
> referenceIdSet
= new HashSet
<String
>();
455 Set
<String
> refDetailIdSet
= new HashSet
<String
>();
457 handleForeignKey(rs
, taxonIdSet
, "taxonId");
458 handleForeignKey(rs
, referenceIdSet
, "FactRefFk");
459 handleForeignKey(rs
, referenceIdSet
, "PTDesignationRefFk");
460 handleForeignKey(rs
, refDetailIdSet
, "FactRefDetailFk");
461 handleForeignKey(rs
, refDetailIdSet
, "PTDesignationRefDetailFk");
465 nameSpace
= BerlinModelTaxonImport
.NAMESPACE
;
466 cdmClass
= TaxonBase
.class;
468 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
469 result
.put(nameSpace
, taxonMap
);
473 nameSpace
= BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
;
474 cdmClass
= Reference
.class;
475 idSet
= referenceIdSet
;
476 Map
<String
, Reference
> nomReferenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
477 result
.put(nameSpace
, nomReferenceMap
);
479 //biblio reference map
480 nameSpace
= BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
;
481 cdmClass
= Reference
.class;
482 idSet
= referenceIdSet
;
483 Map
<String
, Reference
> biblioReferenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
484 result
.put(nameSpace
, biblioReferenceMap
);
487 nameSpace
= BerlinModelRefDetailImport
.NOM_REFDETAIL_NAMESPACE
;
488 cdmClass
= Reference
.class;
489 idSet
= refDetailIdSet
;
490 Map
<String
, Reference
> nomRefDetailMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
491 result
.put(nameSpace
, nomRefDetailMap
);
493 //biblio refDetail map
494 nameSpace
= BerlinModelRefDetailImport
.BIBLIO_REFDETAIL_NAMESPACE
;
495 cdmClass
= Reference
.class;
496 idSet
= refDetailIdSet
;
497 Map
<String
, Reference
> biblioRefDetailMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
498 result
.put(nameSpace
, biblioRefDetailMap
);
500 } catch (SQLException e
) {
501 throw new RuntimeException(e
);
511 * @param descriptionSet
514 private TaxonDescription
makeImage(BerlinModelImportState state
, String fact
, Media media
, Set
<TaxonDescription
> descriptionSet
, Taxon taxon
) {
515 TaxonDescription taxonDescription
= null;
516 Reference sourceRef
= state
.getTransactionalSourceReference();
518 ImageInfo imageInfo
= null;
521 uri
= new URI(fact
.trim());
522 } catch (URISyntaxException e
) {
523 logger
.warn("URISyntaxException. Image could not be imported: " + fact
);
527 imageInfo
= ImageInfo
.NewInstance(uri
, 0);
528 } catch (IOException e
) {
529 logger
.error("IOError reading image metadata." , e
);
530 } catch (HttpException e
) {
531 logger
.error("HttpException reading image metadata." , e
);
533 MediaRepresentation mediaRepresentation
= MediaRepresentation
.NewInstance(imageInfo
.getMimeType(), null);
534 media
.addRepresentation(mediaRepresentation
);
535 ImageFile image
= ImageFile
.NewInstance(uri
, size
, imageInfo
);
536 mediaRepresentation
.addRepresentationPart(image
);
538 taxonDescription
= taxon
.getOrCreateImageGallery(sourceRef
== null ?
null :sourceRef
.getTitleCache());
540 return taxonDescription
;
543 private TaxonBase
getTaxon(Map
<String
, TaxonBase
> taxonMap
, Object taxonIdObj
, Long taxonId
){
544 if (taxonIdObj
!= null){
545 return taxonMap
.get(String
.valueOf(taxonId
));
552 private Feature
getFeature(Map
<Integer
, Feature
> featureMap
, Object categoryFkObj
, Integer categoryFk
){
553 if (categoryFkObj
!= null){
554 return featureMap
.get(categoryFk
);
563 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
566 protected boolean doCheck(BerlinModelImportState state
){
567 IOValidator
<BerlinModelImportState
> validator
= new BerlinModelFactsImportValidator();
568 return validator
.validate(state
);
572 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
575 protected String
getTableName() {
580 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
583 public String
getPluralString() {
588 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
590 protected boolean isIgnore(BerlinModelImportState state
){
591 return ! state
.getConfig().isDoFacts();