/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.berlinModel
.in
;
12 import java
.io
.IOException
;
14 import java
.net
.URISyntaxException
;
15 import java
.sql
.ResultSet
;
16 import java
.sql
.SQLException
;
17 import java
.util
.HashMap
;
18 import java
.util
.HashSet
;
21 import java
.util
.UUID
;
23 import org
.apache
.commons
.lang
.StringUtils
;
24 import org
.apache
.http
.HttpException
;
25 import org
.apache
.log4j
.Logger
;
26 import org
.springframework
.stereotype
.Component
;
28 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
29 import eu
.etaxonomy
.cdm
.common
.media
.ImageInfo
;
30 import eu
.etaxonomy
.cdm
.io
.berlinModel
.BerlinModelTransformer
;
31 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.validation
.BerlinModelFactsImportValidator
;
32 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
33 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
34 import eu
.etaxonomy
.cdm
.io
.common
.Source
;
35 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
36 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
37 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
38 import eu
.etaxonomy
.cdm
.model
.common
.DescriptionElementSource
;
39 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
40 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
41 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
42 import eu
.etaxonomy
.cdm
.model
.common
.TermVocabulary
;
43 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
44 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
45 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
46 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
47 import eu
.etaxonomy
.cdm
.model
.media
.ImageFile
;
48 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
49 import eu
.etaxonomy
.cdm
.model
.media
.MediaRepresentation
;
50 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
51 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
52 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
53 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
61 public class BerlinModelFactsImport
extends BerlinModelImportBase
{
// Class-wide log4j logger.
62 private static final Logger logger
= Logger
.getLogger(BerlinModelFactsImport
.class);
// Namespace constant for facts; it is not referenced in the visible part of this class.
64 public static final String NAMESPACE
= "Fact";
// Prefix of the annotation text that doPartition builds from the value of the "Sequence" column.
66 public static final String SEQUENCE_PREFIX
= "ORDER: ";
// Progress-log interval: the record loops log a message whenever their counter is a multiple of this value.
68 private int modCount
= 10000;
// Plural label handed to the base-class constructor.
69 private static final String pluralString
= "facts";
// Source table name handed to the base-class constructor; also used by doPartition for its column-existence check.
70 private static final String dbTableName
= "Fact";
72 //FIXME don't use as class variable
// FactCategory id -> Feature. Assigned in doInvoke from invokeFactCategories and read in doPartition.
73 private Map
<Integer
, Feature
> featureMap
;
/**
 * Creates the facts import, handing the source table name ("Fact") and the
 * plural label ("facts") to the base-class constructor.
 */
public BerlinModelFactsImport() {
    super(dbTableName, pluralString);
}
80 private TermVocabulary
<Feature
> getFeatureVocabulary(){
82 //TODO work around until service method works
83 TermVocabulary
<Feature
> featureVocabulary
= BerlinModelTransformer
.factCategory2Feature(1).getVocabulary();
84 //TermVocabulary<Feature> vocabulary = getTermService().getVocabulary(vocabularyUuid);
85 return featureVocabulary
;
86 } catch (UnknownCdmTypeException e
) {
87 logger
.error("Feature vocabulary not available. New vocabulary created");
88 return TermVocabulary
.NewInstance("User Defined Feature Vocabulary", "User Defined Feature Vocabulary", null, null);
// Builds the FactCategory-id -> Feature map used for the facts import.
// Starts from the feature map held by the configurator, reads the FactCategory table
// and adds one entry per row.
// NOTE(review): several original lines of this method are not present in this extract
// (declaration of the query string and of the counter i, the loop header, the try openers,
// the return statements and closing braces) - comments below describe the visible code only.
92 private Map
<Integer
, Feature
> invokeFactCategories(BerlinModelImportState state
){
// the map from the configurator is extended in place and (presumably) returned
94 Map
<Integer
, Feature
> result
= state
.getConfig().getFeatureMap();
95 Source source
= state
.getConfig().getSource();
98 //get data from database
100 " SELECT FactCategory.* " +
101 " FROM FactCategory "+
103 ResultSet rs
= source
.getResultSet(strQuery
) ;
// vocabulary that features created for unknown categories are passed to (see getFeature call below)
106 TermVocabulary
<Feature
> featureVocabulary
= getFeatureVocabulary();
// progress log every modCount rows; the "i != 1" term suppresses the message for the very first row
111 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info("FactCategories handled: " + (i
-1));}
113 int factCategoryId
= rs
.getInt("factCategoryId");
114 String factCategory
= rs
.getString("factCategory");
// first choice: the transformer's mapping for this category id
118 feature
= BerlinModelTransformer
.factCategory2Feature(factCategoryId
);
119 } catch (UnknownCdmTypeException e
) {
// category unknown to the transformer: look up a feature UUID under the key "<id>-<label>"
120 UUID featureUuid
= null;
121 featureUuid
= BerlinModelTransformer
.getFeatureUuid(String
.valueOf(factCategoryId
+"-"+factCategory
));
// no UUID registered either: warn and fall back to a random UUID
122 if (featureUuid
== null){
123 logger
.warn("New Feature (FactCategoryId: " + factCategoryId
+ ")");
124 featureUuid
= UUID
.randomUUID();
// the category label is passed for both String arguments after the UUID
126 feature
= getFeature(state
, featureUuid
, factCategory
, factCategory
, null, featureVocabulary
);
// FactCategory columns listed by the original author (none of them is read in the visible code)
129 // MaxFactNumber int Checked
130 // ExtensionTableName varchar(100) Checked
131 // Description nvarchar(1000) Checked
132 // locExtensionFormName nvarchar(80) Checked
133 // RankRestrictionFk int Checked
136 result
.put(factCategoryId
, feature
);
// both handlers only log in the visible code; what they return is not visible in this extract
139 } catch (SQLException e
) {
140 logger
.error("SQLException:" + e
);
142 } catch (UndefinedTransformerMethodException e1
) {
143 logger
.error("UndefinedTransformerMethodException:" + e1
);
144 e1
.printStackTrace();
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#doInvoke(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
 */
154 protected void doInvoke(BerlinModelImportState state
) {
155 featureMap
= invokeFactCategories(state
);
156 super.doInvoke(state
);
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
 */
166 protected String
getIdQuery(BerlinModelImportState state
) {
167 String result
= super.getIdQuery(state
);
168 if (StringUtils
.isNotBlank(state
.getConfig().getFactFilter())){
169 result
+= " WHERE " + state
.getConfig().getFactFilter();
171 result
= super.getIdQuery(state
);
173 result
+= getOrderBy(state
.getConfig());
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
 */
// Builds the query that loads the Fact records of one partition: all Fact columns plus
// PTaxon.RIdentifier (aliased taxonId) and RefDetail.Details, restricted to the ids that
// replace ID_LIST_TOKEN, with the ORDER BY clause from getOrderBy appended.
// NOTE(review): the declaration of strQuery, the line between the SELECT and the INNER JOIN
// fragments (presumably the FROM clause) and the return statement are not present in this extract.
183 protected String
getRecordQuery(BerlinModelImportConfigurator config
) {
185 " SELECT Fact.*, PTaxon.RIdentifier as taxonId, RefDetail.Details " +
// the taxon is identified by the name/reference key pair shared by Fact and PTaxon
187 " INNER JOIN PTaxon ON Fact.PTNameFk = PTaxon.PTNameFk AND Fact.PTRefFk = PTaxon.PTRefFk " +
// reference details are optional, hence the outer join
188 " LEFT OUTER JOIN RefDetail ON Fact.FactRefDetailFk = RefDetail.RefDetailId AND Fact.FactRefFk = RefDetail.RefFk " +
189 " WHERE (FactId IN (" + ID_LIST_TOKEN
+ "))";
190 strQuery
+= getOrderBy(config
);
196 private String
getOrderBy(BerlinModelImportConfigurator config
) {
199 if (config
.getSource().checkColumnExists("Fact", "Sequence")){
200 result
= " ORDER By Fact.Sequence, Fact.FactId";
202 result
= " ORDER By Fact.FactId";
204 } catch (NoSuchMethodException e
) {
205 logger
.info("checkColumnExists not supported");
206 result
= " ORDER By Fact.FactId";
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
 */
// Imports one partition of Fact records.
// For every row the fact's taxon and feature are resolved, the fact text is stored in a TextData
// element of a TaxonDescription of that taxon (together with source, markers and a sequence
// annotation), and the taxon is collected; at the end all collected taxa are saved through the
// taxon service.
// NOTE(review): many original lines of this method are not present in this extract (loop header,
// declaration of the counter i, try openers, else branches, the handling after the null checks,
// closing braces, the return statement). Comments describe the visible code only.
215 public boolean doPartition(ResultSetPartitioner partitioner
, BerlinModelImportState state
) {
216 boolean success
= true ;
217 BerlinModelImportConfigurator config
= state
.getConfig();
// taxa touched by this partition; saved in one call at the end
218 Set
<TaxonBase
> taxaToSave
= new HashSet
<TaxonBase
>();
// related objects are looked up under the same namespace keys that getRelatedObjectsForPartition uses
219 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>) partitioner
.getObjectMap(BerlinModelTaxonImport
.NAMESPACE
);
220 Map
<String
, Reference
> biblioRefMap
= (Map
<String
, Reference
>) partitioner
.getObjectMap(BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
);
221 Map
<String
, Reference
> nomRefMap
= (Map
<String
, Reference
>) partitioner
.getObjectMap(BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
);
223 ResultSet rs
= partitioner
.getResultSet();
225 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
// progress log every modCount rows
232 if ((i
++ % modCount
) == 0){ logger
.info("Facts handled: " + (i
-1));}
// taxonId and factCategoryFk are read twice: once as Object so that SQL NULL can be detected
// (getTaxon / getFeature test the Object for null) and once as a number for the actual lookup
234 int factId
= rs
.getInt("factId");
235 Object taxonIdObj
= rs
.getObject("taxonId");
236 long taxonId
= rs
.getLong("taxonId");
237 Object factRefFkObj
= rs
.getObject("factRefFk");
238 Object categoryFkObj
= rs
.getObject("factCategoryFk");
239 Integer categoryFk
= rs
.getInt("factCategoryFk");
240 String details
= rs
.getString("Details");
241 String fact
= CdmUtils
.Nz(rs
.getString("Fact"));
242 String notes
= CdmUtils
.Nz(rs
.getString("notes"));
243 Boolean doubtfulFlag
= rs
.getBoolean("DoubtfulFlag");
245 TaxonBase
<?
> taxonBase
= getTaxon(taxonMap
, taxonIdObj
, taxonId
);
246 Feature feature
= getFeature(featureMap
, categoryFkObj
, categoryFk
) ;
// a fact whose taxon is not in the partition's taxon map is only reported in the visible code
248 if (taxonBase
== null){
249 logger
.warn("Taxon for Fact " + factId
+ " does not exist in store");
// getMyTaxonDescripion selects or creates the description that receives this fact
// (image gallery for category 51); what happens when it returns null is not visible here
252 TaxonDescription taxonDescription
;
253 if ( (taxonDescription
= getMyTaxonDescripion(taxonBase
, state
, categoryFk
, taxonIdObj
, taxonId
, factId
, fact
, sourceRef
)) == null){
259 TextData textData
= null;
260 boolean newTextData
= true;
262 // For Cichorieae DB: If fact category is 31 (Systematics) and there is already a Systematics TextData
263 // description element append the fact text to the existing TextData
264 if(categoryFk
== 31) {
265 Set
<DescriptionElementBase
> descriptionElements
= taxonDescription
.getElements();
266 for (DescriptionElementBase descriptionElement
: descriptionElements
) {
// the existing element is recognised by the default-language label of its feature
267 String featureString
= descriptionElement
.getFeature().getRepresentation(Language
.DEFAULT()).getLabel();
268 if (descriptionElement
instanceof TextData
&& featureString
.equals("Systematics")) { // TODO: test
269 textData
= (TextData
)descriptionElement
;
270 String factTextStr
= textData
.getText(Language
.DEFAULT());
271 // FIXME: Removing newlines doesn't work
// NOTE(review): likely cause of the FIXME - String.contains takes its argument literally, so
// "\\r\\n" looks for the four characters backslash-r-backslash-n, whereas replaceAll treats the
// same argument as a regex matching CR LF. The guard is therefore false for text with real line breaks.
272 if (factTextStr
.contains("\\r\\n")) {
273 factTextStr
= factTextStr
.replaceAll("\\r\\n","");
// existing text + new fact text becomes the fact text that is written below
275 StringBuilder factText
= new StringBuilder(factTextStr
);
276 factText
.append(fact
);
277 fact
= factText
.toString();
// where newTextData is set to false is not visible in this extract
284 if(newTextData
== true) {
285 textData
= TextData
.NewInstance();
288 //for diptera database
// category 99 with an <OriginalName> tag in the notes: the notes are put in front of the fact text
289 if (categoryFk
== 99 && notes
.contains("<OriginalName>")){
290 // notes = notes.replaceAll("<OriginalName>", "");
291 // notes = notes.replaceAll("</OriginalName>", "");
292 fact
= notes
+ ": " + fact
;
// category 14 with the configurator flag set: strip the anchor markup so that only the URL remains
295 if (categoryFk
== 14 && state
.getConfig().isRemoveHttpMapsAnchor() && fact
.contains("<a href")){
296 //example <a href="http://euromed.luomus.fi/euromed_map.php?taxon=280629&size=medium">distribution</a>
297 fact
= fact
.replace("<a href=\"", "").replace("\">distribution</a>", "");
300 //TODO textData.putText(fact, bmiConfig.getFactLanguage()); //doesn't work because bmiConfig.getFactLanguage() is not not a persistent Language Object
301 //throws in thread "main" org.springframework.dao.InvalidDataAccessApiUsageException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language; nested exception is org.hibernate.TransientObjectException: object references an unsaved transient instance - save the transient instance before flushing: eu.etaxonomy.cdm.model.common.Language
// text and feature are only set for non-image descriptions
302 if (! taxonDescription
.isImageGallery()){
303 textData
.putText(Language
.DEFAULT(), fact
);
304 textData
.setFeature(feature
);
// citation: resolved from the two reference maps by the fact's reference id, if there is one
308 Reference
<?
> citation
= null;
309 String factRefFk
= String
.valueOf(factRefFkObj
);
310 if (factRefFkObj
!= null){
311 citation
= getReferenceOnlyFromMaps(biblioRefMap
, nomRefMap
, factRefFk
);
313 if (citation
== null && (factRefFkObj
!= null)){
314 logger
.warn("Citation not found in referenceMap: " + factRefFk
);
// a primary source is attached as soon as either a citation or a detail text is available
317 if (citation
!= null || StringUtils
.isNotBlank(details
)){
318 DescriptionElementSource originalSource
= DescriptionElementSource
.NewPrimarySourceInstance(citation
, details
);
319 textData
.addSource(originalSource
);
321 taxonDescription
.addElement(textData
);
// doubtful marker; the condition guarding it is not visible in this extract (presumably doubtfulFlag)
324 textData
.addMarker(Marker
.NewInstance(MarkerType
.IS_DOUBTFUL(), true));
// publish marker: only when the Fact table has a publishFlag column
327 String strPublishFlag
= "publishFlag";
328 boolean publishFlagExists
= state
.getConfig().getSource().checkColumnExists(dbTableName
, strPublishFlag
);
329 if (publishFlagExists
){
330 Boolean publishFlag
= rs
.getBoolean(strPublishFlag
);
331 textData
.addMarker(Marker
.NewInstance(MarkerType
.PUBLISH(), publishFlag
));
// NOTE(review): ResultSet.getInt returns a primitive, so "sequence != null" is always true and a
// SQL NULL arrives as 0. Unlike getOrderBy, no checkColumnExists guard for "Sequence" is visible here - confirm.
335 Integer sequence
= rs
.getInt("Sequence");
// 999 is excluded from annotation; its meaning is not documented in this file
336 if (sequence
!= null && sequence
!= 999){
337 String strSequence
= String
.valueOf(sequence
);
338 strSequence
= SEQUENCE_PREFIX
+ strSequence
;
339 //TODO make it an Extension when possible
340 //Extension datesExtension = Extension.NewInstance(textData, strSequence, ExtensionType.ORDER());
341 Annotation annotation
= Annotation
.NewInstance(strSequence
, Language
.DEFAULT());
342 textData
.addAnnotation(annotation
);
345 // if (categoryFkObj == FACT_DESCRIPTION){
347 // }else if (categoryFkObj == FACT_OBSERVATION){
349 // }else if (categoryFkObj == FACT_DISTRIBUTION_EM){
353 // //logger.warn("FactCategory " + categoryFk + " not yet implemented");
357 doCreatedUpdatedNotes(state
, textData
, rs
);
360 //Designation References -> unclear how to map to CDM
361 //factId -> OriginalSource for descriptionElements not yet implemented
363 //sequence -> textData is not an identifiable entity therefore extensions are not possible
364 //fact category better
366 taxaToSave
.add(taxonBase
);
// any exception is logged with a generic message plus stack trace
368 } catch (Exception re
){
369 logger
.error("An exception occurred during the facts import");
370 re
.printStackTrace();
375 logger
.info("Facts handled: " + (i
-1));
376 logger
.info("Taxa to save: " + taxaToSave
.size());
377 getTaxonService().save(taxaToSave
);
// SQL errors are rethrown unchecked with the original exception as cause
378 }catch(SQLException e
){
379 throw new RuntimeException(e
);
// Selects (or creates) the TaxonDescription of the given taxon that a fact is stored in.
// Fact category 51 is handled as an image: a Media object is created, filled by makeImage and
// attached to a TextData element of the description makeImage returns. For all other categories
// a description that is not an image gallery is used, or a new one is created and added to the taxon.
// NOTE(review): several original lines are not present in this extract (declarations of taxon and
// media, else branches, early returns, loop breaks, closing braces).
384 private TaxonDescription
getMyTaxonDescripion(TaxonBase taxonBase
, BerlinModelImportState state
, Integer categoryFk
, Object taxonIdObj
, long taxonId
, int factId
, String fact
, Reference
<?
> sourceRef
) {
// only accepted taxa (class Taxon) are processed further
386 if ( taxonBase
instanceof Taxon
) {
387 taxon
= (Taxon
) taxonBase
;
// other TaxonBase types are reported; the statement that follows the warning is not visible here
389 logger
.warn("TaxonBase " + (taxonIdObj
==null?
"(null)":taxonIdObj
) + " for Fact " + factId
+ " was not of type Taxon but: " + taxonBase
.getClass().getSimpleName());
393 TaxonDescription taxonDescription
= null;
394 Set
<TaxonDescription
> descriptionSet
= taxon
.getDescriptions();
396 boolean isImage
= false;
399 if (categoryFk
== 51){ //TODO check also FactCategory string
// image fact: the fact text is handed to makeImage together with the new Media object
401 media
= Media
.NewInstance();
402 taxonDescription
= makeImage(state
, fact
, media
, descriptionSet
, taxon
);
// handling of a null result from makeImage is not visible in this extract
406 if (taxonDescription
== null){
// look for a TextData element in the returned description
// (whether the loop stops at the first match is not visible here)
410 TextData textData
= null;
411 for (DescriptionElementBase el
: taxonDescription
.getElements()){
412 if (el
.isInstanceOf(TextData
.class)){
413 textData
= CdmBase
.deproxy(el
, TextData
.class);
// none found: create a TextData with the IMAGE feature and add it to the description
416 if (textData
== null){
417 textData
= TextData
.NewInstance(Feature
.IMAGE());
418 taxonDescription
.addElement(textData
);
420 textData
.addMedia(media
);
422 //all others (no image) -> getDescription
// pick a description that is not an image gallery
// (whether the loop stops at the first match is not visible here)
424 for (TaxonDescription desc
: descriptionSet
){
425 if (! desc
.isImageGallery()){
426 taxonDescription
= desc
;
// no suitable description yet: create one, titled with the source reference's title cache
// (null title if there is no source reference), and attach it to the taxon
429 if (taxonDescription
== null){
430 taxonDescription
= TaxonDescription
.NewInstance();
431 taxonDescription
.setTitleCache(sourceRef
== null ?
null : sourceRef
.getTitleCache(), true);
432 taxon
.addDescription(taxonDescription
);
435 return taxonDescription
;
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
 */
// Collects the foreign keys occurring in the partition's result set and loads the CDM objects
// they refer to, returning one id -> object map per namespace.
// Namespaces filled in the visible code: taxa, nomenclatural references, bibliographic references,
// nomenclatural ref details and bibliographic ref details.
// NOTE(review): several original lines are not present in this extract (declarations of nameSpace,
// cdmClass and idSet, the try opener and loop header, the assignment of idSet before the taxon
// lookup, the return statement, closing braces).
442 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
446 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
// one id set per kind of related object
449 Set
<String
> taxonIdSet
= new HashSet
<String
>();
450 Set
<String
> referenceIdSet
= new HashSet
<String
>();
451 Set
<String
> refDetailIdSet
= new HashSet
<String
>();
// fact reference and designation reference ids share one set, as do the two ref detail columns
453 handleForeignKey(rs
, taxonIdSet
, "taxonId");
454 handleForeignKey(rs
, referenceIdSet
, "FactRefFk");
455 handleForeignKey(rs
, referenceIdSet
, "PTDesignationRefFk");
456 handleForeignKey(rs
, refDetailIdSet
, "FactRefDetailFk");
457 handleForeignKey(rs
, refDetailIdSet
, "PTDesignationRefDetailFk");
// taxon map (the statement assigning idSet for this lookup is not visible here - presumably taxonIdSet)
461 nameSpace
= BerlinModelTaxonImport
.NAMESPACE
;
462 cdmClass
= TaxonBase
.class;
464 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
465 result
.put(nameSpace
, taxonMap
);
// nomenclatural reference map
469 nameSpace
= BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
;
470 cdmClass
= Reference
.class;
471 idSet
= referenceIdSet
;
472 Map
<String
, Reference
> nomReferenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
473 result
.put(nameSpace
, nomReferenceMap
);
475 //biblio reference map
// same id set as above, looked up under the bibliographic namespace
476 nameSpace
= BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
;
477 cdmClass
= Reference
.class;
478 idSet
= referenceIdSet
;
479 Map
<String
, Reference
> biblioReferenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
480 result
.put(nameSpace
, biblioReferenceMap
);
// nomenclatural ref detail map (ref details are loaded as Reference objects)
483 nameSpace
= BerlinModelRefDetailImport
.NOM_REFDETAIL_NAMESPACE
;
484 cdmClass
= Reference
.class;
485 idSet
= refDetailIdSet
;
486 Map
<String
, Reference
> nomRefDetailMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
487 result
.put(nameSpace
, nomRefDetailMap
);
489 //biblio refDetail map
490 nameSpace
= BerlinModelRefDetailImport
.BIBLIO_REFDETAIL_NAMESPACE
;
491 cdmClass
= Reference
.class;
492 idSet
= refDetailIdSet
;
493 Map
<String
, Reference
> biblioRefDetailMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
494 result
.put(nameSpace
, biblioRefDetailMap
);
// SQL errors are rethrown unchecked with the original exception as cause
496 } catch (SQLException e
) {
497 throw new RuntimeException(e
);
/**
 * @param descriptionSet
 */
// Turns an image fact into media data: the fact text is parsed as a URI, image metadata is read
// from it, a MediaRepresentation with an ImageFile part is added to the given Media object, and
// the taxon's image gallery description is returned.
// NOTE(review): several original lines are not present in this extract (declarations of uri and
// size, try openers, whatever follows the warning after a URISyntaxException, closing braces).
// The descriptionSet parameter is not used in the visible code.
510 private TaxonDescription
makeImage(BerlinModelImportState state
, String fact
, Media media
, Set
<TaxonDescription
> descriptionSet
, Taxon taxon
) {
511 TaxonDescription taxonDescription
= null;
512 Reference sourceRef
= state
.getTransactionalSourceReference();
514 ImageInfo imageInfo
= null;
// the trimmed fact text is expected to be the image URI
517 uri
= new URI(fact
.trim());
518 } catch (URISyntaxException e
) {
519 logger
.warn("URISyntaxException. Image could not be imported: " + fact
);
// read the image metadata from the URI
523 imageInfo
= ImageInfo
.NewInstance(uri
, 0);
524 } catch (IOException e
) {
525 logger
.error("IOError reading image metadata." , e
);
526 } catch (HttpException e
) {
527 logger
.error("HttpException reading image metadata." , e
);
// NOTE(review): imageInfo starts as null and the handlers above only log in the visible code;
// unless a line missing from this extract guards it, getMimeType() would throw a NullPointerException
// after a failed metadata read - confirm.
529 MediaRepresentation mediaRepresentation
= MediaRepresentation
.NewInstance(imageInfo
.getMimeType(), null);
530 media
.addRepresentation(mediaRepresentation
);
531 ImageFile image
= ImageFile
.NewInstance(uri
, size
, imageInfo
);
532 mediaRepresentation
.addRepresentationPart(image
);
// the gallery is fetched or created with the source reference's title cache (null if there is no source reference)
534 taxonDescription
= taxon
.getOrCreateImageGallery(sourceRef
== null ?
null :sourceRef
.getTitleCache());
536 return taxonDescription
;
539 private TaxonBase
getTaxon(Map
<String
, TaxonBase
> taxonMap
, Object taxonIdObj
, Long taxonId
){
540 if (taxonIdObj
!= null){
541 return taxonMap
.get(String
.valueOf(taxonId
));
548 private Feature
getFeature(Map
<Integer
, Feature
> featureMap
, Object categoryFkObj
, Integer categoryFk
){
549 if (categoryFkObj
!= null){
550 return featureMap
.get(categoryFk
);
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
 */
562 protected boolean doCheck(BerlinModelImportState state
){
563 IOValidator
<BerlinModelImportState
> validator
= new BerlinModelFactsImportValidator();
564 return validator
.validate(state
);
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
 */
570 protected boolean isIgnore(BerlinModelImportState state
){
571 return ! state
.getConfig().isDoFacts();