2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.berlinModel
.in
;
12 import java
.sql
.ResultSet
;
13 import java
.sql
.SQLException
;
14 import java
.util
.ArrayList
;
15 import java
.util
.HashMap
;
16 import java
.util
.HashSet
;
17 import java
.util
.List
;
21 import org
.apache
.commons
.lang
.StringUtils
;
22 import org
.apache
.log4j
.Logger
;
23 import org
.springframework
.stereotype
.Component
;
25 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
26 import eu
.etaxonomy
.cdm
.hibernate
.HibernateProxyHelper
;
27 import eu
.etaxonomy
.cdm
.io
.berlinModel
.BerlinModelTransformer
;
28 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.validation
.BerlinModelOccurrenceImportValidator
;
29 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
30 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
31 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
32 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
33 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
34 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
35 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
36 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
37 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
38 import eu
.etaxonomy
.cdm
.model
.description
.PresenceAbsenceTermBase
;
39 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
40 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
41 import eu
.etaxonomy
.cdm
.model
.location
.TdwgArea
;
42 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
43 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
44 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
45 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
54 public class BerlinModelOccurrenceImport
extends BerlinModelImportBase
{
55 private static final Logger logger
= Logger
.getLogger(BerlinModelOccurrenceImport
.class);
57 public static final String NAMESPACE
= "Occurrence";
60 private static int modCount
= 5000;
61 private static final String pluralString
= "occurrences";
62 private static final String dbTableName
= "emOccurrence"; //??
65 public BerlinModelOccurrenceImport(){
70 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
73 protected String
getIdQuery(BerlinModelImportState state
) {
74 String result
= " SELECT occurrenceId FROM " + getTableName();
75 if (StringUtils
.isNotBlank(state
.getConfig().getOccurrenceFilter())){
76 result
+= " WHERE " + state
.getConfig().getOccurrenceFilter();
82 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
85 protected String
getRecordQuery(BerlinModelImportConfigurator config
) {
86 String strQuery
= //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
87 " SELECT DISTINCT PTaxon.RIdentifier AS taxonId, emOccurrence.OccurrenceId, emOccurrence.Native, emOccurrence.Introduced, " +
88 " emOccurrence.Cultivated, emOccurSumCat.emOccurSumCatId, emOccurSumCat.Short, emOccurSumCat.Description, " +
89 " emOccurSumCat.OutputCode, emArea.AreaId, emArea.TDWGCode, emArea.EMCode " +
90 " FROM emOccurrence INNER JOIN " +
91 " emArea ON emOccurrence.AreaFk = emArea.AreaId INNER JOIN " +
92 " PTaxon ON emOccurrence.PTNameFk = PTaxon.PTNameFk AND emOccurrence.PTRefFk = PTaxon.PTRefFk LEFT OUTER JOIN " +
93 " emOccurSumCat ON emOccurrence.SummaryStatus = emOccurSumCat.emOccurSumCatId LEFT OUTER JOIN " +
94 " emOccurrenceSource ON emOccurrence.OccurrenceId = emOccurrenceSource.OccurrenceFk " +
95 " WHERE (emOccurrence.OccurrenceId IN (" + ID_LIST_TOKEN
+ ") )" +
96 " ORDER BY PTaxon.RIdentifier";
101 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
103 public boolean doPartition(ResultSetPartitioner partitioner
, BerlinModelImportState state
) {
104 boolean success
= true;
105 Set
<TaxonBase
> taxaToSave
= new HashSet
<TaxonBase
>();
107 Map
<String
, TaxonBase
> taxonMap
= (Map
<String
, TaxonBase
>) partitioner
.getObjectMap(BerlinModelTaxonImport
.NAMESPACE
);
109 ResultSet rs
= partitioner
.getResultSet();
112 //map to store the mapping of duplicate Berlin Model occurrences to their real distributions
113 //duplicates may occur due to area mappings from BM areas to TDWG areas
114 Map
<Integer
, String
> duplicateMap
= new HashMap
<Integer
, String
>();
116 TaxonDescription oldDescription
= null;
118 int countDescriptions
= 0;
119 int countDistributions
= 0;
120 int countDuplicates
= 0;
124 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info("Facts handled: " + (i
-1));}
126 int occurrenceId
= rs
.getInt("OccurrenceId");
127 int newTaxonId
= rs
.getInt("taxonId");
128 String tdwgCodeString
= rs
.getString("TDWGCode");
129 String emCodeString
= rs
.getString("EMCode");
130 Integer emStatusId
= (Integer
)rs
.getObject("emOccurSumCatId");
134 PresenceAbsenceTermBase
<?
> status
= null;
135 String alternativeStatusString
= null;
136 if (emStatusId
!= null){
137 status
= BerlinModelTransformer
.occStatus2PresenceAbsence(emStatusId
);
139 String
[] stringArray
= new String
[]{rs
.getString("Native"), rs
.getString("Introduced"), rs
.getString("Cultivated")};
140 alternativeStatusString
= CdmUtils
.concat(",", stringArray
);
144 List
<NamedArea
> areas
= new ArrayList
<NamedArea
>();
145 if (tdwgCodeString
!= null){
147 String
[] tdwgCodes
= new String
[]{tdwgCodeString
};
148 if (state
.getConfig().isSplitTdwgCodes()){
149 tdwgCodes
= tdwgCodeString
.split(";");
152 for (String tdwgCode
: tdwgCodes
){
153 NamedArea area
= TdwgArea
.getAreaByTdwgAbbreviation(tdwgCode
.trim());
155 area
= getOtherAreas(state
, emCodeString
, tdwgCodeString
);
163 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
164 //create description(elements)
165 TaxonDescription taxonDescription
= getTaxonDescription(newTaxonId
, oldTaxonId
, oldDescription
, taxonMap
, occurrenceId
, sourceRef
);
166 if (areas
.size()== 0){
167 NamedArea area
= getOtherAreas(state
, emCodeString
, tdwgCodeString
);
172 if (areas
.size() == 0){
173 String areaId
= rs
.getString("AreaId");
174 logger
.warn("No areas defined for occurrence " + occurrenceId
+ ". EMCode: " + CdmUtils
.Nz(emCodeString
).trim() + ". AreaId: " + areaId
);
176 for (NamedArea area
: areas
){
177 Distribution distribution
= Distribution
.NewInstance(area
, status
);
179 AnnotationType annotationType
= AnnotationType
.EDITORIAL();
180 Annotation annotation
= Annotation
.NewInstance(alternativeStatusString
, annotationType
, null);
181 distribution
.addAnnotation(annotation
);
182 distribution
.addMarker(Marker
.NewInstance(MarkerType
.PUBLISH(), false));
184 // distribution.setCitation(sourceRef);
185 if (taxonDescription
!= null) {
186 Distribution duplicate
= checkIsNoDuplicate(taxonDescription
, distribution
, duplicateMap
, occurrenceId
);
187 if (duplicate
== null){
188 taxonDescription
.addElement(distribution
);
189 distribution
.addSource(String
.valueOf(occurrenceId
), NAMESPACE
, state
.getTransactionalSourceReference(), null);
190 countDistributions
++;
191 if (taxonDescription
!= oldDescription
){
192 taxaToSave
.add(taxonDescription
.getTaxon());
193 oldDescription
= taxonDescription
;
198 duplicate
.addSource(String
.valueOf(occurrenceId
), NAMESPACE
, state
.getTransactionalSourceReference(), null);
199 logger
.info("Distribution is duplicate"); }
201 logger
.warn("Distribution " + area
.getLabel() + " ignored. OccurrenceId = " + occurrenceId
);
206 } catch (UnknownCdmTypeException e
) {
207 logger
.error("Unknown presenceAbsence status id: " + emStatusId
);
214 logger
.info("Distributions: " + countDistributions
+ ", Descriptions: " + countDescriptions
);
215 logger
.info("Duplicate occurrences: " + (countDuplicates
));
217 logger
.info("Taxa to save: " + taxaToSave
.size());
218 getTaxonService().save(taxaToSave
);
221 } catch (SQLException e
) {
222 logger
.error("SQLException:" + e
);
230 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
232 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
236 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
239 Set
<String
> taxonIdSet
= new HashSet
<String
>();
241 handleForeignKey(rs
, taxonIdSet
, "taxonId");
245 nameSpace
= BerlinModelTaxonImport
.NAMESPACE
;
246 cdmClass
= TaxonBase
.class;
248 Map
<String
, TaxonBase
> objectMap
= (Map
<String
, TaxonBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
249 result
.put(nameSpace
, objectMap
);
251 } catch (SQLException e
) {
252 throw new RuntimeException(e
);
260 * Tests if a distribution with the same area and the same status already exists in the description.
261 * If so, the old (already existing) distribution is returned.
264 * @return the already existing duplicate distribution, or <code>null</code> if no duplicate exists.
266 private Distribution
checkIsNoDuplicate(TaxonDescription description
, Distribution distribution
, Map
<Integer
, String
> duplicateMap
, Integer bmDistributionId
){
267 for (DescriptionElementBase descElBase
: description
.getElements()){
268 if (descElBase
.isInstanceOf(Distribution
.class)){
269 Distribution oldDistr
= HibernateProxyHelper
.deproxy(descElBase
, Distribution
.class);
270 NamedArea oldArea
= oldDistr
.getArea();
271 if (oldArea
!= null && oldArea
.equals(distribution
.getArea())){
272 PresenceAbsenceTermBase
<?
> oldStatus
= oldDistr
.getStatus();
273 if (oldStatus
!= null && oldStatus
.equals(distribution
.getStatus())){
274 duplicateMap
.put(bmDistributionId
, oldDistr
.getSources().iterator().next().getIdInSource());
284 * Use same TaxonDescription if two records belong to the same taxon
287 * @param oldDescription
291 private TaxonDescription
getTaxonDescription(int newTaxonId
, int oldTaxonId
, TaxonDescription oldDescription
, Map
<String
, TaxonBase
> taxonMap
, int occurrenceId
, Reference
<?
> sourceSec
){
292 TaxonDescription result
= null;
293 if (oldDescription
== null || newTaxonId
!= oldTaxonId
){
294 TaxonBase taxonBase
= taxonMap
.get(String
.valueOf(newTaxonId
));
296 //TaxonBase taxonBase = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SPECIES()), null);
298 if ( taxonBase
instanceof Taxon
) {
299 taxon
= (Taxon
) taxonBase
;
300 } else if (taxonBase
!= null) {
301 logger
.warn("TaxonBase for Occurrence " + occurrenceId
+ " was not of type Taxon but: " + taxonBase
.getClass().getSimpleName());
304 logger
.warn("TaxonBase for Occurrence " + occurrenceId
+ " is null.");
307 Set
<TaxonDescription
> descriptionSet
= taxon
.getDescriptions();
308 if (descriptionSet
.size() > 0) {
309 result
= descriptionSet
.iterator().next();
311 result
= TaxonDescription
.NewInstance();
312 result
.setTitleCache(sourceSec
.getTitleCache(), true);
313 taxon
.addDescription(result
);
316 result
= oldDescription
;
323 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
326 protected boolean doCheck(BerlinModelImportState state
){
327 IOValidator
<BerlinModelImportState
> validator
= new BerlinModelOccurrenceImportValidator();
328 return validator
.validate(state
);
332 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getTableName()
335 protected String
getTableName() {
340 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getPluralString()
343 public String
getPluralString() {
348 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
350 protected boolean isIgnore(BerlinModelImportState state
){
351 return ! state
.getConfig().isDoOccurrence();