2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.berlinModel
.in
;
12 import java
.sql
.ResultSet
;
13 import java
.sql
.SQLException
;
14 import java
.util
.ArrayList
;
15 import java
.util
.HashMap
;
16 import java
.util
.HashSet
;
17 import java
.util
.List
;
21 import org
.apache
.commons
.lang
.StringUtils
;
22 import org
.apache
.log4j
.Logger
;
23 import org
.springframework
.stereotype
.Component
;
25 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
26 import eu
.etaxonomy
.cdm
.hibernate
.HibernateProxyHelper
;
27 import eu
.etaxonomy
.cdm
.io
.berlinModel
.BerlinModelTransformer
;
28 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.validation
.BerlinModelOccurrenceImportValidator
;
29 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
30 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
31 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
32 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
33 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
34 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
35 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
36 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
37 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
38 import eu
.etaxonomy
.cdm
.model
.description
.PresenceAbsenceTermBase
;
39 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
40 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
41 import eu
.etaxonomy
.cdm
.model
.location
.TdwgArea
;
42 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
43 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
44 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
45 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
54 public class BerlinModelOccurrenceImport
extends BerlinModelImportBase
{
55 private static final Logger logger
= Logger
.getLogger(BerlinModelOccurrenceImport
.class);
57 public static final String NAMESPACE
= "Occurrence";
60 private static int modCount
= 5000;
61 private static final String pluralString
= "occurrences";
62 private static final String dbTableName
= "emOccurrence"; //??
/**
 * Creates the occurrence import and hands the source table name and the
 * plural label of the imported objects to the base class.
 */
public BerlinModelOccurrenceImport() {
    super(dbTableName, pluralString);
}
70 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
73 protected String
getIdQuery(BerlinModelImportState state
) {
74 String result
= " SELECT occurrenceId FROM " + getTableName();
75 if (StringUtils
.isNotBlank(state
.getConfig().getOccurrenceFilter())){
76 result
+= " WHERE " + state
.getConfig().getOccurrenceFilter();
82 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
85 protected String
getRecordQuery(BerlinModelImportConfigurator config
) {
86 String emCode
= config
.isIncludesAreaEmCode()?
", emArea.EMCode" : "";
87 String strQuery
= //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
88 " SELECT DISTINCT PTaxon.RIdentifier AS taxonId, emOccurrence.OccurrenceId, emOccurrence.Native, emOccurrence.Introduced, " +
89 " emOccurrence.Cultivated, emOccurSumCat.emOccurSumCatId, emOccurSumCat.Short, emOccurSumCat.Description, " +
90 " emOccurSumCat.OutputCode, emArea.AreaId, emArea.TDWGCode " + emCode
+
91 " FROM emOccurrence INNER JOIN " +
92 " emArea ON emOccurrence.AreaFk = emArea.AreaId INNER JOIN " +
93 " PTaxon ON emOccurrence.PTNameFk = PTaxon.PTNameFk AND emOccurrence.PTRefFk = PTaxon.PTRefFk LEFT OUTER JOIN " +
94 " emOccurSumCat ON emOccurrence.SummaryStatus = emOccurSumCat.emOccurSumCatId LEFT OUTER JOIN " +
95 " emOccurrenceSource ON emOccurrence.OccurrenceId = emOccurrenceSource.OccurrenceFk " +
96 " WHERE (emOccurrence.OccurrenceId IN (" + ID_LIST_TOKEN
+ ") )" +
97 " ORDER BY PTaxon.RIdentifier";
102 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
/**
 * Imports one partition of occurrence records. For every record read from the
 * partition's result set a status and a list of areas are determined, one
 * Distribution per area is created and, unless an equal distribution already
 * exists (see checkIsNoDuplicate), added to the TaxonDescription of the record's
 * taxon. The affected taxa are saved through the taxon service.
 *
 * NOTE(review): several lines of this method (loop header, try/else keywords,
 * declarations of i and oldTaxonId, the return statement) are not visible in
 * this listing; the comments below only describe the visible statements.
 *
 * @param partitioner supplies the result set and the taxon map of the partition
 * @param state the import state (configuration, transactional source reference)
 */
105 public boolean doPartition(ResultSetPartitioner partitioner
, BerlinModelImportState state
) {
106 boolean success
= true;
// taxa that received at least one new distribution; saved in one batch below
107 Set
<TaxonBase
> taxaToSave
= new HashSet
<TaxonBase
>();
// taxa of this partition, looked up by the record's taxonId in getTaxonDescription
109 Map
<String
, TaxonBase
<?
>> taxonMap
= (Map
<String
, TaxonBase
<?
>>) partitioner
.getObjectMap(BerlinModelTaxonImport
.NAMESPACE
);
111 ResultSet rs
= partitioner
.getResultSet();
114 //map to store the mapping of duplicate berlin model occurrences to their real distributions
115 //duplicates may occur due to area mappings from BM areas to TDWG areas
116 Map
<Integer
, String
> duplicateMap
= new HashMap
<Integer
, String
>();
118 TaxonDescription oldDescription
= null;
120 int countDescriptions
= 0;
121 int countDistributions
= 0;
122 int countDuplicates
= 0;
// progress message every modCount records (but not for the very first one)
126 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info("Facts handled: " + (i
-1));}
128 int occurrenceId
= rs
.getInt("OccurrenceId");
129 int newTaxonId
= rs
.getInt("taxonId");
130 String tdwgCodeString
= rs
.getString("TDWGCode");
// EMCode is only part of the query if the configuration includes it (see getRecordQuery)
131 String emCodeString
= state
.getConfig().isIncludesAreaEmCode() ? rs
.getString("EMCode") : null;
132 Integer emStatusId
= nullSafeInt(rs
, "emOccurSumCatId");
// status is mapped from the summary category id when one is present
136 PresenceAbsenceTermBase
<?
> status
= null;
137 String alternativeStatusString
= null;
138 if (emStatusId
!= null){
139 status
= BerlinModelTransformer
.occStatus2PresenceAbsence(emStatusId
);
// Native/Introduced/Cultivated joined with "," ; used as annotation text further down
141 String
[] stringArray
= new String
[]{rs
.getString("Native"), rs
.getString("Introduced"), rs
.getString("Cultivated")};
142 alternativeStatusString
= CdmUtils
.concat(",", stringArray
);
// collect the areas of this record, starting from its TDWG code(s)
146 List
<NamedArea
> areas
= new ArrayList
<NamedArea
>();
147 if (tdwgCodeString
!= null){
149 String
[] tdwgCodes
= new String
[]{tdwgCodeString
};
// optionally the field holds several codes separated by ";"
150 if (state
.getConfig().isSplitTdwgCodes()){
151 tdwgCodes
= tdwgCodeString
.split(";");
154 for (String tdwgCode
: tdwgCodes
){
155 NamedArea area
= TdwgArea
.getAreaByTdwgAbbreviation(tdwgCode
.trim());
// NOTE(review): presumably the fallback when no TDWG area was found - the guarding condition is not visible here
157 area
= getOtherAreas(state
, emCodeString
, tdwgCodeString
);
165 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
166 //create description(elements)
167 TaxonDescription taxonDescription
= getTaxonDescription(newTaxonId
, oldTaxonId
, oldDescription
, taxonMap
, occurrenceId
, sourceRef
);
// no area from the TDWG code(s): try the other area lookup
168 if (areas
.size()== 0){
169 NamedArea area
= getOtherAreas(state
, emCodeString
, tdwgCodeString
);
// still no area: nothing can be imported for this record, only warn
174 if (areas
.size() == 0){
175 String areaId
= rs
.getString("AreaId");
176 logger
.warn("No areas defined for occurrence " + occurrenceId
+ ". EMCode: " + CdmUtils
.Nz(emCodeString
).trim() + ". AreaId: " + areaId
);
// one distribution per resolved area
178 for (NamedArea area
: areas
){
179 Distribution distribution
= Distribution
.NewInstance(area
, status
);
// the alternative status string is kept as editorial annotation
181 AnnotationType annotationType
= AnnotationType
.EDITORIAL();
182 Annotation annotation
= Annotation
.NewInstance(alternativeStatusString
, annotationType
, null);
183 distribution
.addAnnotation(annotation
);
// every distribution gets a PUBLISH marker with flag false
184 distribution
.addMarker(Marker
.NewInstance(MarkerType
.PUBLISH(), false));
186 // distribution.setCitation(sourceRef);
187 if (taxonDescription
!= null) {
// returns the already existing equal distribution, or null if there is none
188 Distribution duplicate
= checkIsNoDuplicate(taxonDescription
, distribution
, duplicateMap
, occurrenceId
);
189 if (duplicate
== null){
190 taxonDescription
.addElement(distribution
);
191 distribution
.addImportSource(String
.valueOf(occurrenceId
), NAMESPACE
, state
.getTransactionalSourceReference(), null);
192 countDistributions
++;
// first distribution for this description: remember its taxon for saving
193 if (taxonDescription
!= oldDescription
){
194 taxaToSave
.add(taxonDescription
.getTaxon());
195 oldDescription
= taxonDescription
;
// duplicate: the occurrence id is only added as further import source of the existing distribution
200 duplicate
.addImportSource(String
.valueOf(occurrenceId
), NAMESPACE
, state
.getTransactionalSourceReference(), null);
201 logger
.info("Distribution is duplicate"); }
// NOTE(review): presumably reached when taxonDescription is null - the else keyword is not visible here
203 logger
.warn("Distribution " + area
.getLabel() + " ignored. OccurrenceId = " + occurrenceId
);
// thrown by the status mapping for an unknown status id
208 } catch (UnknownCdmTypeException e
) {
209 logger
.error("Unknown presenceAbsence status id: " + emStatusId
);
216 logger
.info("Distributions: " + countDistributions
+ ", Descriptions: " + countDescriptions
);
217 logger
.info("Duplicate occurrences: " + (countDuplicates
));
219 logger
.info("Taxa to save: " + taxaToSave
.size());
220 getTaxonService().save(taxaToSave
);
223 } catch (SQLException e
) {
224 logger
.error("SQLException:" + e
);
232 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
234 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
238 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
241 Set
<String
> taxonIdSet
= new HashSet
<String
>();
243 handleForeignKey(rs
, taxonIdSet
, "taxonId");
247 nameSpace
= BerlinModelTaxonImport
.NAMESPACE
;
248 cdmClass
= TaxonBase
.class;
250 Map
<String
, TaxonBase
> objectMap
= (Map
<String
, TaxonBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
251 result
.put(nameSpace
, objectMap
);
253 } catch (SQLException e
) {
254 throw new RuntimeException(e
);
262 * Tests if a distribution with the same tdwgArea and the same status already exists in the description.
263 * If so the old distribution is returned
266 * @return false, if dupplicate exists. True otherwise.
268 private Distribution
checkIsNoDuplicate(TaxonDescription description
, Distribution distribution
, Map
<Integer
, String
> duplicateMap
, Integer bmDistributionId
){
269 for (DescriptionElementBase descElBase
: description
.getElements()){
270 if (descElBase
.isInstanceOf(Distribution
.class)){
271 Distribution oldDistr
= HibernateProxyHelper
.deproxy(descElBase
, Distribution
.class);
272 NamedArea oldArea
= oldDistr
.getArea();
273 if (oldArea
!= null && oldArea
.equals(distribution
.getArea())){
274 PresenceAbsenceTermBase
<?
> oldStatus
= oldDistr
.getStatus();
275 if (oldStatus
!= null && oldStatus
.equals(distribution
.getStatus())){
276 duplicateMap
.put(bmDistributionId
, oldDistr
.getSources().iterator().next().getIdInSource());
286 * Use same TaxonDescription if two records belong to the same taxon
289 * @param oldDescription
293 private TaxonDescription
getTaxonDescription(int newTaxonId
, int oldTaxonId
, TaxonDescription oldDescription
, Map
<String
, TaxonBase
<?
>> taxonMap
, int occurrenceId
, Reference
<?
> sourceSec
){
294 TaxonDescription result
= null;
295 if (oldDescription
== null || newTaxonId
!= oldTaxonId
){
296 TaxonBase
<?
> taxonBase
= taxonMap
.get(String
.valueOf(newTaxonId
));
298 //TaxonBase taxonBase = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SPECIES()), null);
300 if ( taxonBase
instanceof Taxon
) {
301 taxon
= (Taxon
) taxonBase
;
302 } else if (taxonBase
!= null) {
303 logger
.warn("TaxonBase for Occurrence " + occurrenceId
+ " was not of type Taxon but: " + taxonBase
.getClass().getSimpleName());
306 logger
.warn("TaxonBase for Occurrence " + occurrenceId
+ " is null.");
309 Set
<TaxonDescription
> descriptionSet
= taxon
.getDescriptions();
310 if (descriptionSet
.size() > 0) {
311 result
= descriptionSet
.iterator().next();
313 result
= TaxonDescription
.NewInstance();
314 result
.setTitleCache(sourceSec
.getTitleCache(), true);
315 taxon
.addDescription(result
);
318 result
= oldDescription
;
325 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
328 protected boolean doCheck(BerlinModelImportState state
){
329 IOValidator
<BerlinModelImportState
> validator
= new BerlinModelOccurrenceImportValidator();
330 return validator
.validate(state
);
335 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
337 protected boolean isIgnore(BerlinModelImportState state
){
338 if (! state
.getConfig().isDoOccurrence()){
341 if (!this.checkSqlServerColumnExists(state
.getConfig().getSource(), "emOccurrence", "OccurrenceId")){
342 logger
.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import");