/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.berlinModel
.in
;
12 import java
.sql
.ResultSet
;
13 import java
.sql
.SQLException
;
14 import java
.util
.ArrayList
;
15 import java
.util
.HashMap
;
16 import java
.util
.HashSet
;
17 import java
.util
.List
;
21 import org
.apache
.log4j
.Logger
;
22 import org
.springframework
.stereotype
.Component
;
24 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
25 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.validation
.BerlinModelOccurrenceSourceImportValidator
;
26 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
27 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
28 import eu
.etaxonomy
.cdm
.io
.common
.Source
;
29 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
30 import eu
.etaxonomy
.cdm
.model
.common
.DescriptionElementSource
;
31 import eu
.etaxonomy
.cdm
.model
.common
.OriginalSourceType
;
32 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
33 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
34 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
35 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameBase
;
36 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
45 public class BerlinModelOccurrenceSourceImport
extends BerlinModelImportBase
{
46 private static final Logger logger
= Logger
.getLogger(BerlinModelOccurrenceSourceImport
.class);
48 private static int modCount
= 5000;
49 private static final String pluralString
= "occurrence sources";
50 private static final String dbTableName
= "emOccurrenceSource"; //??
53 private Map
<String
, Integer
> sourceNumberRefIdMap
;
54 private Set
<String
> unfoundReferences
= new HashSet
<String
>();
/**
 * Creates the import step and hands the source table name and the plural
 * label of the imported records to the superclass constructor.
 */
public BerlinModelOccurrenceSourceImport() {
    super(dbTableName, pluralString);
}
62 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getIdQuery()
65 protected String
getIdQuery(BerlinModelImportState state
) {
66 String result
= "SELECT occurrenceSourceId FROM " + getTableName();
67 if (state
.getConfig().getOccurrenceSourceFilter() != null){
68 result
+= " WHERE " + state
.getConfig().getOccurrenceSourceFilter();
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
 */
/**
 * Builds the SQL statement that reads records from emOccurrenceSource,
 * restricted to the OccurrenceSourceId values given by ID_LIST_TOKEN.
 * <p>
 * NOTE(review): ID_LIST_TOKEN is presumably a placeholder that is replaced by
 * the ids of the current partition - confirm in the base class.
 * NOTE(review): the SELECT clause, the end of the statement and the return are
 * not present in this listing - confirm against the complete file.
 *
 * @param config the import configuration (not used in the visible lines)
 * @return the record query
 */
77 protected String
getRecordQuery(BerlinModelImportConfigurator config
) {
78 String strQuery
= //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
// NOTE(review): the SELECT clause that belongs here is not visible in this listing.
80 " FROM emOccurrenceSource " +
81 " WHERE (OccurrenceSourceId IN (" + ID_LIST_TOKEN
+ ") )" +
89 protected void doInvoke(BerlinModelImportState state
) {
90 unfoundReferences
= new HashSet
<String
>();
93 sourceNumberRefIdMap
= makeSourceNumberReferenceIdMap(state
);
94 } catch (SQLException e
) {
96 throw new RuntimeException(e
);
98 super.doInvoke(state
);
99 sourceNumberRefIdMap
= null;
100 if (unfoundReferences
.size()>0){
101 String unfound
= "'" + CdmUtils
.concat("','", unfoundReferences
.toArray(new String
[]{})) + "'";
102 logger
.warn("Not found references: " + unfound
);
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#doPartition(eu.etaxonomy.cdm.io.berlinModel.in.ResultSetPartitioner, eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportState)
 */
/**
 * Handles the occurrence source records of one partition.
 * <p>
 * For a record, the Distribution stored under its OccurrenceFk is looked up
 * among the related objects of the state. If a distribution is found, the
 * reference id for the record's SourceNumber is taken from
 * sourceNumberRefIdMap and resolved via getReference(...). A
 * DescriptionElementSource of type PrimaryTaxonomicSource citing that
 * reference is created, enriched with either the resolved name object or the
 * raw old name string, and added to the distribution.
 * <p>
 * NOTE(review): several lines of this method are not present in this listing
 * (declaration of i, the loop over the result set, some branch headers,
 * closing braces, return statements); the control flow described here is
 * inferred from the visible statements - confirm against the complete file.
 *
 * @param partitioner supplies the result set of the current partition
 * @param state the current import state
 * @return NOTE(review): return statements are not visible; presumably the
 *         success flag - confirm
 */
110 public boolean doPartition(ResultSetPartitioner partitioner
, BerlinModelImportState state
) {
111 boolean success
= true;
112 ResultSet rs
= partitioner
.getResultSet();
114 Set
<DescriptionElementBase
> objectsToSave
= new HashSet
<DescriptionElementBase
>();
// progress message each time the running counter hits a multiple of modCount (not for the first record)
120 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info("occurrence sources handled: " + (i
-1));}
// read the columns of the current record
122 Integer occurrenceSourceId
= rs
.getInt("OccurrenceSourceId");
123 Integer occurrenceFk
=nullSafeInt(rs
, "OccurrenceFk");
124 String sourceNumber
= rs
.getString("SourceNumber");
125 String oldName
= rs
.getString("OldName");
126 Integer oldNameFk
= nullSafeInt(rs
, "OldNameFk");
// the distribution this source belongs to, keyed by the occurrence id
128 Distribution distribution
= (Distribution
)state
.getRelatedObject(BerlinModelOccurrenceImport
.NAMESPACE
, String
.valueOf(occurrenceFk
));
// the fallback lookup is commented out, so this branch has no visible effect
130 if (distribution
== null){
131 //distribution = duplicateMap.get(occurrenceFk);
133 if (distribution
!= null){
// resolve the reference via the source number; refId is null if the source number is not mapped
134 Integer refId
= sourceNumberRefIdMap
.get(sourceNumber
);
135 Reference
<?
> ref
= getReference(refId
, state
);
// NOTE(review): presumably guarded by a check that ref is not null (the branch header is not visible) - confirm
138 DescriptionElementSource originalSource
= DescriptionElementSource
.NewInstance(OriginalSourceType
.PrimaryTaxonomicSource
);
139 originalSource
.setCitation(ref
);
140 TaxonNameBase
<?
, ?
> taxonName
;
141 taxonName
= getName(state
, oldName
, oldNameFk
);
// prefer the resolved name object; otherwise keep the old name as plain string if it is not blank
142 if (taxonName
!= null){
143 originalSource
.setNameUsedInSource(taxonName
);
144 }else if(isNotBlank(oldName
)){
145 originalSource
.setOriginalNameString(oldName
);
147 distribution
.addSource(originalSource
);
// NOTE(review): presumably the branch for an unresolved reference (the branch header is not visible);
// the source number is remembered so that doInvoke can report it
149 logger
.warn("reference for sourceNumber "+sourceNumber
+" could not be found. OccurrenceSourceId: " + occurrenceSourceId
);
150 unfoundReferences
.add(sourceNumber
);
// NOTE(review): presumably the branch for a missing distribution (the branch header is not visible)
153 logger
.warn("distribution ("+occurrenceFk
+") for occurrence source (" + occurrenceSourceId
+ ") could not be found." );
// NOTE(review): no visible statement adds anything to objectsToSave, so an empty set
// appears to be saved here - confirm against the complete file
157 logger
.info("Distributions to save: " + objectsToSave
.size());
158 getDescriptionService().saveDescriptionElement(objectsToSave
);
// SQL errors are logged; NOTE(review): the value returned on this path is not visible
161 } catch (SQLException e
) {
162 logger
.error("SQLException:" + e
);
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
 */
/**
 * Collects the foreign keys and source numbers found in the given result set
 * and loads the objects they refer to via
 * getCommonService().getSourcedObjectsByIdInSource(...). The loaded maps are
 * stored in the result under their namespace: distributions, taxon names,
 * nomenclatural references and bibliographic references.
 * <p>
 * NOTE(review): the declarations of nameSpace, cdmClass and idSet, the loop
 * over the result set, the opening try, closing braces and the return
 * statement are not present in this listing - confirm against the complete
 * file.
 *
 * @param rs the result set of the current partition
 * @return NOTE(review): the return statement is not visible; presumably the
 *         map of related objects keyed by namespace - confirm
 * @throws RuntimeException wrapping an SQLException raised while reading rs
 */
171 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
175 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
// id collections, one per kind of related object
178 Set
<String
> occurrenceIdSet
= new HashSet
<String
>();
179 Set
<String
> referenceIdSet
= new HashSet
<String
>();
180 Set
<String
> nameIdSet
= new HashSet
<String
>();
181 Set
<String
> sourceNumberSet
= new HashSet
<String
>();
// gather the keys of a record (NOTE(review): presumably inside a loop over rs - the loop header is not visible)
183 handleForeignKey(rs
, occurrenceIdSet
, "occurrenceFk");
184 handleForeignKey(rs
, nameIdSet
, "oldNameFk");
185 sourceNumberSet
.add(CdmUtils
.NzTrim(rs
.getString("SourceNumber")));
// drop the empty entry (presumably produced by NzTrim for missing source numbers - confirm)
188 sourceNumberSet
.remove("");
// translate the source numbers into reference ids
189 referenceIdSet
= handleSourceNumber(rs
, sourceNumberSet
, result
);
// distributions, looked up by occurrence id
193 nameSpace
= BerlinModelOccurrenceImport
.NAMESPACE
;
194 cdmClass
= Distribution
.class;
195 idSet
= occurrenceIdSet
;
196 Map
<String
, Distribution
> occurrenceMap
= (Map
<String
, Distribution
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
197 result
.put(nameSpace
, occurrenceMap
);
// taxon names
// NOTE(review): no assignment of nameIdSet to idSet is visible before this lookup -
// confirm that the names are not looked up with the occurrence ids
200 nameSpace
= BerlinModelTaxonNameImport
.NAMESPACE
;
201 cdmClass
= TaxonNameBase
.class;
203 Map
<String
, TaxonNameBase
> nameMap
= (Map
<String
, TaxonNameBase
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
204 result
.put(nameSpace
, nameMap
);
// nomenclatural references, looked up by the reference ids derived from the source numbers
207 nameSpace
= BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
;
208 cdmClass
= Reference
.class;
209 idSet
= referenceIdSet
;
210 Map
<String
, Reference
> nomReferenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
211 result
.put(nameSpace
, nomReferenceMap
);
213 //biblio reference map
214 nameSpace
= BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
;
215 cdmClass
= Reference
.class;
216 idSet
= referenceIdSet
;
217 Map
<String
, Reference
> biblioReferenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
218 result
.put(nameSpace
, biblioReferenceMap
);
// an SQL error while reading the result set aborts the import
221 } catch (SQLException e
) {
222 throw new RuntimeException(e
);
227 private Set
<String
> handleSourceNumber(ResultSet rs
, Set
<String
> sourceNumberSet
, Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
) {
228 Map
<String
, Integer
> sourceNumberReferenceIdMap
= this.sourceNumberRefIdMap
;
229 Set
<String
> referenceIdSet
= new HashSet
<String
>();
231 for(String sourceNumber
: sourceNumberSet
){
232 Integer refId
= sourceNumberReferenceIdMap
.get(sourceNumber
);
233 referenceIdSet
.add(String
.valueOf(refId
));
235 return referenceIdSet
;
// Ensures that the warning in getName(...) is logged only once per instance.
246 boolean isFirstTimeNoNameByService
= true;
/**
 * Determines the taxon name for an occurrence source. The name is looked up
 * among the related objects of the state under the taxon name namespace,
 * using oldNameFk as key. If no name is found but an old name string exists,
 * a one-time warning states that the old name is not checked against the
 * names in BerlinModel.
 * <p>
 * NOTE(review): the name service call is commented out, so the list "names"
 * stays empty and neither of the visible size checks can match.
 * NOTE(review): the second size check tests "> 2" while its message says
 * "more than one" - confirm which is intended.
 * NOTE(review): some lines (assignments inside the branches, closing braces,
 * the return statement) are not present in this listing - confirm against the
 * complete file.
 *
 * @param state the current import state
 * @param oldName the old name string of the record, may be null
 * @param oldNameFk the foreign key of the old name, may be null
 * @return NOTE(review): the return statement is not visible; presumably the
 *         resolved name or null - confirm
 */
247 private TaxonNameBase
<?
, ?
> getName(BerlinModelImportState state
, String oldName
, Integer oldNameFk
) {
248 TaxonNameBase
<?
,?
> taxonName
= (TaxonNameBase
)state
.getRelatedObject(BerlinModelTaxonNameImport
.NAMESPACE
, String
.valueOf(oldNameFk
));
249 if (taxonName
== null && oldName
!= null){
// warn only on the first occurrence
250 if (isFirstTimeNoNameByService
){
251 logger
.warn("oldName not checked against names in BerlinModel. Just take it as a string");
252 isFirstTimeNoNameByService
= false;
// always empty as long as the service call below stays commented out
254 List
<NonViralName
> names
= new ArrayList
<NonViralName
>();
255 // names = getNameService().getNamesByNameCache(oldName);
256 if (names
.size() == 1){
259 if (names
.size()> 2){
260 logger
.info("There is more than one name matching oldName: " + oldName
+ ".");
263 //taxonName = nameParser.parseSimpleName(oldName);
/**
 * Creates a map which maps source numbers to reference ids.
 *
 * @throws SQLException
 */
// Builds the lookup from source number to RefId. The query is run on the Source
// taken from the import configuration and reads RefId and IdInSource for rows
// whose IdInSource is neither NULL nor an empty string.
// NOTE(review): the FROM clause of the query, the loop over the result set, the
// closing braces and the return statement are not present in this listing -
// confirm against the complete file.
275 private Map
<String
, Integer
> makeSourceNumberReferenceIdMap(BerlinModelImportState state
) throws SQLException
{
276 Map
<String
, Integer
> result
= new HashMap
<String
, Integer
>();
278 Source source
= state
.getConfig().getSource();
279 String strQuery
= " SELECT RefId, IdInSource " +
281 " WHERE (IdInSource IS NOT NULL) AND (IdInSource NOT LIKE '') ";
283 ResultSet rs
= source
.getResultSet(strQuery
) ;
285 int refId
= rs
.getInt("RefId");
286 String idInSource
= rs
.getString("IdInSource");
// defensive check; the query already excludes NULL values
287 if (idInSource
!= null){
// IdInSource may hold several source numbers separated by '|'; every trimmed part is
// mapped to the same RefId, and a later row overwrites an earlier mapping of the same part
288 String
[] singleSources
= idInSource
.split("\\|");
289 for (String singleSource
: singleSources
){
290 singleSource
= singleSource
.trim();
291 result
.put(singleSource
, refId
);
300 private Reference
getReference(Integer refId
, BerlinModelImportState state
) {
301 Reference
<?
> ref
= (Reference
)state
.getRelatedObject(BerlinModelReferenceImport
.BIBLIO_REFERENCE_NAMESPACE
, String
.valueOf(refId
));
303 ref
= (Reference
)state
.getRelatedObject(BerlinModelReferenceImport
.NOM_REFERENCE_NAMESPACE
, String
.valueOf(refId
));;
312 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
315 protected boolean doCheck(BerlinModelImportState state
){
316 IOValidator
<BerlinModelImportState
> validator
= new BerlinModelOccurrenceSourceImportValidator();
317 return validator
.validate(state
);
/* (non-Javadoc)
 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
 */
323 protected boolean isIgnore(BerlinModelImportState state
){
324 if (! state
.getConfig().isDoOccurrence()){
327 if (!this.checkSqlServerColumnExists(state
.getConfig().getSource(), "emOccurrenceSource", "OccurrenceSourceId")){
328 logger
.error("emOccurrenceSource table or emOccurrenceSourceId does not exist. Must ignore occurrence import");