/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.globis
;
12 import java
.sql
.ResultSet
;
13 import java
.sql
.SQLException
;
14 import java
.util
.HashMap
;
15 import java
.util
.HashSet
;
19 import org
.apache
.commons
.lang
.StringUtils
;
20 import org
.apache
.log4j
.Logger
;
21 import org
.springframework
.stereotype
.Component
;
23 import com
.yourkit
.util
.Strings
;
25 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
26 import eu
.etaxonomy
.cdm
.io
.algaterra
.AlgaTerraCollectionImport
;
27 import eu
.etaxonomy
.cdm
.io
.algaterra
.AlgaTerraSpecimenImport
;
28 import eu
.etaxonomy
.cdm
.io
.berlinModel
.in
.BerlinModelTaxonImport
;
29 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
30 import eu
.etaxonomy
.cdm
.io
.common
.ResultSetPartitioner
;
31 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.UndefinedTransformerMethodException
;
32 import eu
.etaxonomy
.cdm
.io
.globis
.validation
.GlobisCurrentSpeciesImportValidator
;
33 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
34 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
35 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
36 import eu
.etaxonomy
.cdm
.model
.description
.CommonTaxonName
;
37 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
38 import eu
.etaxonomy
.cdm
.model
.description
.PresenceTerm
;
39 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
40 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
41 import eu
.etaxonomy
.cdm
.model
.location
.WaterbodyOrCountry
;
42 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
43 import eu
.etaxonomy
.cdm
.model
.name
.ZoologicalName
;
44 import eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
;
45 import eu
.etaxonomy
.cdm
.model
.occurrence
.FieldObservation
;
46 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
47 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
48 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
49 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
50 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
51 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.StringNotParsableException
;
52 import eu
.etaxonomy
.cdm
.strategy
.parser
.INonViralNameParser
;
53 import eu
.etaxonomy
.cdm
.strategy
.parser
.NonViralNameParserImpl
;
62 public class GlobisCurrentSpeciesImport
extends GlobisImportBase
<Taxon
> {
63 private static final Logger logger
= Logger
.getLogger(GlobisCurrentSpeciesImport
.class);
65 private int modCount
= 10000;
66 private static final String pluralString
= "current taxa";
67 private static final String dbTableName
= "current_species";
68 private static final Class cdmTargetClass
= Taxon
.class; //not needed
/**
 * Creates the import and hands the record label, the source table name and
 * the target class to the base-class constructor.
 */
public GlobisCurrentSpeciesImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
78 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
81 protected String
getIdQuery() {
82 String strRecordQuery
=
83 " SELECT IDcurrentspec " +
84 " FROM " + dbTableName
;
85 return strRecordQuery
;
92 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
95 protected String
getRecordQuery(GlobisImportConfigurator config
) {
96 String strRecordQuery
=
97 " SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
98 " cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " +
99 " FROM " + getTableName() + " cs " +
100 " WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN
+ ") )";
101 return strRecordQuery
;
107 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
110 public boolean doPartition(ResultSetPartitioner partitioner
, GlobisImportState state
) {
111 boolean success
= true;
113 Set
<TaxonBase
> objectsToSave
= new HashSet
<TaxonBase
>();
115 Map
<String
, Taxon
> taxonMap
= (Map
<String
, Taxon
>) partitioner
.getObjectMap(TAXON_NAMESPACE
);
116 // Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
118 ResultSet rs
= partitioner
.getResultSet();
120 Classification classification
= getClassification(state
);
129 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info(pluralString
+ " handled: " + (i
-1));}
131 Integer taxonId
= rs
.getInt("IDcurrentspec");
134 //String dtSpcJahr -> ignore !
135 //empty: fiSpcLiteratur
145 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
146 Taxon nextHigherTaxon
= null;
148 boolean hasNewParent
= false; //true if any parent is new
151 Taxon species
= createObject(rs
, state
);
154 String familyStr
= rs
.getString("dtSpcFamakt");
155 String subFamilyStr
= rs
.getString("dtSpcSubfamakt");
156 String tribeStr
= rs
.getString("dtSpcTribakt");
159 Taxon family
= getTaxon(state
, rs
, familyStr
, null, Rank
.FAMILY(), null, taxonMap
);
162 Taxon subFamily
= getTaxon(state
, rs
, subFamilyStr
, null, Rank
.SUBFAMILY(), null, taxonMap
);
163 Taxon subFamilyParent
= getParent(subFamily
, classification
);
164 if (subFamilyParent
!= null){
165 if (! compareTaxa(family
, subFamilyParent
)){
166 logger
.warn("Current family and parent of subfamily are not equal: " + taxonId
);
169 classification
.addParentChild(family
, subFamily
, sourceRef
, null);
171 nextHigherTaxon
= subFamily
;
174 Taxon tribe
= getTaxon(state
, rs
, tribeStr
, null, Rank
.TRIBE(), null, taxonMap
);
176 Taxon tribeParent
= getParent(tribe
, classification
);
177 if (tribeParent
!= null){
178 if (! compareTaxa(subFamily
, tribeParent
)){
179 logger
.warn("Current subFamily and parent of tribe are not equal: " + taxonId
);
182 classification
.addParentChild(subFamily
, tribe
, sourceRef
, null);
184 nextHigherTaxon
= tribe
;
189 String genusStr
= rs
.getString("dtSpcGenusakt");
190 String genusAuthorStr
= rs
.getString("dtSpcGenusaktauthor");
191 Taxon genus
= getTaxon(state
, rs
, genusStr
, null, Rank
.GENUS(), genusAuthorStr
, taxonMap
);
192 Taxon genusParent
= getParent(genus
, classification
);
194 if (genusParent
!= null){
195 if (! compareTaxa(genusParent
, nextHigherTaxon
)){
196 logger
.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId
);
199 classification
.addParentChild(nextHigherTaxon
, genus
, sourceRef
, null);
201 nextHigherTaxon
= genus
;
204 String subGenusStr
= CdmBase
.deproxy(species
.getName(), ZoologicalName
.class).getInfraGenericEpithet();
205 String subGenusAuthorStr
= rs
.getString("dtSpcSubgenaktauthor");
206 boolean hasSubgenus
= StringUtils
.isNotBlank(subGenusStr
) || StringUtils
.isNotBlank(subGenusAuthorStr
);
208 Taxon subGenus
= getTaxon(state
, rs
, genusStr
, subGenusStr
, Rank
.SUBGENUS(), subGenusAuthorStr
, taxonMap
);
209 classification
.addParentChild(nextHigherTaxon
, subGenus
, sourceRef
, null);
210 nextHigherTaxon
= subGenus
;
213 classification
.addParentChild(nextHigherTaxon
, species
, sourceRef
, null);
215 handleCountries(state
, rs
, species
);
217 handleCommonNames(state
, rs
, species
);
219 this.doIdCreatedUpdatedNotes(state
, species
, rs
, taxonId
, TAXON_NAMESPACE
);
221 objectsToSave
.add(species
);
224 } catch (Exception e
) {
225 logger
.warn("Exception in current_species: IDcurrentspec " + taxonId
+ ". " + e
.getMessage());
226 // e.printStackTrace();
231 // logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
233 logger
.warn(pluralString
+ " to save: " + objectsToSave
.size());
234 getTaxonService().save(objectsToSave
);
237 } catch (SQLException e
) {
238 logger
.error("SQLException:" + e
);
243 private void handleCountries(GlobisImportState state
, ResultSet rs
, Taxon species
) throws SQLException
{
244 String countriesStr
= rs
.getString("dtSpcCountries");
245 if (isBlank(countriesStr
)){
248 String
[] countriesSplit
= countriesStr
.split(";");
249 for (String countryStr
: countriesSplit
){
250 if (isBlank(countryStr
)){
253 countryStr
= normalizeCountry(countryStr
);
256 WaterbodyOrCountry country
= getCountry(state
, countryStr
);
258 if (country
!= null){
259 TaxonDescription desc
= getTaxonDescription(species
, state
.getTransactionalSourceReference(), false, true);
260 Distribution distribution
= Distribution
.NewInstance(country
, PresenceTerm
.PRESENT());
261 desc
.addElement(distribution
);
263 logger
.warn("Country string not recognized: " + countryStr
);
274 private String
normalizeCountry(String countryStr
) {
275 String result
= countryStr
.trim();
276 if (result
.endsWith(".")){
277 result
= result
.substring(0,result
.length() - 1);
282 private void handleCommonNames(GlobisImportState state
, ResultSet rs
, Taxon species
) throws SQLException
{
283 String commonNamesStr
= rs
.getString("vernacularnames");
284 if (isBlank(commonNamesStr
)){
287 String
[] commonNamesSplit
= commonNamesStr
.split(";");
288 for (String commonNameStr
: commonNamesSplit
){
289 if (isBlank(commonNameStr
)){
292 Language language
= null; //TODO
293 CommonTaxonName commonName
= CommonTaxonName
.NewInstance(commonNameStr
, language
);
294 TaxonDescription desc
= getTaxonDescription(species
, state
.getTransactionalSourceReference(), false, true);
295 desc
.addElement(commonName
);
303 * Compares 2 taxa, returns true of both taxa look similar
305 * @param nextHigherTaxon
308 private boolean compareTaxa(Taxon taxon1
, Taxon taxon2
) {
309 ZoologicalName name1
= CdmBase
.deproxy(taxon1
.getName(), ZoologicalName
.class);
310 ZoologicalName name2
= CdmBase
.deproxy(taxon2
.getName(), ZoologicalName
.class);
311 if (!name1
.getRank().equals(name2
.getRank())){
314 if (! name1
.getTitleCache().equals(name2
.getTitleCache())){
323 private Taxon
getParent(Taxon child
, Classification classification
) {
324 for (TaxonNode node
: child
.getTaxonNodes()){
325 if (node
.getClassification().equals(classification
)){
326 if (node
.getParent() != null){
327 return node
.getParent().getTaxon();
339 private Taxon
getTaxon(GlobisImportState state
, ResultSet rs
, String uninomial
, String infraGenericEpi
, Rank rank
, String author
, Map
<String
, Taxon
> taxonMap
) {
340 if (isBlank(uninomial
)){
344 String keyEpithet
= StringUtils
.isNotBlank(infraGenericEpi
)? infraGenericEpi
: uninomial
;
346 String key
= keyEpithet
+ "@" + CdmUtils
.Nz(author
) + "@" + rank
.getTitleCache();
347 Taxon taxon
= taxonMap
.get(key
);
349 ZoologicalName name
= ZoologicalName
.NewInstance(rank
);
350 name
.setGenusOrUninomial(uninomial
);
351 if (isNotBlank(infraGenericEpi
)){
352 name
.setInfraGenericEpithet(infraGenericEpi
);
354 taxon
= Taxon
.NewInstance(name
, state
.getTransactionalSourceReference());
356 taxonMap
.put(key
, taxon
);
357 handleAuthorAndYear(author
, name
);
358 getTaxonService().save(taxon
);
365 //fast and dirty is enough here
366 private Classification classification
;
368 private Classification
getClassification(GlobisImportState state
) {
369 if (this.classification
== null){
370 String name
= state
.getConfig().getClassificationName();
371 Reference
<?
> reference
= state
.getTransactionalSourceReference();
372 this.classification
= Classification
.NewInstance(name
, reference
, Language
.DEFAULT());
373 classification
.setUuid(state
.getConfig().getClassificationUuid());
374 getClassificationService().save(classification
);
376 return this.classification
;
381 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
383 public Taxon
createObject(ResultSet rs
, GlobisImportState state
)
384 throws SQLException
{
385 String speciesEpi
= rs
.getString("dtSpcSpcakt");
386 String subGenusEpi
= rs
.getString("dtSpcSubgenakt");
387 String genusEpi
= rs
.getString("dtSpcGenusakt");
388 String author
= rs
.getString("dtSpcAutor");
391 ZoologicalName zooName
= ZoologicalName
.NewInstance(Rank
.SPECIES());
392 zooName
.setSpecificEpithet(speciesEpi
);
393 if (StringUtils
.isNotBlank(subGenusEpi
)){
394 zooName
.setInfraGenericEpithet(subGenusEpi
);
396 zooName
.setGenusOrUninomial(genusEpi
);
397 handleAuthorAndYear(author
, zooName
);
399 Taxon taxon
= Taxon
.NewInstance(zooName
, state
.getTransactionalSourceReference());
409 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
411 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
415 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
417 Set
<String
> taxonIdSet
= new HashSet
<String
>();
420 // handleForeignKey(rs, taxonIdSet, "taxonId");
424 nameSpace
= TAXON_NAMESPACE
;
425 cdmClass
= Taxon
.class;
427 Map
<String
, Taxon
> objectMap
= (Map
<String
, Taxon
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
428 result
.put(nameSpace
, objectMap
);
431 } catch (SQLException e
) {
432 throw new RuntimeException(e
);
438 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
441 protected boolean doCheck(GlobisImportState state
){
442 IOValidator
<GlobisImportState
> validator
= new GlobisCurrentSpeciesImportValidator();
443 return validator
.validate(state
);
448 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
450 protected boolean isIgnore(GlobisImportState state
){
451 return ! state
.getConfig().isDoCurrentTaxa();