/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.io
.globis
;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;

import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
import eu.etaxonomy.cdm.common.CdmUtils;
import eu.etaxonomy.cdm.io.common.IImportConfigurator;
import eu.etaxonomy.cdm.io.common.IOValidator;
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
import eu.etaxonomy.cdm.io.globis.validation.GlobisReferenceImportValidator;
import eu.etaxonomy.cdm.io.globis.validation.GlobisSpecTaxaImportValidator;
import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.model.common.Extension;
import eu.etaxonomy.cdm.model.common.Marker;
import eu.etaxonomy.cdm.model.common.MarkerType;
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
import eu.etaxonomy.cdm.model.name.Rank;
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationTest;
import eu.etaxonomy.cdm.model.name.ZoologicalName;
import eu.etaxonomy.cdm.model.occurrence.Collection;
import eu.etaxonomy.cdm.model.occurrence.DerivationEvent;
import eu.etaxonomy.cdm.model.occurrence.DerivationEventType;
import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
import eu.etaxonomy.cdm.model.occurrence.Specimen;
import eu.etaxonomy.cdm.model.reference.Reference;
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
import eu.etaxonomy.cdm.model.reference.ReferenceType;
import eu.etaxonomy.cdm.model.taxon.Synonym;
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
67 public class GlobisSpecTaxImport
extends GlobisImportBase
<Reference
> implements IMappingImport
<Reference
, GlobisImportState
>{
68 private static final Logger logger
= Logger
.getLogger(GlobisSpecTaxImport
.class);
70 private int modCount
= 10000;
71 private static final String pluralString
= "taxa";
72 private static final String dbTableName
= "specTax";
73 private static final Class cdmTargetClass
= Reference
.class;
/**
 * Creates the import and passes the record label, the source table name and the
 * target class constant on to the base-class constructor.
 */
public GlobisSpecTaxImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
83 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
86 protected String
getIdQuery() {
87 String strRecordQuery
=
88 " SELECT specTaxId " +
89 " FROM " + dbTableName
;
90 return strRecordQuery
;
97 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
100 protected String
getRecordQuery(GlobisImportConfigurator config
) {
101 String strRecordQuery
=
102 " SELECT t.*, t.DateCreated as Created_When, t.CreatedBy as Created_Who," +
103 " t.ModifiedBy as Updated_who, t.DateModified as Updated_When, t.SpecRemarks as Notes " +
104 " FROM " + getTableName() + " t " +
105 " WHERE ( t.specTaxId IN (" + ID_LIST_TOKEN
+ ") )";
106 return strRecordQuery
;
/**
 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
 */
// Handles one partition of specTax records: per record it resolves the accepted taxon,
// creates a synonym or validates the accepted taxon depending on SpecSystaxRank, attaches
// nomenclatural reference and type information to the name, and finally saves the
// collected taxa through the taxon service.
// NOTE(review): this view of the method has gaps (the embedded source numbering skips
// lines), so the try/else keywords, closing braces and return statements are not visible.
115 public boolean doPartition(ResultSetPartitioner partitioner
, GlobisImportState state
) {
116 boolean success
= true;
// taxa collected during the partition; saved in a single call near the end of the method
118 Set
<TaxonBase
> objectsToSave
= new HashSet
<TaxonBase
>();
// lookup maps the partitioner provides for the taxon and the reference namespace
120 Map
<String
, Taxon
> taxonMap
= (Map
<String
, Taxon
>) partitioner
.getObjectMap(TAXON_NAMESPACE
);
121 Map
<String
, Reference
> referenceMap
= (Map
<String
, Reference
>) partitioner
.getObjectMap(REFERENCE_NAMESPACE
);
123 ResultSet rs
= partitioner
.getResultSet();
// NOTE(review): the embedded numbering jumps from 123 to 132 here; the declaration of the
// counter i and presumably the loop over rs are not visible - confirm against the full file.
// Progress message: logged whenever the pre-increment counter is a multiple of modCount,
// except for the very first record.
132 if ((i
++ % modCount
) == 0 && i
!= 1 ){ logger
.info(pluralString
+ " handled: " + (i
-1));}
134 Integer specTaxId
= rs
.getInt("SpecTaxId");
135 Integer acceptedTaxonId
= nullSafeInt(rs
, "SpecCurrspecID");
136 String specSystaxRank
= rs
.getString("SpecSystaxRank");
// NOTE(review): sourceRef is not used anywhere in the visible part of this method
141 Reference
<?
> sourceRef
= state
.getTransactionalSourceReference();
// the accepted taxon is looked up by the string form of SpecCurrspecID; it is null-checked below
143 Taxon acceptedTaxon
= taxonMap
.get(String
.valueOf(acceptedTaxonId
));
// the taxon (accepted taxon or synonym) whose name receives reference and type information
144 TaxonBase
<?
> thisTaxon
= null;
// a blank SpecSystaxRank has no visible handling (numbering skips 147)
146 if (isBlank(specSystaxRank
) ){
148 }else if (specSystaxRank
.equals("synonym")){
149 Synonym synonym
= getSynonym(state
, rs
);
150 if (acceptedTaxon
== null){
152 logger
.warn("Accepted taxon (" + acceptedTaxonId
+ ") not found for synonym "+ specTaxId
);
// NOTE(review): no 'else' is visible between the warning above and the call below (numbering
// skips 153). If the call is reached with acceptedTaxon == null it throws a
// NullPointerException - confirm the branch structure in the full file. The numbering also
// skips 155-156, so it cannot be seen here whether thisTaxon is assigned in this branch.
154 acceptedTaxon
.addSynonym(synonym
, SynonymRelationshipType
.SYNONYM_OF());
157 }else if (specSystaxRank
.equals("species")){
158 validateAcceptedTaxon(acceptedTaxon
, rs
, specTaxId
, acceptedTaxonId
);
159 thisTaxon
= acceptedTaxon
;
// any other SpecSystaxRank value is only reported
161 logger
.warn(String
.format("Unhandled specSystaxRank %s in specTaxId %d", specSystaxRank
, specTaxId
));
164 if (thisTaxon
!= null){
165 ZoologicalName name
= CdmBase
.deproxy(thisTaxon
.getName(), ZoologicalName
.class);
167 handleNomRef(state
, referenceMap
, rs
, name
);
169 handleTypeInformation(state
,rs
, name
);
172 // this.doIdCreatedUpdatedNotes(state, ref, rs, refId, REFERENCE_NAMESPACE);
// NOTE(review): the accepted taxon (not thisTaxon) is what gets queued for saving
174 objectsToSave
.add(acceptedTaxon
);
// failures while handling a record are reported as a warning containing the exception message
178 } catch (Exception e
) {
179 logger
.warn("Exception in specTax: SpecTaxId " + specTaxId
+ ". " + e
.getMessage());
185 // logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
// NOTE(review): the summary below is logged at warn level
187 logger
.warn(pluralString
+ " to save: " + objectsToSave
.size());
188 getTaxonService().save(objectsToSave
);
// any other exception is logged as an error; the statement that follows is not visible
191 } catch (Exception e
) {
192 logger
.error("Exception: " + e
);
198 private Pattern patternAll
= Pattern
.compile("(.+,\\s.+)(\\(.+\\))");
// Reads the type depository and type country columns of the current record and, for every
// ';'-separated depository entry, creates a specimen derived from one common field
// observation and adds a type designation for it to the given name.
// NOTE(review): this view of the method has gaps (the embedded source numbering skips
// lines); the bodies of some branches, the else keywords and the closing braces are not visible.
201 private void handleTypeInformation(GlobisImportState state
, ResultSet rs
, ZoologicalName name
) throws SQLException
{
203 String specTypeDepositoriesStr
= rs
.getString("SpecTypeDepository");
204 String countryString
= rs
.getString("SpecTypeCountry");
// NOTE(review): the body of this guard (numbering skips 207-209) is not visible; presumably
// the method returns when neither string carries information - confirm.
206 if (! hasTypeInformation(specTypeDepositoriesStr
, countryString
)){
// a single field observation (carrying the type country) is shared by all specimens below
210 FieldObservation fieldObservation
= makeTypeFieldObservation(state
, countryString
);
// NOTE(review): hasTypeInformation is true as soon as ONE of the two strings is not blank,
// so specTypeDepositoriesStr can still be null here (ResultSet.getString returns null for
// SQL NULL) and split would then throw a NullPointerException - confirm.
212 String
[] specTypeDepositories
= specTypeDepositoriesStr
.split(";");
213 //TODO different issues
// String.split drops trailing empty strings, so a length of 0 only occurs for a string that
// consists of separators only; the handling of that case is not visible (numbering skips 215)
214 if (specTypeDepositories
.length
== 0){
217 for (String specTypeDepositoryStr
: specTypeDepositories
){
218 specTypeDepositoryStr
= specTypeDepositoryStr
.trim();
221 Specimen specimen
= makeSingleTypeSpecimen(fieldObservation
);
// "??" is stored as the title cache of the specimen
223 if (specTypeDepositoryStr
.equals("??")){
226 specimen
.setTitleCache("??", true);
// NOTE(review): the numbering skips 227, presumably the else keyword; the calls below strip
// additional information from the string and derive the collection from the remainder
228 specTypeDepositoryStr
= makeAdditionalSpecimenInformation(
229 specTypeDepositoryStr
, specimen
);
231 makeCollection(specTypeDepositoryStr
, specimen
);
235 makeTypeDesignation(name
, rs
, specimen
);
243 private boolean hasTypeInformation(String specTypeDepositoriesStr
, String countryString
) {
244 boolean result
= false;
245 result
|= isNotBlank(specTypeDepositoriesStr
) || isNotBlank(countryString
);
252 * @param specTypeDepositoryStr
255 protected void makeCollection(String specTypeDepositoryStr
, Specimen specimen
) {
257 Map
<String
, Collection
> collectionMap
= new HashMap
<String
, Collection
>();
261 String
[] split
= specTypeDepositoryStr
.split(",");
262 if (split
.length
!= 2){
263 if (split
.length
== 1 && split
[0].startsWith("coll.")){
264 Collection collection
= Collection
.NewInstance();
265 collection
.setName(split
[0]);
267 logger
.warn("Split size is not 2: " + specTypeDepositoryStr
);
271 String collectionStr
= split
[0];
272 String location
= split
[1];
275 Collection collection
= collectionMap
.get(collectionStr
);
276 if (collection
== null){
277 collection
= Collection
.NewInstance();
278 collection
.setCode(collectionStr
);
279 collection
.setTownOrLocation(split
[1]);
280 }else if (CdmUtils
.nullSafeEqual(location
, collection
.getTownOrLocation())){
281 String message
= "Location (%s) is not equal to location (%s) of existing collection";
282 logger
.warn(String
.format(message
, location
, collection
.getTownOrLocation(), collection
.getCode()));
285 specimen
.setCollection(collection
);
/**
 * @param specTypeDepositoriesStr
 * @param specTypeDepositoryStr
 */
// Strips additional information from a single depository string and returns the remainder:
// a trailing '?' is removed (a Marker of type IS_DOUBTFUL is created for the specimen via
// Marker.NewInstance), and a bracket part matched by patternAll is removed and used as the
// title cache of the specimen.
// NOTE(review): this view of the method has gaps (the embedded source numbering skips
// lines); the rest of the parameter list (the 'specimen' used below), the condition around
// the matcher groups and the closing braces are not visible.
299 protected String
makeAdditionalSpecimenInformation( String specTypeDepositoryStr
,
302 if (specTypeDepositoryStr
.endsWith("?")){
303 Marker
.NewInstance(specimen
, true, MarkerType
.IS_DOUBTFUL());
// drop the trailing '?' and surrounding whitespace
304 specTypeDepositoryStr
= specTypeDepositoryStr
.substring(0, specTypeDepositoryStr
.length() -1).trim();
308 Matcher matcher
= patternAll
.matcher(specTypeDepositoryStr
);
// NOTE(review): the numbering skips 309-310; Matcher.group may only be called after a
// successful matches()/find(), so that call is presumably on a line not visible here - confirm.
311 String brackets
= matcher
.group(2);
// group 2 includes the parentheses; strip the first and the last character
312 brackets
= brackets
.substring(1, brackets
.length()-1);
// the literal tokens [mm], [m], [ff], [f] are replaced by the Unicode characters given below
314 brackets
= brackets
.replace("[mm]", "\u2642\u2642");
315 brackets
= brackets
.replace("[m]", "\u2642");
316 brackets
= brackets
.replace("[ff]", "\u2640\u2640");
317 brackets
= brackets
.replace("[f]", "\u2640");
// any square bracket left over after the replacements is only reported
319 if (brackets
.contains("[") || brackets
.contains("]")){
320 logger
.warn ("There are still '[', ']' in the bracket part: " + brackets
);
323 //TODO replace mm/ff by Unicode male
324 specimen
.setTitleCache(brackets
, true);
// the text in front of the bracket part (group 1) is what remains of the depository string
325 specTypeDepositoryStr
= matcher
.group(1).trim();
327 return specTypeDepositoryStr
;
334 * @param fieldObservation
337 protected Specimen
makeSingleTypeSpecimen(FieldObservation fieldObservation
) {
338 DerivationEvent derivEvent
= DerivationEvent
.NewInstance();
339 // derivEvent.setType(DerivationEventType.ACCESSIONING());
340 fieldObservation
.addDerivationEvent(derivEvent
);
341 Specimen specimen
= Specimen
.NewInstance();
342 specimen
.setDerivedFrom(derivEvent
);
352 * @throws SQLException
354 protected FieldObservation
makeTypeFieldObservation(GlobisImportState state
,
355 String countryString
) throws SQLException
{
357 DerivedUnitType unitType
= DerivedUnitType
.Specimen
;
358 DerivedUnitFacade facade
= DerivedUnitFacade
.NewInstance(unitType
);
360 WaterbodyOrCountry typeCountry
= getCountry(state
, countryString
);
361 facade
.setCountry(typeCountry
);
362 FieldObservation fieldObservation
= facade
.innerFieldObservation();
363 return fieldObservation
;
374 * @throws SQLException
376 protected void makeTypeDesignation(ZoologicalName name
, ResultSet rs
, Specimen specimen
) throws SQLException
{
378 String specType
= rs
.getString("SpecType");
379 SpecimenTypeDesignationStatus status
= getTypeDesigType(specType
);
381 SpecimenTypeDesignation typeDesignation
= SpecimenTypeDesignation
.NewInstance();
382 typeDesignation
.setTypeStatus(status
);
383 typeDesignation
.setTypeSpecimen(specimen
);
385 name
.addTypeDesignation(typeDesignation
, true);
391 private SpecimenTypeDesignationStatus
getTypeDesigType(String specType
) {
392 if (isBlank(specType
) ){
394 }else if (specType
.matches("Holotype(Holotypus)?")){
395 return SpecimenTypeDesignationStatus
.HOLOTYPE();
396 }else if (specType
.matches("Neotype")){
397 return SpecimenTypeDesignationStatus
.NEOTYPE();
398 }else if (specType
.matches("Syntype(\\(s\\))?")){
399 return SpecimenTypeDesignationStatus
.SYNTYPE();
400 }else if (specType
.matches("Lectotype")){
401 return SpecimenTypeDesignationStatus
.LECTOTYPE();
403 logger
.warn("SpecimenTypeDesignationStatus does not match: " + specType
);
413 * @param referenceMap
417 * @throws SQLException
419 private Reference
<?
> handleNomRef(GlobisImportState state
, Map
<String
, Reference
> referenceMap
, ResultSet rs
,
420 ZoologicalName name
) throws SQLException
{
422 Integer refId
= nullSafeInt(rs
, "fiSpecRefID");
423 Reference
<?
> nomRef
= null;
425 nomRef
= referenceMap
.get(String
.valueOf(refId
));
426 if (nomRef
== null && state
.getConfig().getDoReferences().equals(state
.getConfig().getDoReferences().ALL
)){
427 logger
.warn("Reference " + refId
+ " could not be found.");
428 }else if (nomRef
!= null){
429 name
.setNomenclaturalReference(nomRef
);
434 String refDetail
= rs
.getString("SpecPage");
435 if (isNotBlank(refDetail
)){
436 name
.setNomenclaturalMicroReference(refDetail
);
444 private void validateAcceptedTaxon(Taxon acceptedTaxon
, ResultSet rs
, Integer specTaxId
, Integer acceptedTaxonId
) throws SQLException
{
445 if (acceptedTaxon
== null){
446 logger
.warn("Accepted taxon is null for taxon taxon to validate: ");
451 ZoologicalName name
= CdmBase
.deproxy(acceptedTaxon
.getName(), ZoologicalName
.class);
453 String specName
= rs
.getString("SpecName");
454 if (! name
.getSpecificEpithet().equals(specName
)){
455 logger
.warn(String
.format("Species epithet is not equal for accepted taxon: %s - %s", name
.getSpecificEpithet(), specName
));
// Creates a synonym for the current record: a new zoological name of the rank given in the
// SpecRank column, with name parts, name cache, author and year filled from the record, is
// wrapped into a Synonym whose second constructor argument is the transactional source
// reference of the state.
// NOTE(review): this view of the method has gaps (the embedded source numbering skips
// lines); the declaration of 'rank', the try keyword, the body of the catch block and the
// return statement are not visible.
463 private Synonym
getSynonym(GlobisImportState state
, ResultSet rs
) throws SQLException
{
465 String rankStr
= rs
.getString("SpecRank");
// the rank is only resolved for a non-blank SpecRank value
467 if (isNotBlank(rankStr
)){
// lookup by name or abbreviation for the zoological code (ICZN)
469 rank
= Rank
.getRankByNameOrAbbreviation(rankStr
, NomenclaturalCode
.ICZN
, true);
// NOTE(review): what happens for an unknown rank string is not visible (numbering skips 471-475)
470 } catch (UnknownCdmTypeException e
) {
476 ZoologicalName name
= ZoologicalName
.NewInstance(rank
);
// note that the raw rank string (not the resolved Rank) is passed on for the name cache
477 makeNamePartsAndCache(state
, rs
, rankStr
, name
);
480 // name.setGenusOrUninomial(genusOrUninomial);
481 String authorStr
= rs
.getString("SpecAuthor");
482 String yearStr
= rs
.getString("SpecYear");
// author and year are joined with ", " and handed to handleAuthorAndYear as one string
483 String authorAndYearStr
= CdmUtils
.concat(", ", authorStr
, yearStr
);
484 handleAuthorAndYear(authorAndYearStr
, name
);
486 Synonym synonym
= Synonym
.NewInstance(name
, state
.getTransactionalSourceReference());
494 private void makeNamePartsAndCache(GlobisImportState state
, ResultSet rs
, String rank
, ZoologicalName name
) throws SQLException
{
495 String citedFamily
= rs
.getString("SpecCitedFamily");
496 String citedGenus
= rs
.getString("SpecCitedGenus");
497 String citedSpecies
= rs
.getString("SpecCitedSpecies");
498 String citedSubspecies
= rs
.getString("SpecCitedSubspecies");
499 String lastEpithet
= rs
.getString("SpecName");
502 String cache
= CdmUtils
.concat(" ", new String
[]{citedFamily
, citedGenus
, citedSpecies
, citedSubspecies
, rank
, lastEpithet
});
503 name
.setGenusOrUninomial(citedGenus
);
504 //TODO sperate authors
505 if (isBlank(citedSpecies
)){
506 name
.setSpecificEpithet(lastEpithet
);
508 name
.setSpecificEpithet(citedSpecies
);
509 if (isBlank(citedSubspecies
)){
510 name
.setInfraSpecificEpithet(lastEpithet
);
514 //TODO check if cache needs protection
515 name
.setNameCache(cache
, true);
521 private boolean isInfraSpecies(GlobisImportState state
, ResultSet rs
, Rank rank
) {
522 // TODO Auto-generated method stub
529 private Reference
<?
> getJournal(GlobisImportState state
, ResultSet rs
, String refJournal
) throws SQLException
{
532 Reference
<?
> journal
= ReferenceFactory
.newJournal();
533 String issn
= rs
.getString("RefISSN");
534 if (StringUtils
.isNotBlank(issn
)){
535 issn
.replaceAll("ISSN", "").trim();
536 journal
.setIssn(issn
);
542 journal
.setTitle(refJournal
);
550 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
552 public Reference
<?
> createObject(ResultSet rs
, GlobisImportState state
)
553 throws SQLException
{
555 String refType
= rs
.getString("RefType");
556 if (refType
== null){
557 ref
= ReferenceFactory
.newGeneric();
558 }else if (refType
== "book"){
559 ref
= ReferenceFactory
.newBook();
560 }else if (refType
== "paper in journal"){
561 ref
= ReferenceFactory
.newArticle();
562 }else if (refType
.startsWith("unpublished") ){
563 ref
= ReferenceFactory
.newGeneric();
564 }else if (refType
.endsWith("paper in journal")){
565 ref
= ReferenceFactory
.newArticle();
566 }else if (refType
== "paper in book"){
567 ref
= ReferenceFactory
.newBookSection();
568 }else if (refType
== "paper in journalwebsite"){
569 ref
= ReferenceFactory
.newArticle();
571 logger
.warn("Unknown reference type: " + refType
);
572 ref
= ReferenceFactory
.newGeneric();
/**
 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
 */
// Collects the foreign keys (accepted taxon ids and reference ids) of a partition and loads
// the matching taxa and references by their id in the source through the common service.
// The result maps each namespace to the map of objects loaded for it.
// NOTE(review): this view of the method has gaps (the embedded source numbering skips
// lines); the declarations of nameSpace, cdmClass and idSet, the try keyword, presumably a
// loop over rs and the return statement are not visible.
580 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
584 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
// id sets filled by handleForeignKey from the two foreign key columns
586 Set
<String
> taxonIdSet
= new HashSet
<String
>();
587 Set
<String
> referenceIdSet
= new HashSet
<String
>();
590 handleForeignKey(rs
, taxonIdSet
, "SpecCurrspecID");
591 handleForeignKey(rs
, referenceIdSet
, "fiSpecRefID");
// taxon map
595 nameSpace
= TAXON_NAMESPACE
;
596 cdmClass
= Taxon
.class;
// NOTE(review): the numbering skips 597; the assignment of idSet for the taxon lookup
// (presumably taxonIdSet, by analogy with the reference lookup below) is not visible - confirm.
598 Map
<String
, Taxon
> objectMap
= (Map
<String
, Taxon
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
599 result
.put(nameSpace
, objectMap
);
// reference map
602 nameSpace
= REFERENCE_NAMESPACE
;
603 cdmClass
= Reference
.class;
604 idSet
= referenceIdSet
;
605 Map
<String
, Reference
> referenceMap
= (Map
<String
, Reference
>)getCommonService().getSourcedObjectsByIdInSource(cdmClass
, idSet
, nameSpace
);
606 result
.put(nameSpace
, referenceMap
);
// an SQLException is rethrown unchecked with the original exception as cause
609 } catch (SQLException e
) {
610 throw new RuntimeException(e
);
616 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
619 protected boolean doCheck(GlobisImportState state
){
620 IOValidator
<GlobisImportState
> validator
= new GlobisSpecTaxaImportValidator();
621 return validator
.validate(state
);
626 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
628 protected boolean isIgnore(GlobisImportState state
){
629 return ! state
.getConfig().isDoSpecTaxa();