2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.pesi
.erms
;
12 import java
.sql
.ResultSet
;
13 import java
.sql
.SQLException
;
14 import java
.util
.HashMap
;
15 import java
.util
.HashSet
;
18 import java
.util
.UUID
;
20 import org
.apache
.log4j
.Logger
;
21 import org
.springframework
.stereotype
.Component
;
23 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
24 import eu
.etaxonomy
.cdm
.io
.common
.IOValidator
;
25 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.DbIgnoreMapper
;
26 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.DbImportExtensionMapper
;
27 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.DbImportLsidMapper
;
28 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.DbImportMapping
;
29 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.DbImportObjectCreationMapper
;
30 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.DbImportStringMapper
;
31 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.DbNotYetImplementedMapper
;
32 import eu
.etaxonomy
.cdm
.io
.common
.mapping
.IMappingImport
;
33 import eu
.etaxonomy
.cdm
.io
.pesi
.erms
.validation
.ErmsTaxonImportValidator
;
34 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
35 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
36 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
37 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
38 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
39 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
40 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
41 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
50 public class ErmsTaxonImport
extends ErmsImportBase
<TaxonBase
> implements IMappingImport
<TaxonBase
, ErmsImportState
>{
51 private static final Logger logger
= Logger
.getLogger(ErmsTaxonImport
.class);
53 public static final UUID TNS_EXT_UUID
= UUID
.fromString("41cb0450-ac84-4d73-905e-9c7773c23b05");
55 private DbImportMapping mapping
;
57 //second path is not used anymore, there is now an ErmsTaxonRelationImport class instead
58 private boolean isSecondPath
= false;
60 private int modCount
= 10000;
61 private static final String pluralString
= "taxa";
62 private static final String dbTableName
= "tu";
63 private static final Class cdmTargetClass
= TaxonBase
.class;
/**
 * Creates the taxon import and hands the plural label ("taxa"), the source table name ("tu")
 * and the target class (TaxonBase) to the base class constructor.
 */
public ErmsTaxonImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
72 // * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#getIdQuery()
75 // protected String getIdQuery() {
76 // String strQuery = " SELECT id FROM tu WHERE id < 300000 " ;
82 * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#getMapping()
84 protected DbImportMapping
getMapping() {
86 mapping
= new DbImportMapping();
88 mapping
.addMapper(DbImportObjectCreationMapper
.NewInstance(this, "id", TAXON_NAMESPACE
)); //id + tu_status
89 UUID tsnUuid
= ErmsTransformer
.uuidTsn
;
90 mapping
.addMapper(DbImportLsidMapper
.NewInstance("GUID", "lsid"));
92 mapping
.addMapper(DbImportExtensionMapper
.NewInstance("tsn", tsnUuid
, "TSN", "TSN", "TSN"));
93 // mapping.addMapper(DbImportStringMapper.NewInstance("tu_name", "(NonViralName)name.nameCache"));
95 UUID displayNameUuid
= ErmsTransformer
.uuidDisplayName
;
96 mapping
.addMapper(DbImportExtensionMapper
.NewInstance("tu_displayname", displayNameUuid
, "display name", "display name", "display name"));
97 UUID fuzzyNameUuid
= ErmsTransformer
.uuidFuzzyName
;
98 mapping
.addMapper(DbImportExtensionMapper
.NewInstance("tu_fuzzyname", fuzzyNameUuid
, "fuzzy name", "fuzzy name", "fuzzy name"));
99 mapping
.addMapper(DbImportStringMapper
.NewInstance("tu_authority", "(NonViralName)name.authorshipCache"));
101 UUID fossilStatusUuid
= ErmsTransformer
.uuidFossilStatus
;
102 mapping
.addMapper(DbImportExtensionMapper
.NewInstance("fossil_name", fossilStatusUuid
, "fossil status", "fossil status", "fos. stat."));
103 // mapping.addMapper(DbImportExtensionTypeCreationMapper.NewInstance("fossil_name", EXTENSION_TYPE_NAMESPACE, "fossil_name", "fossil_name", "fossil_name"));
105 UUID unacceptUuid
= ErmsTransformer
.uuidUnacceptReason
;
106 mapping
.addMapper(DbImportExtensionMapper
.NewInstance("tu_unacceptreason", unacceptUuid
, "unaccept reason", "unaccept reason", "reason"));
108 UUID qualityUuid
= ErmsTransformer
.uuidQualityStatus
;
109 mapping
.addMapper(DbImportExtensionMapper
.NewInstance("qualitystatus_name", qualityUuid
, "quality status", "quality status", "quality status")); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor
112 // UUID hiddenUuid = ErmsTransformer.uuidHidden;
113 // mapping.addMapper(DbImportMarkerCreationMapper.Mapper.NewInstance("qualitystatus_name", qualityUuid, "quality status", "quality status", "quality status")); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor
115 //not yet implemented
116 mapping
.addMapper(DbNotYetImplementedMapper
.NewInstance("tu_sp", "included in rank/object creation"));
120 mapping
.addMapper(DbIgnoreMapper
.NewInstance("tu_marine", "marine flag not implemented in PESI"));
121 mapping
.addMapper(DbIgnoreMapper
.NewInstance("tu_brackish", "brackish flag not implemented in PESI"));
122 mapping
.addMapper(DbIgnoreMapper
.NewInstance("tu_fresh", "freshwater flag not implemented in PESI"));
123 mapping
.addMapper(DbIgnoreMapper
.NewInstance("tu_terrestrial", "terrestrial flag not implemented in PESI"));
124 mapping
.addMapper(DbIgnoreMapper
.NewInstance("tu_fossil", "tu_fossil implemented as foreign key"));
125 mapping
.addMapper(DbIgnoreMapper
.NewInstance("cache_citation", "citation cache not needed in PESI"));
130 //not in current version anymore
131 // mapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_hidden", "Needs DbImportMarkerMapper implemented"));
132 // UUID completenessUuid = ErmsTransformer.uuidCompleteness;
133 // x mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_completeness", completenessUuid, "completeness", "completeness", "completeness")); //null, unknown, tmpflag, tmp2, tmp3, complete
134 // UUID credibilityUuid = ErmsTransformer.uuidCredibility;
135 // x mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_credibility", credibilityUuid, "credibility", "credibility", "credibility")); //Werte: null, unknown, marked for deletion
139 // //second path / implemented in ErmsTaxonRelationImport
140 // DbImportMapping secondPathMapping = new DbImportMapping();
141 // secondPathMapping.addMapper(DbImportTaxIncludedInMapper.NewInstance("id", "tu_parent", TAXON_NAMESPACE, null)); //there is only one tree
142 // secondPathMapping.addMapper(DbImportSynonymMapper.NewInstance("id", "tu_acctaxon", TAXON_NAMESPACE, null));
143 // secondPathMapping.addMapper(DbImportNameTypeDesignationMapper.NewInstance("id", "tu_typetaxon", NAME_NAMESPACE, "tu_typedesignationstatus"));
144 // secondPathMapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_acctaxon"));
145 // mapping.setSecondPathMapping(secondPathMapping);
152 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
155 protected String
getRecordQuery(ErmsImportConfigurator config
) {
156 String strSelect
= " SELECT tu.*, parent1.tu_name AS parent1name, parent2.tu_name AS parent2name, parent3.tu_name AS parent3name, "
157 + " parent1.tu_rank AS parent1rank, parent2.tu_rank AS parent2rank, parent3.tu_rank AS parent3rank, " +
158 " status.status_id as status_id, fossil.fossil_name, qualitystatus.qualitystatus_name";
159 String strFrom
= " FROM tu LEFT OUTER JOIN tu AS parent1 ON parent1.id = tu.tu_parent " +
160 " LEFT OUTER JOIN tu AS parent2 ON parent2.id = parent1.tu_parent " +
161 " LEFT OUTER JOIN tu AS parent3 ON parent2.tu_parent = parent3.id " +
162 " LEFT OUTER JOIN status ON tu.tu_status = status.status_id " +
163 " LEFT OUTER JOIN fossil ON tu.tu_fossil = fossil.fossil_id " +
164 " LEFT OUTER JOIN qualitystatus ON tu.tu_qualitystatus = qualitystatus.id ";
165 String strWhere
= " WHERE ( tu.id IN (" + ID_LIST_TOKEN
+ ") )";
166 String strRecordQuery
= strSelect
+ strFrom
+ strWhere
;
167 return strRecordQuery
;
175 // private String getSecondPathRecordQuery(ErmsImportConfigurator config) {
176 // //TODO get automatic by second path mappers
177 // String selectAttributes = "id, tu_parent, tu_typetaxon, tu_typetaxon, tu_typedesignation, tu_acctaxon, tu_status";
178 // String strRecordQuery =
179 // " SELECT " + selectAttributes +
181 // " WHERE ( tu.id IN (" + ID_LIST_TOKEN + ") )";
182 // return strRecordQuery;
186 // private String getSecondPathIdQuery(){
187 // return getIdQuery();
191 * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#doInvoke(eu.etaxonomy.cdm.io.erms.ErmsImportState)
194 protected void doInvoke(ErmsImportState state
) {
196 super.doInvoke(state
);
199 // isSecondPath = true;
200 // ErmsImportConfigurator config = state.getConfig();
201 // Source source = config.getSource();
203 // String strIdQuery = getSecondPathIdQuery();
204 // String strRecordQuery = getSecondPathRecordQuery(config);
206 // int recordsPerTransaction = config.getRecordsPerTransaction();
208 // ResultSetPartitioner partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
209 // while (partitioner.nextPartition()){
210 // partitioner.doPartition(this, state);
212 // } catch (SQLException e) {
213 // logger.error("SQLException:" + e);
217 // isSecondPath = false;
219 // logger.info("end make " + getPluralString() + " ... " + getSuccessString(success));
227 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
// Returns the objects related to the records of the given partition, keyed by an
// arbitrary key object and then by a String id. In the visible code nothing is ever put into
// the result: both foreign key handlers and the reference map lookup are commented out.
// NOTE(review): this listing lacks several original lines (the opening of the try block, any
// loop over rs, the closing braces and the return statement) - confirm against the repository
// copy before changing this method.
229 public Map
<Object
, Map
<String
, ?
extends CdmBase
>> getRelatedObjectsForPartition(ResultSet rs
) {
// result map handed back to the caller; stays empty in the visible code
233 Map
<Object
, Map
<String
, ?
extends CdmBase
>> result
= new HashMap
<Object
, Map
<String
, ?
extends CdmBase
>>();
// id collectors; only referenced by the commented-out handleForeignKey calls below
236 Set
<String
> nameIdSet
= new HashSet
<String
>();
237 Set
<String
> referenceIdSet
= new HashSet
<String
>();
239 // handleForeignKey(rs, nameIdSet, "PTNameFk");
240 // handleForeignKey(rs, referenceIdSet, "PTRefFk");
244 // nameSpace = "Reference";
245 // cdmClass = Reference.class;
246 // Map<String, Person> referenceMap = (Map<String, Person>)getCommonService().getSourcedObjectsByIdInSource(Person.class, teamIdSet, nameSpace);
247 // result.put(Reference.class, referenceMap);
// SQL failures are rethrown unchecked with the original exception as cause
249 } catch (SQLException e
) {
250 throw new RuntimeException(e
);
257 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet)
// Creates the taxon base for the current record of the record query.
// The method reads the status id, the name and display name of the record and the names and
// ranks of up to three ancestors, creates the name via getTaxonName(rs, state), fills the
// epithets depending on the rank of the name, adds an original source to the name and
// finally wraps the name in a Taxon or a Synonym with the configured source reference as sec.
// NOTE(review): this listing lacks several original lines (closing braces, else branches and
// the condition that chooses between the two return statements at the end; statusId is read
// but not used in any visible line, so it is presumably evaluated there) - confirm against the
// repository copy before changing this method.
259 public TaxonBase
createObject(ResultSet rs
, ErmsImportState state
) throws SQLException
{
260 int statusId
= rs
.getInt("status_id");
261 String tuName
= rs
.getString("tu_name");
// may be null if the column is NULL in the database; passed to containsBrackets below
262 String displayName
= rs
.getString("tu_displayname");
// name and rank id of parent, grandparent and great-grandparent (see getRecordQuery)
264 String parent1Name
= rs
.getString("parent1name");
265 Integer parent1Rank
= rs
.getInt("parent1rank");
267 String parent2Name
= rs
.getString("parent2name");
268 Integer parent2Rank
= rs
.getInt("parent2rank");
270 String parent3Name
= rs
.getString("parent3name");
// parent3Rank is not used in any visible line
271 Integer parent3Rank
= rs
.getInt("parent3rank");
274 NonViralName taxonName
= getTaxonName(rs
, state
);
// set the epithets: which name part receives tu_name depends on the rank of the name
276 if (taxonName
.isGenus() || taxonName
.isSupraGeneric()){
277 taxonName
.setGenusOrUninomial(tuName
);
278 }else if (taxonName
.isInfraGeneric()){
279 taxonName
.setInfraGenericEpithet(tuName
);
280 taxonName
.setGenusOrUninomial(parent1Name
);
281 }else if (taxonName
.isSpecies()){
282 taxonName
.setSpecificEpithet(tuName
);
283 getGenusAndInfraGenus(parent1Name
, parent2Name
, parent1Rank
, taxonName
);
284 }else if (taxonName
.isInfraSpecific()){
// a parent rank id below 220 is reported as "higher than species" by handleException
285 if (parent1Rank
< 220){
286 handleException(parent1Rank
, taxonName
, displayName
);
288 taxonName
.setInfraSpecificEpithet(tuName
);
289 taxonName
.setSpecificEpithet(parent1Name
);
290 getGenusAndInfraGenus(parent2Name
, parent3Name
, parent2Rank
, taxonName
);
291 }else if (taxonName
.getRank()== null){
292 logger
.warn("rank super domain still needs to be implemented. Used domain instead.");
// the record named "Biota" gets rank domain; NOTE(review): the following warning and
// setNameCache(tuName) presumably belong to an else branch whose line is not visible
293 if ("Biota".equalsIgnoreCase(tuName
)){
294 Rank rank
= Rank
.DOMAIN(); //should be Superdomain
295 taxonName
.setRank(rank
);
296 taxonName
.setGenusOrUninomial(tuName
);
298 String warning
= "TaxonName has no rank. Use namecache.";
299 logger
.warn(warning
);
300 taxonName
.setNameCache(tuName
);
304 //e.g. Leucon [Platyhelminthes] ornatus
// display names with brackets are used as name cache as they are
305 if (containsBrackets(displayName
)){
306 taxonName
.setNameCache(displayName
);
307 logger
.warn("Set name cache: " + displayName
);
310 //add original source for taxon name (taxon original source is added in mapper
311 Reference citation
= state
.getConfig().getSourceReference();
312 addOriginalSource(rs
, taxonName
, "id", NAME_NAMESPACE
, citation
);
314 // taxonName.setNameCache("Test");
// the source reference of the configuration is used as sec reference of the result
316 ErmsImportConfigurator config
= state
.getConfig();
317 Reference sec
= config
.getSourceReference();
// NOTE(review): the condition selecting one of the two returns is not visible here
319 return Taxon
.NewInstance(taxonName
, sec
);
321 return Synonym
.NewInstance(taxonName
, sec
);
332 private void handleException(Integer parent1Rank
, NonViralName taxonName
, String displayName
) {
333 logger
.warn("Parent of infra specific taxon is higher than species. Used nameCache: " + displayName
) ;
334 taxonName
.setNameCache(displayName
);
343 private boolean containsBrackets(String displayName
) {
344 int index
= displayName
.indexOf("[");
356 private void getGenusAndInfraGenus(String parentName
, String grandParentName
, Integer parent1Rank
, NonViralName taxonName
) {
357 if (parent1Rank
<220 && parent1Rank
> 180){
358 //parent is infrageneric
359 taxonName
.setInfraGenericEpithet(parentName
);
360 taxonName
.setGenusOrUninomial(grandParentName
);
362 taxonName
.setGenusOrUninomial(parentName
);
369 * @throws SQLException
// Creates the (still empty) name for the current record.
// The kingdom id is taken from parseKingdomId(rs) and the rank id from column tu_rank; the
// nomenclatural code is looked up from the kingdom id and the rank from the import state.
// The name is created either by the nomenclatural code or by NonViralName.NewInstance(rank).
// NOTE(review): this listing lacks several original lines (the declarations of rank and
// result, the else line before the "KingdomId is null" warning, the conditions guarding the
// "Rank is null" warning and the two ways of creating the name, and the return statement) -
// confirm against the repository copy before changing this method.
371 private NonViralName
getTaxonName(ResultSet rs
, ErmsImportState state
) throws SQLException
{
373 Integer kingdomId
= parseKingdomId(rs
);
374 Integer intRank
= rs
.getInt("tu_rank");
// kingdomId is passed on unchecked here; the null check follows below
376 NomenclaturalCode nc
= ErmsTransformer
.kingdomId2NomCode(kingdomId
);
// the rank lookup needs both the rank id and the kingdom id
378 if (kingdomId
!= null){
379 rank
= state
.getRank(intRank
, kingdomId
);
381 logger
.warn("KingdomId is null");
384 logger
.warn("Rank is null. KingdomId: " + kingdomId
+ ", rankId: " + intRank
);
// name created by the nomenclatural code; presumably only if nc is not null - confirm
387 result
= (NonViralName
)nc
.getNewTaxonNameInstance(rank
);
// fallback: plain NonViralName
389 result
= NonViralName
.NewInstance(rank
);
396 * Returns the kingdom id by extracting it from the second character in the <code>tu_sp</code>
397 * attribute. If the attribute can not be parsed to a valid id <code>null</code>
398 * is returned. If the attribute is <code>null</code> the id of the record is returned.
401 * @throws SQLException
403 private int parseKingdomId(ResultSet rs
) throws SQLException
{
404 Integer result
= null;
405 String treeString
= rs
.getString("tu_sp");
406 if (treeString
!= null){
407 if (CdmUtils
.isNotEmpty(treeString
) && treeString
.length() > 1){
408 String strKingdom
= treeString
.substring(1,2);
410 if (! treeString
.substring(0, 1).equals("#") && ! treeString
.substring(2, 3).equals("#") ){
411 logger
.warn("Tree string " + treeString
+ " has no recognized format");
414 result
= Integer
.valueOf(strKingdom
);
415 } catch (NumberFormatException e
) {
416 logger
.warn("Kingdom string " + strKingdom
+ "could not be recognized as a valid number");
421 Integer tu_id
= rs
.getInt("id");
429 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
432 protected boolean doCheck(ErmsImportState state
){
433 IOValidator
<ErmsImportState
> validator
= new ErmsTaxonImportValidator();
434 return validator
.validate(state
);
439 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
441 protected boolean isIgnore(ErmsImportState state
){
442 return ! state
.getConfig().isDoTaxa();