some small improvements on PESI
[cdmlib-apps.git] / cdm-pesi / src / main / java / eu / etaxonomy / cdm / io / pesi / erms / ErmsTaxonImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.pesi.erms;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.io.common.IOValidator;
25 import eu.etaxonomy.cdm.io.common.mapping.DbIgnoreMapper;
26 import eu.etaxonomy.cdm.io.common.mapping.DbImportExtensionMapper;
27 import eu.etaxonomy.cdm.io.common.mapping.DbImportLsidMapper;
28 import eu.etaxonomy.cdm.io.common.mapping.DbImportMapping;
29 import eu.etaxonomy.cdm.io.common.mapping.DbImportObjectCreationMapper;
30 import eu.etaxonomy.cdm.io.common.mapping.DbImportStringMapper;
31 import eu.etaxonomy.cdm.io.common.mapping.DbNotYetImplementedMapper;
32 import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
33 import eu.etaxonomy.cdm.io.pesi.erms.validation.ErmsTaxonImportValidator;
34 import eu.etaxonomy.cdm.model.common.CdmBase;
35 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
36 import eu.etaxonomy.cdm.model.name.NonViralName;
37 import eu.etaxonomy.cdm.model.name.Rank;
38 import eu.etaxonomy.cdm.model.reference.Reference;
39 import eu.etaxonomy.cdm.model.taxon.Synonym;
40 import eu.etaxonomy.cdm.model.taxon.Taxon;
41 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
42
43
44 /**
45 * @author a.mueller
46 * @created 20.02.2010
47 * @version 1.0
48 */
49 @Component
50 public class ErmsTaxonImport extends ErmsImportBase<TaxonBase> implements IMappingImport<TaxonBase, ErmsImportState>{
51 private static final Logger logger = Logger.getLogger(ErmsTaxonImport.class);
52
53 public static final UUID TNS_EXT_UUID = UUID.fromString("41cb0450-ac84-4d73-905e-9c7773c23b05");
54
55 private DbImportMapping mapping;
56
57 //second path is not used anymore, there is now an ErmsTaxonRelationImport class instead
58 private boolean isSecondPath = false;
59
60 private int modCount = 10000;
61 private static final String pluralString = "taxa";
62 private static final String dbTableName = "tu";
63 private static final Class cdmTargetClass = TaxonBase.class;
64
/**
 * Creates the import and passes the plural label ("taxa"), the source table
 * name ("tu") and the CDM target class to the super constructor.
 */
public ErmsTaxonImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
68
69
70
71 // /* (non-Javadoc)
72 // * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#getIdQuery()
73 // */
74 // @Override
75 // protected String getIdQuery() {
76 // String strQuery = " SELECT id FROM tu WHERE id < 300000 " ;
77 // return strQuery;
78 // }
79
80
81 /* (non-Javadoc)
82 * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#getMapping()
83 */
84 protected DbImportMapping getMapping() {
85 if (mapping == null){
86 mapping = new DbImportMapping();
87
88 mapping.addMapper(DbImportObjectCreationMapper.NewInstance(this, "id", TAXON_NAMESPACE)); //id + tu_status
89 UUID tsnUuid = ErmsTransformer.uuidTsn;
90 mapping.addMapper(DbImportLsidMapper.NewInstance("GUID", "lsid"));
91
92 mapping.addMapper(DbImportExtensionMapper.NewInstance("tsn", tsnUuid, "TSN", "TSN", "TSN"));
93 // mapping.addMapper(DbImportStringMapper.NewInstance("tu_name", "(NonViralName)name.nameCache"));
94
95 UUID displayNameUuid = ErmsTransformer.uuidDisplayName;
96 mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_displayname", displayNameUuid, "display name", "display name", "display name"));
97 UUID fuzzyNameUuid = ErmsTransformer.uuidFuzzyName;
98 mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_fuzzyname", fuzzyNameUuid, "fuzzy name", "fuzzy name", "fuzzy name"));
99 mapping.addMapper(DbImportStringMapper.NewInstance("tu_authority", "(NonViralName)name.authorshipCache"));
100
101 UUID fossilStatusUuid = ErmsTransformer.uuidFossilStatus;
102 mapping.addMapper(DbImportExtensionMapper.NewInstance("fossil_name", fossilStatusUuid, "fossil status", "fossil status", "fos. stat."));
103 // mapping.addMapper(DbImportExtensionTypeCreationMapper.NewInstance("fossil_name", EXTENSION_TYPE_NAMESPACE, "fossil_name", "fossil_name", "fossil_name"));
104
105 UUID unacceptUuid = ErmsTransformer.uuidUnacceptReason;
106 mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_unacceptreason", unacceptUuid, "unaccept reason", "unaccept reason", "reason"));
107
108 UUID qualityUuid = ErmsTransformer.uuidQualityStatus;
109 mapping.addMapper(DbImportExtensionMapper.NewInstance("qualitystatus_name", qualityUuid, "quality status", "quality status", "quality status")); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor
110
111
112 // UUID hiddenUuid = ErmsTransformer.uuidHidden;
113 // mapping.addMapper(DbImportMarkerCreationMapper.Mapper.NewInstance("qualitystatus_name", qualityUuid, "quality status", "quality status", "quality status")); //checked by Tax Editor ERMS1.1, Added by db management team (2x), checked by Tax Editor
114
115 //not yet implemented
116 mapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_sp", "included in rank/object creation"));
117
118
119 //ignore
120 mapping.addMapper(DbIgnoreMapper.NewInstance("tu_marine", "marine flag not implemented in PESI"));
121 mapping.addMapper(DbIgnoreMapper.NewInstance("tu_brackish", "brackish flag not implemented in PESI"));
122 mapping.addMapper(DbIgnoreMapper.NewInstance("tu_fresh", "freshwater flag not implemented in PESI"));
123 mapping.addMapper(DbIgnoreMapper.NewInstance("tu_terrestrial", "terrestrial flag not implemented in PESI"));
124 mapping.addMapper(DbIgnoreMapper.NewInstance("tu_fossil", "tu_fossil implemented as foreign key"));
125 mapping.addMapper(DbIgnoreMapper.NewInstance("cache_citation", "citation cache not needed in PESI"));
126
127
128
129
130 //not in current version anymore
131 // mapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_hidden", "Needs DbImportMarkerMapper implemented"));
132 // UUID completenessUuid = ErmsTransformer.uuidCompleteness;
133 // x mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_completeness", completenessUuid, "completeness", "completeness", "completeness")); //null, unknown, tmpflag, tmp2, tmp3, complete
134 // UUID credibilityUuid = ErmsTransformer.uuidCredibility;
135 // x mapping.addMapper(DbImportExtensionMapper.NewInstance("tu_credibility", credibilityUuid, "credibility", "credibility", "credibility")); //Werte: null, unknown, marked for deletion
136
137
138
139 // //second path / implemented in ErmsTaxonRelationImport
140 // DbImportMapping secondPathMapping = new DbImportMapping();
141 // secondPathMapping.addMapper(DbImportTaxIncludedInMapper.NewInstance("id", "tu_parent", TAXON_NAMESPACE, null)); //there is only one tree
142 // secondPathMapping.addMapper(DbImportSynonymMapper.NewInstance("id", "tu_acctaxon", TAXON_NAMESPACE, null));
143 // secondPathMapping.addMapper(DbImportNameTypeDesignationMapper.NewInstance("id", "tu_typetaxon", NAME_NAMESPACE, "tu_typedesignationstatus"));
144 // secondPathMapping.addMapper(DbNotYetImplementedMapper.NewInstance("tu_acctaxon"));
145 // mapping.setSecondPathMapping(secondPathMapping);
146
147 }
148 return mapping;
149 }
150
151 /* (non-Javadoc)
152 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
153 */
154 @Override
155 protected String getRecordQuery(ErmsImportConfigurator config) {
156 String strSelect = " SELECT tu.*, parent1.tu_name AS parent1name, parent2.tu_name AS parent2name, parent3.tu_name AS parent3name, "
157 + " parent1.tu_rank AS parent1rank, parent2.tu_rank AS parent2rank, parent3.tu_rank AS parent3rank, " +
158 " status.status_id as status_id, fossil.fossil_name, qualitystatus.qualitystatus_name";
159 String strFrom = " FROM tu LEFT OUTER JOIN tu AS parent1 ON parent1.id = tu.tu_parent " +
160 " LEFT OUTER JOIN tu AS parent2 ON parent2.id = parent1.tu_parent " +
161 " LEFT OUTER JOIN tu AS parent3 ON parent2.tu_parent = parent3.id " +
162 " LEFT OUTER JOIN status ON tu.tu_status = status.status_id " +
163 " LEFT OUTER JOIN fossil ON tu.tu_fossil = fossil.fossil_id " +
164 " LEFT OUTER JOIN qualitystatus ON tu.tu_qualitystatus = qualitystatus.id ";
165 String strWhere = " WHERE ( tu.id IN (" + ID_LIST_TOKEN + ") )";
166 String strRecordQuery = strSelect + strFrom + strWhere;
167 return strRecordQuery;
168 }
169
170
171 // /**
172 // * @param config
173 // * @return
174 // */
175 // private String getSecondPathRecordQuery(ErmsImportConfigurator config) {
176 // //TODO get automatic by second path mappers
177 // String selectAttributes = "id, tu_parent, tu_typetaxon, tu_typetaxon, tu_typedesignation, tu_acctaxon, tu_status";
178 // String strRecordQuery =
179 // " SELECT " + selectAttributes +
180 // " FROM tu " +
181 // " WHERE ( tu.id IN (" + ID_LIST_TOKEN + ") )";
182 // return strRecordQuery;
183 // }
184
185
186 // private String getSecondPathIdQuery(){
187 // return getIdQuery();
188 // }
189
190 /* (non-Javadoc)
191 * @see eu.etaxonomy.cdm.io.erms.ErmsImportBase#doInvoke(eu.etaxonomy.cdm.io.erms.ErmsImportState)
192 */
193 @Override
194 protected void doInvoke(ErmsImportState state) {
195 //first path
196 super.doInvoke(state);
197
198 // //second path
199 // isSecondPath = true;
200 // ErmsImportConfigurator config = state.getConfig();
201 // Source source = config.getSource();
202 //
203 // String strIdQuery = getSecondPathIdQuery();
204 // String strRecordQuery = getSecondPathRecordQuery(config);
205 //
206 // int recordsPerTransaction = config.getRecordsPerTransaction();
207 // try{
208 // ResultSetPartitioner partitioner = ResultSetPartitioner.NewInstance(source, strIdQuery, strRecordQuery, recordsPerTransaction);
209 // while (partitioner.nextPartition()){
210 // partitioner.doPartition(this, state);
211 // }
212 // } catch (SQLException e) {
213 // logger.error("SQLException:" + e);
214 // return false;
215 // }
216 //
217 // isSecondPath = false;
218 //
219 // logger.info("end make " + getPluralString() + " ... " + getSuccessString(success));
220 return;
221
222 }
223
224
225
226 /* (non-Javadoc)
227 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
228 */
229 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
230 String nameSpace;
231 Class cdmClass;
232 Set<String> idSet;
233 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
234
235 try{
236 Set<String> nameIdSet = new HashSet<String>();
237 Set<String> referenceIdSet = new HashSet<String>();
238 while (rs.next()){
239 // handleForeignKey(rs, nameIdSet, "PTNameFk");
240 // handleForeignKey(rs, referenceIdSet, "PTRefFk");
241 }
242
243 //reference map
244 // nameSpace = "Reference";
245 // cdmClass = Reference.class;
246 // Map<String, Person> referenceMap = (Map<String, Person>)getCommonService().getSourcedObjectsByIdInSource(Person.class, teamIdSet, nameSpace);
247 // result.put(Reference.class, referenceMap);
248
249 } catch (SQLException e) {
250 throw new RuntimeException(e);
251 }
252 return result;
253 }
254
255
256 /* (non-Javadoc)
257 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet)
258 */
259 public TaxonBase createObject(ResultSet rs, ErmsImportState state) throws SQLException {
260 int statusId = rs.getInt("status_id");
261 String tuName = rs.getString("tu_name");
262 String displayName = rs.getString("tu_displayname");
263
264 String parent1Name = rs.getString("parent1name");
265 Integer parent1Rank = rs.getInt("parent1rank");
266
267 String parent2Name = rs.getString("parent2name");
268 Integer parent2Rank = rs.getInt("parent2rank");
269
270 String parent3Name = rs.getString("parent3name");
271 Integer parent3Rank = rs.getInt("parent3rank");
272
273
274 NonViralName taxonName = getTaxonName(rs, state);
275 //set epithets
276 if (taxonName.isGenus() || taxonName.isSupraGeneric()){
277 taxonName.setGenusOrUninomial(tuName);
278 }else if (taxonName.isInfraGeneric()){
279 taxonName.setInfraGenericEpithet(tuName);
280 taxonName.setGenusOrUninomial(parent1Name);
281 }else if (taxonName.isSpecies()){
282 taxonName.setSpecificEpithet(tuName);
283 getGenusAndInfraGenus(parent1Name, parent2Name, parent1Rank, taxonName);
284 }else if (taxonName.isInfraSpecific()){
285 if (parent1Rank < 220){
286 handleException(parent1Rank, taxonName, displayName);
287 }
288 taxonName.setInfraSpecificEpithet(tuName);
289 taxonName.setSpecificEpithet(parent1Name);
290 getGenusAndInfraGenus(parent2Name, parent3Name, parent2Rank, taxonName);
291 }else if (taxonName.getRank()== null){
292 logger.warn("rank super domain still needs to be implemented. Used domain instead.");
293 if ("Biota".equalsIgnoreCase(tuName)){
294 Rank rank = Rank.DOMAIN(); //should be Superdomain
295 taxonName.setRank(rank);
296 taxonName.setGenusOrUninomial(tuName);
297 }else{
298 String warning = "TaxonName has no rank. Use namecache.";
299 logger.warn(warning);
300 taxonName.setNameCache(tuName);
301 }
302
303 }
304 //e.g. Leucon [Platyhelminthes] ornatus
305 if (containsBrackets(displayName)){
306 taxonName.setNameCache(displayName);
307 logger.warn("Set name cache: " + displayName);
308 }
309
310 //add original source for taxon name (taxon original source is added in mapper
311 Reference citation = state.getConfig().getSourceReference();
312 addOriginalSource(rs, taxonName, "id", NAME_NAMESPACE, citation);
313
314 // taxonName.setNameCache("Test");
315
316 ErmsImportConfigurator config = state.getConfig();
317 Reference sec = config.getSourceReference();
318 if (statusId == 1){
319 return Taxon.NewInstance(taxonName, sec);
320 }else{
321 return Synonym.NewInstance(taxonName, sec);
322 }
323 }
324
325
326
327 /**
328 * @param parent1Rank
329 * @param displayName
330 * @param taxonName
331 */
332 private void handleException(Integer parent1Rank, NonViralName taxonName, String displayName) {
333 logger.warn("Parent of infra specific taxon is higher than species. Used nameCache: " + displayName) ;
334 taxonName.setNameCache(displayName);
335 }
336
337
338
339 /**
340 * @param displayName
341 * @return
342 */
343 private boolean containsBrackets(String displayName) {
344 int index = displayName.indexOf("[");
345 return (index > -1);
346 }
347
348
349
350 /**
351 * @param parent1Name
352 * @param parent2Name
353 * @param parent1Rank
354 * @param taxonName
355 */
356 private void getGenusAndInfraGenus(String parentName, String grandParentName, Integer parent1Rank, NonViralName taxonName) {
357 if (parent1Rank <220 && parent1Rank > 180){
358 //parent is infrageneric
359 taxonName.setInfraGenericEpithet(parentName);
360 taxonName.setGenusOrUninomial(grandParentName);
361 }else{
362 taxonName.setGenusOrUninomial(parentName);
363 }
364 }
365
366 /**
367 * @param rs
368 * @return
369 * @throws SQLException
370 */
371 private NonViralName getTaxonName(ResultSet rs, ErmsImportState state) throws SQLException {
372 NonViralName result;
373 Integer kingdomId = parseKingdomId(rs);
374 Integer intRank = rs.getInt("tu_rank");
375
376 NomenclaturalCode nc = ErmsTransformer.kingdomId2NomCode(kingdomId);
377 Rank rank = null;
378 if (kingdomId != null){
379 rank = state.getRank(intRank, kingdomId);
380 }else{
381 logger.warn("KingdomId is null");
382 }
383 if (rank == null){
384 logger.warn("Rank is null. KingdomId: " + kingdomId + ", rankId: " + intRank);
385 }
386 if (nc != null){
387 result = (NonViralName)nc.getNewTaxonNameInstance(rank);
388 }else{
389 result = NonViralName.NewInstance(rank);
390 }
391
392 return result;
393 }
394
395 /**
396 * Returns the kingdom id by extracting it from the second character in the <code>tu_sp</code>
397 * attribute. If the attribute can not be parsed to a valid id <code>null</code>
398 * is returned. If the attribute is <code>null</code> the id of the record is returned.
399 * @param rs
400 * @return
401 * @throws SQLException
402 */
403 private int parseKingdomId(ResultSet rs) throws SQLException {
404 Integer result = null;
405 String treeString = rs.getString("tu_sp");
406 if (treeString != null){
407 if (CdmUtils.isNotEmpty(treeString) && treeString.length() > 1){
408 String strKingdom = treeString.substring(1,2);
409
410 if (! treeString.substring(0, 1).equals("#") && ! treeString.substring(2, 3).equals("#") ){
411 logger.warn("Tree string " + treeString + " has no recognized format");
412 }else{
413 try {
414 result = Integer.valueOf(strKingdom);
415 } catch (NumberFormatException e) {
416 logger.warn("Kingdom string " + strKingdom + "could not be recognized as a valid number");
417 }
418 }
419 }
420 }else{
421 Integer tu_id = rs.getInt("id");
422 result = tu_id;
423 }
424 return result;
425 }
426
427
428 /* (non-Javadoc)
429 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
430 */
431 @Override
432 protected boolean doCheck(ErmsImportState state){
433 IOValidator<ErmsImportState> validator = new ErmsTaxonImportValidator();
434 return validator.validate(state);
435 }
436
437
438 /* (non-Javadoc)
439 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
440 */
441 protected boolean isIgnore(ErmsImportState state){
442 return ! state.getConfig().isDoTaxa();
443 }
444
445
446
447 }