Revert missing rank constant
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisCurrentSpeciesImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Map;
17 import java.util.Set;
18
19 import org.apache.commons.lang.StringUtils;
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.common.UTF8;
25 import eu.etaxonomy.cdm.io.common.IOValidator;
26 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
27 import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
28 import eu.etaxonomy.cdm.model.common.CdmBase;
29 import eu.etaxonomy.cdm.model.common.Language;
30 import eu.etaxonomy.cdm.model.description.Distribution;
31 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
32 import eu.etaxonomy.cdm.model.description.TaxonDescription;
33 import eu.etaxonomy.cdm.model.location.NamedArea;
34 import eu.etaxonomy.cdm.model.name.Rank;
35 import eu.etaxonomy.cdm.model.name.ZoologicalName;
36 import eu.etaxonomy.cdm.model.reference.Reference;
37 import eu.etaxonomy.cdm.model.taxon.Classification;
38 import eu.etaxonomy.cdm.model.taxon.Taxon;
39 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
40 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
41
42
43 /**
44 * @author a.mueller
45 * @created 20.02.2010
46 */
47 @Component
48 public class GlobisCurrentSpeciesImport extends GlobisImportBase<Taxon> {
/** Logger of this import step. */
private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);

/** Label for the imported records, used in progress and summary log messages. */
private static final String pluralString = "current taxa";

/** Name of the source table read by this import step. */
private static final String dbTableName = "current_species";

/** Target class handed to the base class constructor (marked as "not needed" by the original author). */
private static final Class<?> cdmTargetClass = Taxon.class;

/** Number of handled records between two progress log messages. */
private final int modCount = 10000;

/**
 * Creates the import step, passing the record label, the source table name
 * and the target class to the base class.
 */
public GlobisCurrentSpeciesImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
59
60 @Override
61 protected String getIdQuery() {
62 String strRecordQuery =
63 " SELECT IDcurrentspec " +
64 " FROM " + dbTableName;
65 return strRecordQuery;
66 }
67
68 @Override
69 protected String getRecordQuery(GlobisImportConfigurator config) {
70 String strRecordQuery =
71 " SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
72 " cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " +
73 " FROM " + getTableName() + " cs " +
74 " WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
75 return strRecordQuery;
76 }
77
78 @Override
79 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
80 boolean success = true;
81
82 Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
83 Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
84 ResultSet rs = partitioner.getResultSet();
85
86 Classification classification = getClassification(state);
87
88 try {
89
90 int i = 0;
91
92 //for each reference
93 while (rs.next()){
94
95 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
96
97 Integer taxonId = rs.getInt("IDcurrentspec");
98
99 //String dtSpcJahr -> ignore !
100 //empty: fiSpcLiteratur
101
102 //TODO
103 //fiSpcspcgrptax
104
105 try {
106
107 //source ref
108 Reference<?> sourceRef = state.getTransactionalSourceReference();
109 Taxon nextHigherTaxon = null;
110
111 boolean hasNewParent = false; //true if any parent is new
112
113 //species
114 Taxon species = createObject(rs, state, taxonId);
115
116
117 String familyStr = rs.getString("dtSpcFamakt");
118 String subFamilyStr = rs.getString("dtSpcSubfamakt");
119 String tribeStr = rs.getString("dtSpcTribakt");
120
121 //family
122 Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap, taxonId);
123
124 //subfamily
125 Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap, taxonId);
126 Taxon subFamilyParent = getParent(subFamily, classification);
127 if (subFamilyParent != null){
128 if (! compareTaxa(family, subFamilyParent)){
129 logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
130 }
131 }else{
132 classification.addParentChild(family, subFamily, sourceRef, null);
133 }
134 nextHigherTaxon = subFamily;
135
136 //tribe
137 Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap, taxonId);
138 if (tribe != null){
139 Taxon tribeParent = getParent(tribe, classification);
140 if (tribeParent != null){
141 if (! compareTaxa(subFamily, tribeParent)){
142 logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
143 }
144 }else{
145 classification.addParentChild(subFamily, tribe, sourceRef, null);
146 }
147 nextHigherTaxon = tribe;
148 }
149
150
151 //genus
152 String genusStr = rs.getString("dtSpcGenusakt");
153 String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
154 Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap, taxonId);
155 Taxon genusParent = getParent(genus, classification);
156
157 if (genusParent != null){
158 if (! compareTaxa(genusParent, nextHigherTaxon)){
159 logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
160 }
161 }else{
162 classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
163 }
164 nextHigherTaxon = genus;
165
166 //subgenus
167 String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
168 String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
169 boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
170 if (hasSubgenus){
171 Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap, taxonId);
172 classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
173 nextHigherTaxon = subGenus;
174 }
175
176 classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
177
178 handleCountries(state, rs, species, taxonId);
179
180 //common names -> not used anymore
181 handleCommonNames(state, rs, species);
182
183 this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
184
185 objectsToSave.add(species);
186
187
188 } catch (Exception e) {
189 logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
190 e.printStackTrace();
191 }
192
193 }
194
195 logger.warn(pluralString + " to save: " + objectsToSave.size());
196 getTaxonService().save(objectsToSave);
197
198 return success;
199 } catch (SQLException e) {
200 logger.error("SQLException:" + e);
201 return false;
202 }
203 }
204
205 private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species, Integer taxonId) throws SQLException {
206 String countriesStr = rs.getString("dtSpcCountries");
207 if (isBlank(countriesStr)){
208 return;
209 }
210 String[] countriesSplit = countriesStr.split(";");
211 for (String countryStr : countriesSplit){
212 if (isBlank(countryStr)){
213 continue;
214 }
215 countryStr = countryStr.trim();
216
217 //TODO use isComplete
218 boolean isComplete = countryStr.endsWith(".");
219 if (isComplete){
220 countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
221 }
222 boolean isDoubtful = countryStr.endsWith("[?]");
223 if (isDoubtful){
224 countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
225 }
226 if (countryStr.startsWith("?")){
227 isDoubtful = true;
228 countryStr = countryStr.substring(1).trim();
229 }
230
231
232
233 countryStr = normalizeCountry(countryStr);
234
235 NamedArea country = getCountry(state, countryStr);
236
237 PresenceAbsenceTerm status;
238 if (isDoubtful){
239 status = PresenceAbsenceTerm.PRESENT_DOUBTFULLY();
240 }else{
241 status = PresenceAbsenceTerm.PRESENT();
242 }
243
244 if (country != null){
245 TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
246 Distribution distribution = Distribution.NewInstance(country, status);
247 desc.addElement(distribution);
248 }else{
249 if (countryStr.length() > 0){
250 logger.warn("Country string not recognized : " + countryStr + " for IDcurrentspec " + taxonId);
251 }
252 }
253 }
254 }
255
256
257
258 /**
259 * @param countryStr
260 * @return
261 */
262 private String normalizeCountry(String countryStr) {
263 String result = countryStr.trim();
264 if (result.endsWith(".")){
265 result = result.substring(0,result.length() - 1);
266 }
267 while (result.startsWith(UTF8.NO_BREAK_SPACE.toString())){
268 result = result.substring(1); //
269 }
270 if (result.matches("\\s+")){
271 result = "";
272 }
273 return result.trim();
274 }
275
276 private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
277 //DON't use, use seperate common name tables instead
278
279 // String commonNamesStr = rs.getString("vernacularnames");
280 // if (isBlank(commonNamesStr)){
281 // return;
282 // }
283 // String[] commonNamesSplit = commonNamesStr.split(";");
284 // for (String commonNameStr : commonNamesSplit){
285 // if (isBlank(commonNameStr)){
286 // continue;
287 // }
288 // Language language = null; //TODO
289 // CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
290 // TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
291 // desc.addElement(commonName);
292 // }
293 }
294
295
296
297
298 /**
299 * Compares 2 taxa, returns true of both taxa look similar
300 * @param genus
301 * @param nextHigherTaxon
302 * @return
303 */
304 private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
305 ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
306 ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
307 if (!name1.getRank().equals(name2.getRank())){
308 return false;
309 }
310 if (! name1.getTitleCache().equals(name2.getTitleCache())){
311 return false;
312 }
313 return true;
314 }
315
316
317
318
319 private Taxon getParent(Taxon child, Classification classification) {
320 if (child == null){
321 logger.warn("Child is null");
322 return null;
323 }
324 for (TaxonNode node : child.getTaxonNodes()){
325 if (node.getClassification().equals(classification)){
326 if (node.getParent() != null){
327 return node.getParent().getTaxon();
328 }else{
329 return null;
330 }
331 }
332 }
333 return null;
334 }
335
336
337
338
339 private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap, Integer taxonId) {
340 if (isBlank(uninomial)){
341 return null;
342 }
343
344 String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
345
346 String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
347 Taxon taxon = taxonMap.get(key);
348 if (taxon == null){
349 ZoologicalName name = ZoologicalName.NewInstance(rank);
350 name.setGenusOrUninomial(uninomial);
351 if (isNotBlank(infraGenericEpi)){
352 name.setInfraGenericEpithet(infraGenericEpi);
353 }
354 taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
355
356 taxonMap.put(key, taxon);
357 handleAuthorAndYear(author, name, taxonId, state);
358 getTaxonService().save(taxon);
359 }
360
361 return taxon;
362 }
363
364
365 //fast and dirty is enough here
366 private Classification classification;
367
368 private Classification getClassification(GlobisImportState state) {
369 if (this.classification == null){
370 String name = state.getConfig().getClassificationName();
371 Reference<?> reference = state.getTransactionalSourceReference();
372 this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
373 classification.setUuid(state.getConfig().getClassificationUuid());
374 getClassificationService().save(classification);
375 }
376 return this.classification;
377
378 }
379
380 /* (non-Javadoc)
381 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
382 */
383 public Taxon createObject(ResultSet rs, GlobisImportState state, Integer taxonId)
384 throws SQLException {
385 String speciesEpi = rs.getString("dtSpcSpcakt");
386 String subGenusEpi = rs.getString("dtSpcSubgenakt");
387 String genusEpi = rs.getString("dtSpcGenusakt");
388 String author = rs.getString("dtSpcAutor");
389
390
391 ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
392 zooName.setSpecificEpithet(speciesEpi);
393 if (StringUtils.isNotBlank(subGenusEpi)){
394 zooName.setInfraGenericEpithet(subGenusEpi);
395 }
396 zooName.setGenusOrUninomial(genusEpi);
397 handleAuthorAndYear(author, zooName, taxonId, state);
398
399 Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
400
401 return taxon;
402 }
403
404
405
406
407 @Override
408 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
409 String nameSpace;
410 Class cdmClass;
411 Set<String> idSet;
412 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
413 try{
414 Set<String> taxonIdSet = new HashSet<String>();
415
416 while (rs.next()){
417 // handleForeignKey(rs, taxonIdSet, "taxonId");
418 }
419
420 //taxon map
421 nameSpace = TAXON_NAMESPACE;
422 cdmClass = Taxon.class;
423 idSet = taxonIdSet;
424 Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
425 result.put(nameSpace, objectMap);
426
427
428 } catch (SQLException e) {
429 throw new RuntimeException(e);
430 }
431 return result;
432 }
433
434 /* (non-Javadoc)
435 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
436 */
437 @Override
438 protected boolean doCheck(GlobisImportState state){
439 IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
440 return validator.validate(state);
441 }
442
443
444 /* (non-Javadoc)
445 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
446 */
447 protected boolean isIgnore(GlobisImportState state){
448 return ! state.getConfig().isDoCurrentTaxa();
449 }
450
451
452
453
454
455 }