ref #10432 finishing Cora import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisCurrentSpeciesImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Map;
17 import java.util.Set;
18
19 import org.apache.commons.lang3.StringUtils;
20 import org.apache.logging.log4j.LogManager;
21 import org.apache.logging.log4j.Logger;
22 import org.springframework.stereotype.Component;
23
24 import eu.etaxonomy.cdm.common.CdmUtils;
25 import eu.etaxonomy.cdm.common.UTF8;
26 import eu.etaxonomy.cdm.io.common.IOValidator;
27 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
28 import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
29 import eu.etaxonomy.cdm.model.common.CdmBase;
30 import eu.etaxonomy.cdm.model.common.Language;
31 import eu.etaxonomy.cdm.model.description.Distribution;
32 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
33 import eu.etaxonomy.cdm.model.description.TaxonDescription;
34 import eu.etaxonomy.cdm.model.location.NamedArea;
35 import eu.etaxonomy.cdm.model.name.IZoologicalName;
36 import eu.etaxonomy.cdm.model.name.Rank;
37 import eu.etaxonomy.cdm.model.name.TaxonName;
38 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
39 import eu.etaxonomy.cdm.model.reference.Reference;
40 import eu.etaxonomy.cdm.model.taxon.Classification;
41 import eu.etaxonomy.cdm.model.taxon.Taxon;
42 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
43 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
44
45 /**
46 * @author a.mueller
47 * @since 20.02.2010
48 */
49 @Component
50 public class GlobisCurrentSpeciesImport extends GlobisImportBase<Taxon> {
51
52 private static final long serialVersionUID = -4392659482520384118L;
53 private static final Logger logger = LogManager.getLogger();
54
55 private int modCount = 10000;
56 private static final String pluralString = "current taxa";
57 private static final String dbTableName = "current_species";
58 private static final Class<?> cdmTargetClass = Taxon.class; //not needed
59
/**
 * Creates the import and hands the plural label, the source table name
 * and the target class of this import to the base class.
 */
public GlobisCurrentSpeciesImport() {
    super(pluralString, dbTableName, cdmTargetClass);
}
63
64 @Override
65 protected String getIdQuery() {
66 String strRecordQuery =
67 " SELECT IDcurrentspec " +
68 " FROM " + dbTableName;
69 return strRecordQuery;
70 }
71
72 @Override
73 protected String getRecordQuery(GlobisImportConfigurator config) {
74 String strRecordQuery =
75 " SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
76 " cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " +
77 " FROM " + getTableName() + " cs " +
78 " WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
79 return strRecordQuery;
80 }
81
82 @Override
83 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, GlobisImportState state) {
84 boolean success = true;
85
86 @SuppressWarnings("rawtypes")
87 Set<TaxonBase> objectsToSave = new HashSet<>();
88 @SuppressWarnings("unchecked")
89 Map<String, Taxon> taxonMap = partitioner.getObjectMap(TAXON_NAMESPACE);
90 ResultSet rs = partitioner.getResultSet();
91
92 Classification classification = getClassification(state);
93
94 try {
95
96 int i = 0;
97
98 //for each reference
99 while (rs.next()){
100
101 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
102
103 Integer taxonId = rs.getInt("IDcurrentspec");
104
105 //String dtSpcJahr -> ignore !
106 //empty: fiSpcLiteratur
107
108 //TODO
109 //fiSpcspcgrptax
110
111 try {
112
113 //source ref
114 Reference sourceRef = state.getTransactionalSourceReference();
115 Taxon nextHigherTaxon = null;
116
117 boolean hasNewParent = false; //true if any parent is new
118
119 //species
120 Taxon species = createObject(rs, state, taxonId);
121
122
123 String familyStr = rs.getString("dtSpcFamakt");
124 String subFamilyStr = rs.getString("dtSpcSubfamakt");
125 String tribeStr = rs.getString("dtSpcTribakt");
126
127 //family
128 Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap, taxonId);
129
130 //subfamily
131 Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap, taxonId);
132 Taxon subFamilyParent = getParent(subFamily, classification);
133 if (subFamilyParent != null){
134 if (! compareTaxa(family, subFamilyParent)){
135 logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
136 }
137 }else{
138 classification.addParentChild(family, subFamily, sourceRef, null);
139 }
140 nextHigherTaxon = subFamily;
141
142 //tribe
143 Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap, taxonId);
144 if (tribe != null){
145 Taxon tribeParent = getParent(tribe, classification);
146 if (tribeParent != null){
147 if (! compareTaxa(subFamily, tribeParent)){
148 logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
149 }
150 }else{
151 classification.addParentChild(subFamily, tribe, sourceRef, null);
152 }
153 nextHigherTaxon = tribe;
154 }
155
156
157 //genus
158 String genusStr = rs.getString("dtSpcGenusakt");
159 String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
160 Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap, taxonId);
161 Taxon genusParent = getParent(genus, classification);
162
163 if (genusParent != null){
164 if (! compareTaxa(genusParent, nextHigherTaxon)){
165 logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
166 }
167 }else{
168 classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
169 }
170 nextHigherTaxon = genus;
171
172 //subgenus
173 String subGenusStr = CdmBase.deproxy(species.getName(), TaxonName.class).getInfraGenericEpithet();
174 String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
175 boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
176 if (hasSubgenus){
177 Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap, taxonId);
178 classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
179 nextHigherTaxon = subGenus;
180 }
181
182 classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
183
184 handleCountries(state, rs, species, taxonId);
185
186 //common names -> not used anymore
187 handleCommonNames(state, rs, species);
188
189 this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
190
191 objectsToSave.add(species);
192
193
194 } catch (Exception e) {
195 logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
196 e.printStackTrace();
197 }
198
199 }
200
201 logger.warn(pluralString + " to save: " + objectsToSave.size());
202 getTaxonService().save(objectsToSave);
203
204 return success;
205 } catch (SQLException e) {
206 logger.error("SQLException:" + e);
207 return false;
208 }
209 }
210
211 private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species, Integer taxonId) throws SQLException {
212 String countriesStr = rs.getString("dtSpcCountries");
213 if (isBlank(countriesStr)){
214 return;
215 }
216 String[] countriesSplit = countriesStr.split(";");
217 for (String countryStr : countriesSplit){
218 if (isBlank(countryStr)){
219 continue;
220 }
221 countryStr = countryStr.trim();
222
223 //TODO use isComplete
224 boolean isComplete = countryStr.endsWith(".");
225 if (isComplete){
226 countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
227 }
228 boolean isDoubtful = countryStr.endsWith("[?]");
229 if (isDoubtful){
230 countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
231 }
232 if (countryStr.startsWith("?")){
233 isDoubtful = true;
234 countryStr = countryStr.substring(1).trim();
235 }
236
237 countryStr = normalizeCountry(countryStr);
238
239 NamedArea country = getCountry(state, countryStr);
240
241 PresenceAbsenceTerm status;
242 if (isDoubtful){
243 status = PresenceAbsenceTerm.PRESENT_DOUBTFULLY();
244 }else{
245 status = PresenceAbsenceTerm.PRESENT();
246 }
247
248 if (country != null){
249 TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
250 Distribution distribution = Distribution.NewInstance(country, status);
251 desc.addElement(distribution);
252 }else{
253 if (countryStr.length() > 0){
254 logger.warn("Country string not recognized : " + countryStr + " for IDcurrentspec " + taxonId);
255 }
256 }
257 }
258 }
259
260 private String normalizeCountry(String countryStr) {
261 String result = countryStr.trim();
262 if (result.endsWith(".")){
263 result = result.substring(0,result.length() - 1);
264 }
265 while (result.startsWith(UTF8.NO_BREAK_SPACE.toString())){
266 result = result.substring(1); //
267 }
268 if (result.matches("\\s+")){
269 result = "";
270 }
271 return result.trim();
272 }
273
274 private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
275 //DON't use, use seperate common name tables instead
276
277 // String commonNamesStr = rs.getString("vernacularnames");
278 // if (isBlank(commonNamesStr)){
279 // return;
280 // }
281 // String[] commonNamesSplit = commonNamesStr.split(";");
282 // for (String commonNameStr : commonNamesSplit){
283 // if (isBlank(commonNameStr)){
284 // continue;
285 // }
286 // Language language = null; //TODO
287 // CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
288 // TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
289 // desc.addElement(commonName);
290 // }
291 }
292
293 /**
294 * Compares 2 taxa, returns true of both taxa look similar
295 * @param genus
296 * @param nextHigherTaxon
297 * @return
298 */
299 private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
300 IZoologicalName name1 = taxon1.getName();
301 IZoologicalName name2 = taxon2.getName();
302 if (!name1.getRank().equals(name2.getRank())){
303 return false;
304 }
305 if (! name1.getTitleCache().equals(name2.getTitleCache())){
306 return false;
307 }
308 return true;
309 }
310
311 private Taxon getParent(Taxon child, Classification classification) {
312 if (child == null){
313 logger.warn("Child is null");
314 return null;
315 }
316 for (TaxonNode node : child.getTaxonNodes()){
317 if (node.getClassification().equals(classification)){
318 if (node.getParent() != null){
319 return node.getParent().getTaxon();
320 }else{
321 return null;
322 }
323 }
324 }
325 return null;
326 }
327
328 private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap, Integer taxonId) {
329 if (isBlank(uninomial)){
330 return null;
331 }
332
333 String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
334
335 String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
336 Taxon taxon = taxonMap.get(key);
337 if (taxon == null){
338 IZoologicalName name = TaxonNameFactory.NewZoologicalInstance(rank);
339 name.setGenusOrUninomial(uninomial);
340 if (isNotBlank(infraGenericEpi)){
341 name.setInfraGenericEpithet(infraGenericEpi);
342 }
343 taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
344
345 taxonMap.put(key, taxon);
346 handleAuthorAndYear(author, name, taxonId, state);
347 getTaxonService().save(taxon);
348 }
349
350 return taxon;
351 }
352
353
354 //fast and dirty is enough here
355 private Classification classification;
356
357 private Classification getClassification(GlobisImportState state) {
358 if (this.classification == null){
359 String name = state.getConfig().getClassificationName();
360 Reference reference = state.getTransactionalSourceReference();
361 this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
362 classification.setUuid(state.getConfig().getClassificationUuid());
363 getClassificationService().save(classification);
364 }
365 return this.classification;
366
367 }
368
369 public Taxon createObject(ResultSet rs, GlobisImportState state, Integer taxonId)
370 throws SQLException {
371 String speciesEpi = rs.getString("dtSpcSpcakt");
372 String subGenusEpi = rs.getString("dtSpcSubgenakt");
373 String genusEpi = rs.getString("dtSpcGenusakt");
374 String author = rs.getString("dtSpcAutor");
375
376
377 IZoologicalName zooName = TaxonNameFactory.NewZoologicalInstance(Rank.SPECIES());
378 zooName.setSpecificEpithet(speciesEpi);
379 if (StringUtils.isNotBlank(subGenusEpi)){
380 zooName.setInfraGenericEpithet(subGenusEpi);
381 }
382 zooName.setGenusOrUninomial(genusEpi);
383 handleAuthorAndYear(author, zooName, taxonId, state);
384
385 Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
386
387 return taxon;
388 }
389
390 @Override
391 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
392
393 String nameSpace;
394 Set<String> idSet;
395 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
396 try{
397 Set<String> taxonIdSet = new HashSet<>();
398
399 while (rs.next()){
400 // handleForeignKey(rs, taxonIdSet, "taxonId");
401 }
402
403 //taxon map
404 nameSpace = TAXON_NAMESPACE;
405 idSet = taxonIdSet;
406 Map<String, Taxon> objectMap = getCommonService().getSourcedObjectsByIdInSourceC(Taxon.class, idSet, nameSpace);
407 result.put(nameSpace, objectMap);
408
409
410 } catch (SQLException e) {
411 throw new RuntimeException(e);
412 }
413 return result;
414 }
415
416 @Override
417 protected boolean doCheck(GlobisImportState state){
418 IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
419 return validator.validate(state);
420 }
421
422 @Override
423 protected boolean isIgnore(GlobisImportState state){
424 return ! state.getConfig().isDoCurrentTaxa();
425 }
426
427
428
429
430
431 }