update Globis import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisCurrentSpeciesImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Map;
17 import java.util.Set;
18
19 import org.apache.commons.lang.StringUtils;
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import com.yourkit.util.Strings;
24
25 import eu.etaxonomy.cdm.common.CdmUtils;
26 import eu.etaxonomy.cdm.io.algaterra.AlgaTerraCollectionImport;
27 import eu.etaxonomy.cdm.io.algaterra.AlgaTerraSpecimenImport;
28 import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelTaxonImport;
29 import eu.etaxonomy.cdm.io.common.IOValidator;
30 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
32 import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
33 import eu.etaxonomy.cdm.model.common.CdmBase;
34 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
35 import eu.etaxonomy.cdm.model.common.Language;
36 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
37 import eu.etaxonomy.cdm.model.description.Distribution;
38 import eu.etaxonomy.cdm.model.description.PresenceTerm;
39 import eu.etaxonomy.cdm.model.description.TaxonDescription;
40 import eu.etaxonomy.cdm.model.location.NamedArea;
41 import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
42 import eu.etaxonomy.cdm.model.name.Rank;
43 import eu.etaxonomy.cdm.model.name.ZoologicalName;
44 import eu.etaxonomy.cdm.model.occurrence.Collection;
45 import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
46 import eu.etaxonomy.cdm.model.reference.Reference;
47 import eu.etaxonomy.cdm.model.taxon.Classification;
48 import eu.etaxonomy.cdm.model.taxon.Taxon;
49 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
50 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
51 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
52 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
53 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
54
55
56 /**
57 * @author a.mueller
58 * @created 20.02.2010
59 * @version 1.0
60 */
61 @Component
62 public class GlobisCurrentSpeciesImport extends GlobisImportBase<Taxon> {
63 private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);
64
65 private int modCount = 10000;
66 private static final String pluralString = "current taxa";
67 private static final String dbTableName = "current_species";
68 private static final Class cdmTargetClass = Taxon.class; //not needed
69
70 public GlobisCurrentSpeciesImport(){
71 super(pluralString, dbTableName, cdmTargetClass);
72 }
73
74
75
76
77 /* (non-Javadoc)
78 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
79 */
80 @Override
81 protected String getIdQuery() {
82 String strRecordQuery =
83 " SELECT IDcurrentspec " +
84 " FROM " + dbTableName;
85 return strRecordQuery;
86 }
87
88
89
90
91 /* (non-Javadoc)
92 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
93 */
94 @Override
95 protected String getRecordQuery(GlobisImportConfigurator config) {
96 String strRecordQuery =
97 " SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
98 " cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " +
99 " FROM " + getTableName() + " cs " +
100 " WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
101 return strRecordQuery;
102 }
103
104
105
106 /* (non-Javadoc)
107 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
108 */
109 @Override
110 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
111 boolean success = true;
112
113 Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
114
115 Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
116 // Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
117
118 ResultSet rs = partitioner.getResultSet();
119
120 Classification classification = getClassification(state);
121
122 try {
123
124 int i = 0;
125
126 //for each reference
127 while (rs.next()){
128
129 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
130
131 Integer taxonId = rs.getInt("IDcurrentspec");
132
133
134 //String dtSpcJahr -> ignore !
135 //empty: fiSpcLiteratur
136
137 //TODO
138 //fiSpcspcgrptax
139
140
141
142 try {
143
144 //source ref
145 Reference<?> sourceRef = state.getTransactionalSourceReference();
146 Taxon nextHigherTaxon = null;
147
148 boolean hasNewParent = false; //true if any parent is new
149
150 //species
151 Taxon species = createObject(rs, state);
152
153
154 String familyStr = rs.getString("dtSpcFamakt");
155 String subFamilyStr = rs.getString("dtSpcSubfamakt");
156 String tribeStr = rs.getString("dtSpcTribakt");
157
158 //family
159 Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap);
160
161 //subfamily
162 Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap);
163 Taxon subFamilyParent = getParent(subFamily, classification);
164 if (subFamilyParent != null){
165 if (! compareTaxa(family, subFamilyParent)){
166 logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
167 }
168 }else{
169 classification.addParentChild(family, subFamily, sourceRef, null);
170 }
171 nextHigherTaxon = subFamily;
172
173 //tribe
174 Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap);
175 if (tribe != null){
176 Taxon tribeParent = getParent(tribe, classification);
177 if (tribeParent != null){
178 if (! compareTaxa(subFamily, tribeParent)){
179 logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
180 }
181 }else{
182 classification.addParentChild(subFamily, tribe, sourceRef, null);
183 }
184 nextHigherTaxon = tribe;
185 }
186
187
188 //genus
189 String genusStr = rs.getString("dtSpcGenusakt");
190 String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
191 Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap);
192 Taxon genusParent = getParent(genus, classification);
193
194 if (genusParent != null){
195 if (! compareTaxa(genusParent, nextHigherTaxon)){
196 logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
197 }
198 }else{
199 classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
200 }
201 nextHigherTaxon = genus;
202
203 //subgenus
204 String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
205 String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
206 boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
207 if (hasSubgenus){
208 Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap);
209 classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
210 nextHigherTaxon = subGenus;
211 }
212
213 classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
214
215 handleCountries(state, rs, species);
216
217 handleCommonNames(state, rs, species);
218
219 this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
220
221 objectsToSave.add(species);
222
223
224 } catch (Exception e) {
225 logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
226 // e.printStackTrace();
227 }
228
229 }
230
231 // logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
232
233 logger.warn(pluralString + " to save: " + objectsToSave.size());
234 getTaxonService().save(objectsToSave);
235
236 return success;
237 } catch (SQLException e) {
238 logger.error("SQLException:" + e);
239 return false;
240 }
241 }
242
243 private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
244 String countriesStr = rs.getString("dtSpcCountries");
245 if (isBlank(countriesStr)){
246 return;
247 }
248 String[] countriesSplit = countriesStr.split(";");
249 for (String countryStr : countriesSplit){
250 if (isBlank(countryStr)){
251 continue;
252 }else{
253 countryStr = normalizeCountry(countryStr);
254 }
255
256 WaterbodyOrCountry country = getCountry(state, countryStr);
257
258 if (country != null){
259 TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
260 Distribution distribution = Distribution.NewInstance(country, PresenceTerm.PRESENT());
261 desc.addElement(distribution);
262 }else{
263 logger.warn("Country string not recognized: " + countryStr);
264 }
265 }
266 }
267
268
269
270 /**
271 * @param countryStr
272 * @return
273 */
274 private String normalizeCountry(String countryStr) {
275 String result = countryStr.trim();
276 if (result.endsWith(".")){
277 result = result.substring(0,result.length() - 1);
278 }
279 return result;
280 }
281
282 private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
283 String commonNamesStr = rs.getString("vernacularnames");
284 if (isBlank(commonNamesStr)){
285 return;
286 }
287 String[] commonNamesSplit = commonNamesStr.split(";");
288 for (String commonNameStr : commonNamesSplit){
289 if (isBlank(commonNameStr)){
290 continue;
291 }
292 Language language = null; //TODO
293 CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
294 TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
295 desc.addElement(commonName);
296 }
297 }
298
299
300
301
302 /**
303 * Compares 2 taxa, returns true of both taxa look similar
304 * @param genus
305 * @param nextHigherTaxon
306 * @return
307 */
308 private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
309 ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
310 ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
311 if (!name1.getRank().equals(name2.getRank())){
312 return false;
313 }
314 if (! name1.getTitleCache().equals(name2.getTitleCache())){
315 return false;
316 }
317 return true;
318 }
319
320
321
322
323 private Taxon getParent(Taxon child, Classification classification) {
324 for (TaxonNode node : child.getTaxonNodes()){
325 if (node.getClassification().equals(classification)){
326 if (node.getParent() != null){
327 return node.getParent().getTaxon();
328 }else{
329 return null;
330 }
331 }
332 }
333 return null;
334 }
335
336
337
338
339 private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap) {
340 if (isBlank(uninomial)){
341 return null;
342 }
343
344 String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
345
346 String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
347 Taxon taxon = taxonMap.get(key);
348 if (taxon == null){
349 ZoologicalName name = ZoologicalName.NewInstance(rank);
350 name.setGenusOrUninomial(uninomial);
351 if (isNotBlank(infraGenericEpi)){
352 name.setInfraGenericEpithet(infraGenericEpi);
353 }
354 taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
355
356 taxonMap.put(key, taxon);
357 handleAuthorAndYear(author, name);
358 getTaxonService().save(taxon);
359 }
360
361 return taxon;
362 }
363
364
365 //fast and dirty is enough here
366 private Classification classification;
367
368 private Classification getClassification(GlobisImportState state) {
369 if (this.classification == null){
370 String name = state.getConfig().getClassificationName();
371 Reference<?> reference = state.getTransactionalSourceReference();
372 this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
373 classification.setUuid(state.getConfig().getClassificationUuid());
374 getClassificationService().save(classification);
375 }
376 return this.classification;
377
378 }
379
380 /* (non-Javadoc)
381 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
382 */
383 public Taxon createObject(ResultSet rs, GlobisImportState state)
384 throws SQLException {
385 String speciesEpi = rs.getString("dtSpcSpcakt");
386 String subGenusEpi = rs.getString("dtSpcSubgenakt");
387 String genusEpi = rs.getString("dtSpcGenusakt");
388 String author = rs.getString("dtSpcAutor");
389
390
391 ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
392 zooName.setSpecificEpithet(speciesEpi);
393 if (StringUtils.isNotBlank(subGenusEpi)){
394 zooName.setInfraGenericEpithet(subGenusEpi);
395 }
396 zooName.setGenusOrUninomial(genusEpi);
397 handleAuthorAndYear(author, zooName);
398
399 Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
400
401 return taxon;
402 }
403
404
405
406
407
408 /* (non-Javadoc)
409 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
410 */
411 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
412 String nameSpace;
413 Class cdmClass;
414 Set<String> idSet;
415 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
416 try{
417 Set<String> taxonIdSet = new HashSet<String>();
418
419 while (rs.next()){
420 // handleForeignKey(rs, taxonIdSet, "taxonId");
421 }
422
423 //taxon map
424 nameSpace = TAXON_NAMESPACE;
425 cdmClass = Taxon.class;
426 idSet = taxonIdSet;
427 Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
428 result.put(nameSpace, objectMap);
429
430
431 } catch (SQLException e) {
432 throw new RuntimeException(e);
433 }
434 return result;
435 }
436
437 /* (non-Javadoc)
438 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
439 */
440 @Override
441 protected boolean doCheck(GlobisImportState state){
442 IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
443 return validator.validate(state);
444 }
445
446
447 /* (non-Javadoc)
448 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
449 */
450 protected boolean isIgnore(GlobisImportState state){
451 return ! state.getConfig().isDoCurrentTaxa();
452 }
453
454
455
456
457
458 }