/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
|
|
10
|
package eu.etaxonomy.cdm.io.globis;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.SQLException;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.HashSet;
|
16
|
import java.util.Map;
|
17
|
import java.util.Set;
|
18
|
|
19
|
import org.apache.commons.lang.StringUtils;
|
20
|
import org.apache.log4j.Logger;
|
21
|
import org.springframework.stereotype.Component;
|
22
|
|
23
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
24
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
25
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
26
|
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
|
27
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
28
|
import eu.etaxonomy.cdm.model.common.Language;
|
29
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
30
|
import eu.etaxonomy.cdm.model.description.PresenceTerm;
|
31
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
32
|
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
|
33
|
import eu.etaxonomy.cdm.model.name.Rank;
|
34
|
import eu.etaxonomy.cdm.model.name.ZoologicalName;
|
35
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
36
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
37
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
38
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
39
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
40
|
|
41
|
|
42
|
/**
|
43
|
* @author a.mueller
|
44
|
* @created 20.02.2010
|
45
|
* @version 1.0
|
46
|
*/
|
47
|
@Component
|
48
|
public class GlobisCurrentSpeciesImport extends GlobisImportBase<Taxon> {
|
49
|
private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);
|
50
|
|
51
|
private int modCount = 10000;
|
52
|
private static final String pluralString = "current taxa";
|
53
|
private static final String dbTableName = "current_species";
|
54
|
private static final Class cdmTargetClass = Taxon.class; //not needed
|
55
|
|
56
|
public GlobisCurrentSpeciesImport(){
|
57
|
super(pluralString, dbTableName, cdmTargetClass);
|
58
|
}
|
59
|
|
60
|
|
61
|
|
62
|
|
63
|
/* (non-Javadoc)
|
64
|
* @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
|
65
|
*/
|
66
|
@Override
|
67
|
protected String getIdQuery() {
|
68
|
String strRecordQuery =
|
69
|
" SELECT IDcurrentspec " +
|
70
|
" FROM " + dbTableName;
|
71
|
return strRecordQuery;
|
72
|
}
|
73
|
|
74
|
|
75
|
|
76
|
|
77
|
/* (non-Javadoc)
|
78
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
|
79
|
*/
|
80
|
@Override
|
81
|
protected String getRecordQuery(GlobisImportConfigurator config) {
|
82
|
String strRecordQuery =
|
83
|
" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
|
84
|
" cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " +
|
85
|
" FROM " + getTableName() + " cs " +
|
86
|
" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
|
87
|
return strRecordQuery;
|
88
|
}
|
89
|
|
90
|
|
91
|
|
92
|
/* (non-Javadoc)
|
93
|
* @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
|
94
|
*/
|
95
|
@Override
|
96
|
public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
|
97
|
boolean success = true;
|
98
|
|
99
|
Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
|
100
|
|
101
|
Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
|
102
|
// Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
|
103
|
|
104
|
ResultSet rs = partitioner.getResultSet();
|
105
|
|
106
|
Classification classification = getClassification(state);
|
107
|
|
108
|
try {
|
109
|
|
110
|
int i = 0;
|
111
|
|
112
|
//for each reference
|
113
|
while (rs.next()){
|
114
|
|
115
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
|
116
|
|
117
|
Integer taxonId = rs.getInt("IDcurrentspec");
|
118
|
|
119
|
|
120
|
//String dtSpcJahr -> ignore !
|
121
|
//empty: fiSpcLiteratur
|
122
|
|
123
|
//TODO
|
124
|
//fiSpcspcgrptax
|
125
|
|
126
|
|
127
|
|
128
|
try {
|
129
|
|
130
|
//source ref
|
131
|
Reference<?> sourceRef = state.getTransactionalSourceReference();
|
132
|
Taxon nextHigherTaxon = null;
|
133
|
|
134
|
boolean hasNewParent = false; //true if any parent is new
|
135
|
|
136
|
//species
|
137
|
Taxon species = createObject(rs, state);
|
138
|
|
139
|
|
140
|
String familyStr = rs.getString("dtSpcFamakt");
|
141
|
String subFamilyStr = rs.getString("dtSpcSubfamakt");
|
142
|
String tribeStr = rs.getString("dtSpcTribakt");
|
143
|
|
144
|
//family
|
145
|
Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap);
|
146
|
|
147
|
//subfamily
|
148
|
Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap);
|
149
|
Taxon subFamilyParent = getParent(subFamily, classification);
|
150
|
if (subFamilyParent != null){
|
151
|
if (! compareTaxa(family, subFamilyParent)){
|
152
|
logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
|
153
|
}
|
154
|
}else{
|
155
|
classification.addParentChild(family, subFamily, sourceRef, null);
|
156
|
}
|
157
|
nextHigherTaxon = subFamily;
|
158
|
|
159
|
//tribe
|
160
|
Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap);
|
161
|
if (tribe != null){
|
162
|
Taxon tribeParent = getParent(tribe, classification);
|
163
|
if (tribeParent != null){
|
164
|
if (! compareTaxa(subFamily, tribeParent)){
|
165
|
logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
|
166
|
}
|
167
|
}else{
|
168
|
classification.addParentChild(subFamily, tribe, sourceRef, null);
|
169
|
}
|
170
|
nextHigherTaxon = tribe;
|
171
|
}
|
172
|
|
173
|
|
174
|
//genus
|
175
|
String genusStr = rs.getString("dtSpcGenusakt");
|
176
|
String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
|
177
|
Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap);
|
178
|
Taxon genusParent = getParent(genus, classification);
|
179
|
|
180
|
if (genusParent != null){
|
181
|
if (! compareTaxa(genusParent, nextHigherTaxon)){
|
182
|
logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
|
183
|
}
|
184
|
}else{
|
185
|
classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
|
186
|
}
|
187
|
nextHigherTaxon = genus;
|
188
|
|
189
|
//subgenus
|
190
|
String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
|
191
|
String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
|
192
|
boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
|
193
|
if (hasSubgenus){
|
194
|
Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap);
|
195
|
classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
|
196
|
nextHigherTaxon = subGenus;
|
197
|
}
|
198
|
|
199
|
classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
|
200
|
|
201
|
handleCountries(state, rs, species);
|
202
|
|
203
|
handleCommonNames(state, rs, species);
|
204
|
|
205
|
this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
|
206
|
|
207
|
objectsToSave.add(species);
|
208
|
|
209
|
|
210
|
} catch (Exception e) {
|
211
|
logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
|
212
|
// e.printStackTrace();
|
213
|
}
|
214
|
|
215
|
}
|
216
|
|
217
|
// logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
|
218
|
|
219
|
logger.warn(pluralString + " to save: " + objectsToSave.size());
|
220
|
getTaxonService().save(objectsToSave);
|
221
|
|
222
|
return success;
|
223
|
} catch (SQLException e) {
|
224
|
logger.error("SQLException:" + e);
|
225
|
return false;
|
226
|
}
|
227
|
}
|
228
|
|
229
|
private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
|
230
|
String countriesStr = rs.getString("dtSpcCountries");
|
231
|
if (isBlank(countriesStr)){
|
232
|
return;
|
233
|
}
|
234
|
String[] countriesSplit = countriesStr.split(";");
|
235
|
for (String countryStr : countriesSplit){
|
236
|
if (isBlank(countryStr)){
|
237
|
continue;
|
238
|
}
|
239
|
countryStr = countryStr.trim();
|
240
|
|
241
|
//TODO use isComplete
|
242
|
boolean isComplete = countryStr.endsWith(".");
|
243
|
if (isComplete){
|
244
|
countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
|
245
|
}
|
246
|
boolean isDoubtful = countryStr.endsWith("[?]");
|
247
|
if (isDoubtful){
|
248
|
countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
|
249
|
}
|
250
|
if (countryStr.startsWith("?")){
|
251
|
isDoubtful = true;
|
252
|
countryStr = countryStr.substring(1).trim();
|
253
|
}
|
254
|
|
255
|
|
256
|
|
257
|
countryStr = normalizeCountry(countryStr);
|
258
|
|
259
|
WaterbodyOrCountry country = getCountry(state, countryStr);
|
260
|
|
261
|
PresenceTerm status;
|
262
|
if (isDoubtful){
|
263
|
status = PresenceTerm.PRESENT_DOUBTFULLY();
|
264
|
}else{
|
265
|
status = PresenceTerm.PRESENT();
|
266
|
}
|
267
|
|
268
|
if (country != null){
|
269
|
TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
|
270
|
Distribution distribution = Distribution.NewInstance(country, status);
|
271
|
desc.addElement(distribution);
|
272
|
}else{
|
273
|
logger.warn("Country string not recognized: " + countryStr);
|
274
|
}
|
275
|
}
|
276
|
}
|
277
|
|
278
|
|
279
|
|
280
|
/**
|
281
|
* @param countryStr
|
282
|
* @return
|
283
|
*/
|
284
|
private String normalizeCountry(String countryStr) {
|
285
|
String result = countryStr.trim();
|
286
|
if (result.endsWith(".")){
|
287
|
result = result.substring(0,result.length() - 1);
|
288
|
}
|
289
|
return result;
|
290
|
}
|
291
|
|
292
|
private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
|
293
|
//DON't use, use seperate common name tables instead
|
294
|
|
295
|
// String commonNamesStr = rs.getString("vernacularnames");
|
296
|
// if (isBlank(commonNamesStr)){
|
297
|
// return;
|
298
|
// }
|
299
|
// String[] commonNamesSplit = commonNamesStr.split(";");
|
300
|
// for (String commonNameStr : commonNamesSplit){
|
301
|
// if (isBlank(commonNameStr)){
|
302
|
// continue;
|
303
|
// }
|
304
|
// Language language = null; //TODO
|
305
|
// CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
|
306
|
// TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
|
307
|
// desc.addElement(commonName);
|
308
|
// }
|
309
|
}
|
310
|
|
311
|
|
312
|
|
313
|
|
314
|
/**
|
315
|
* Compares 2 taxa, returns true of both taxa look similar
|
316
|
* @param genus
|
317
|
* @param nextHigherTaxon
|
318
|
* @return
|
319
|
*/
|
320
|
private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
|
321
|
ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
|
322
|
ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
|
323
|
if (!name1.getRank().equals(name2.getRank())){
|
324
|
return false;
|
325
|
}
|
326
|
if (! name1.getTitleCache().equals(name2.getTitleCache())){
|
327
|
return false;
|
328
|
}
|
329
|
return true;
|
330
|
}
|
331
|
|
332
|
|
333
|
|
334
|
|
335
|
private Taxon getParent(Taxon child, Classification classification) {
|
336
|
for (TaxonNode node : child.getTaxonNodes()){
|
337
|
if (node.getClassification().equals(classification)){
|
338
|
if (node.getParent() != null){
|
339
|
return node.getParent().getTaxon();
|
340
|
}else{
|
341
|
return null;
|
342
|
}
|
343
|
}
|
344
|
}
|
345
|
return null;
|
346
|
}
|
347
|
|
348
|
|
349
|
|
350
|
|
351
|
private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap) {
|
352
|
if (isBlank(uninomial)){
|
353
|
return null;
|
354
|
}
|
355
|
|
356
|
String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
|
357
|
|
358
|
String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
|
359
|
Taxon taxon = taxonMap.get(key);
|
360
|
if (taxon == null){
|
361
|
ZoologicalName name = ZoologicalName.NewInstance(rank);
|
362
|
name.setGenusOrUninomial(uninomial);
|
363
|
if (isNotBlank(infraGenericEpi)){
|
364
|
name.setInfraGenericEpithet(infraGenericEpi);
|
365
|
}
|
366
|
taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
|
367
|
|
368
|
taxonMap.put(key, taxon);
|
369
|
handleAuthorAndYear(author, name);
|
370
|
getTaxonService().save(taxon);
|
371
|
}
|
372
|
|
373
|
return taxon;
|
374
|
}
|
375
|
|
376
|
|
377
|
//fast and dirty is enough here
|
378
|
private Classification classification;
|
379
|
|
380
|
private Classification getClassification(GlobisImportState state) {
|
381
|
if (this.classification == null){
|
382
|
String name = state.getConfig().getClassificationName();
|
383
|
Reference<?> reference = state.getTransactionalSourceReference();
|
384
|
this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
|
385
|
classification.setUuid(state.getConfig().getClassificationUuid());
|
386
|
getClassificationService().save(classification);
|
387
|
}
|
388
|
return this.classification;
|
389
|
|
390
|
}
|
391
|
|
392
|
/* (non-Javadoc)
|
393
|
* @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
|
394
|
*/
|
395
|
public Taxon createObject(ResultSet rs, GlobisImportState state)
|
396
|
throws SQLException {
|
397
|
String speciesEpi = rs.getString("dtSpcSpcakt");
|
398
|
String subGenusEpi = rs.getString("dtSpcSubgenakt");
|
399
|
String genusEpi = rs.getString("dtSpcGenusakt");
|
400
|
String author = rs.getString("dtSpcAutor");
|
401
|
|
402
|
|
403
|
ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
|
404
|
zooName.setSpecificEpithet(speciesEpi);
|
405
|
if (StringUtils.isNotBlank(subGenusEpi)){
|
406
|
zooName.setInfraGenericEpithet(subGenusEpi);
|
407
|
}
|
408
|
zooName.setGenusOrUninomial(genusEpi);
|
409
|
handleAuthorAndYear(author, zooName);
|
410
|
|
411
|
Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
|
412
|
|
413
|
return taxon;
|
414
|
}
|
415
|
|
416
|
|
417
|
|
418
|
|
419
|
|
420
|
/* (non-Javadoc)
|
421
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
|
422
|
*/
|
423
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
|
424
|
String nameSpace;
|
425
|
Class cdmClass;
|
426
|
Set<String> idSet;
|
427
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
428
|
try{
|
429
|
Set<String> taxonIdSet = new HashSet<String>();
|
430
|
|
431
|
while (rs.next()){
|
432
|
// handleForeignKey(rs, taxonIdSet, "taxonId");
|
433
|
}
|
434
|
|
435
|
//taxon map
|
436
|
nameSpace = TAXON_NAMESPACE;
|
437
|
cdmClass = Taxon.class;
|
438
|
idSet = taxonIdSet;
|
439
|
Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
440
|
result.put(nameSpace, objectMap);
|
441
|
|
442
|
|
443
|
} catch (SQLException e) {
|
444
|
throw new RuntimeException(e);
|
445
|
}
|
446
|
return result;
|
447
|
}
|
448
|
|
449
|
/* (non-Javadoc)
|
450
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
451
|
*/
|
452
|
@Override
|
453
|
protected boolean doCheck(GlobisImportState state){
|
454
|
IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
|
455
|
return validator.validate(state);
|
456
|
}
|
457
|
|
458
|
|
459
|
/* (non-Javadoc)
|
460
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
461
|
*/
|
462
|
protected boolean isIgnore(GlobisImportState state){
|
463
|
return ! state.getConfig().isDoCurrentTaxa();
|
464
|
}
|
465
|
|
466
|
|
467
|
|
468
|
|
469
|
|
470
|
}
|