1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.globis;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.SQLException;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.HashSet;
|
16
|
import java.util.Map;
|
17
|
import java.util.Set;
|
18
|
|
19
|
import org.apache.commons.lang.StringUtils;
|
20
|
import org.apache.log4j.Logger;
|
21
|
import org.springframework.stereotype.Component;
|
22
|
|
23
|
import com.yourkit.util.Strings;
|
24
|
|
25
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
26
|
import eu.etaxonomy.cdm.io.algaterra.AlgaTerraCollectionImport;
|
27
|
import eu.etaxonomy.cdm.io.algaterra.AlgaTerraSpecimenImport;
|
28
|
import eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelTaxonImport;
|
29
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
30
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
31
|
import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
|
32
|
import eu.etaxonomy.cdm.io.globis.validation.GlobisCurrentSpeciesImportValidator;
|
33
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
34
|
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
|
35
|
import eu.etaxonomy.cdm.model.common.Language;
|
36
|
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
|
37
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
38
|
import eu.etaxonomy.cdm.model.description.PresenceTerm;
|
39
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
40
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
41
|
import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
|
42
|
import eu.etaxonomy.cdm.model.name.Rank;
|
43
|
import eu.etaxonomy.cdm.model.name.ZoologicalName;
|
44
|
import eu.etaxonomy.cdm.model.occurrence.Collection;
|
45
|
import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
|
46
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
47
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
48
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
49
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
50
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
51
|
import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
|
52
|
import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
|
53
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
54
|
|
55
|
|
56
|
/**
|
57
|
* @author a.mueller
|
58
|
* @created 20.02.2010
|
59
|
* @version 1.0
|
60
|
*/
|
61
|
@Component
|
62
|
public class GlobisCurrentSpeciesImport extends GlobisImportBase<Taxon> {
|
63
|
private static final Logger logger = Logger.getLogger(GlobisCurrentSpeciesImport.class);

/** Number of handled records after which a progress message is written to the log. */
private static final int modCount = 10000;

/** Label for the imported records; used in log messages and handed to the base class. */
private static final String pluralString = "current taxa";

/** Name of the source database table this import reads from. */
private static final String dbTableName = "current_species";

/** CDM class handed to the base class constructor. */
private static final Class<Taxon> cdmTargetClass = Taxon.class;   //not needed

/**
 * Creates the import and registers its record label, source table name and
 * target class with the base class.
 */
public GlobisCurrentSpeciesImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
|
73
|
|
74
|
|
75
|
|
76
|
|
77
|
/* (non-Javadoc)
|
78
|
* @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
|
79
|
*/
|
80
|
@Override
|
81
|
protected String getIdQuery() {
|
82
|
String strRecordQuery =
|
83
|
" SELECT IDcurrentspec " +
|
84
|
" FROM " + dbTableName;
|
85
|
return strRecordQuery;
|
86
|
}
|
87
|
|
88
|
|
89
|
|
90
|
|
91
|
/* (non-Javadoc)
|
92
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
|
93
|
*/
|
94
|
@Override
|
95
|
protected String getRecordQuery(GlobisImportConfigurator config) {
|
96
|
String strRecordQuery =
|
97
|
" SELECT cs.*, cs.dtSpcEingabedatum as Created_When, cs.dtSpcErfasser as Created_Who," +
|
98
|
" cs.dtSpcBearbeiter as Updated_who, cs.dtSpcAendrgdatum as Updated_When, cs.dtSpcBemerkung as Notes " +
|
99
|
" FROM " + getTableName() + " cs " +
|
100
|
" WHERE ( cs.IDcurrentspec IN (" + ID_LIST_TOKEN + ") )";
|
101
|
return strRecordQuery;
|
102
|
}
|
103
|
|
104
|
|
105
|
|
106
|
/* (non-Javadoc)
|
107
|
* @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
|
108
|
*/
|
109
|
@Override
|
110
|
public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
|
111
|
boolean success = true;
|
112
|
|
113
|
Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
|
114
|
|
115
|
Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
|
116
|
// Map<String, DerivedUnit> ecoFactDerivedUnitMap = (Map<String, DerivedUnit>) partitioner.getObjectMap(ECO_FACT_DERIVED_UNIT_NAMESPACE);
|
117
|
|
118
|
ResultSet rs = partitioner.getResultSet();
|
119
|
|
120
|
Classification classification = getClassification(state);
|
121
|
|
122
|
try {
|
123
|
|
124
|
int i = 0;
|
125
|
|
126
|
//for each reference
|
127
|
while (rs.next()){
|
128
|
|
129
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
|
130
|
|
131
|
Integer taxonId = rs.getInt("IDcurrentspec");
|
132
|
|
133
|
|
134
|
//String dtSpcJahr -> ignore !
|
135
|
//empty: fiSpcLiteratur
|
136
|
|
137
|
//TODO
|
138
|
//fiSpcspcgrptax
|
139
|
|
140
|
|
141
|
|
142
|
try {
|
143
|
|
144
|
//source ref
|
145
|
Reference<?> sourceRef = state.getTransactionalSourceReference();
|
146
|
Taxon nextHigherTaxon = null;
|
147
|
|
148
|
boolean hasNewParent = false; //true if any parent is new
|
149
|
|
150
|
//species
|
151
|
Taxon species = createObject(rs, state);
|
152
|
|
153
|
|
154
|
String familyStr = rs.getString("dtSpcFamakt");
|
155
|
String subFamilyStr = rs.getString("dtSpcSubfamakt");
|
156
|
String tribeStr = rs.getString("dtSpcTribakt");
|
157
|
|
158
|
//family
|
159
|
Taxon family = getTaxon(state, rs, familyStr, null, Rank.FAMILY(), null, taxonMap);
|
160
|
|
161
|
//subfamily
|
162
|
Taxon subFamily = getTaxon(state, rs, subFamilyStr, null, Rank.SUBFAMILY(), null, taxonMap);
|
163
|
Taxon subFamilyParent = getParent(subFamily, classification);
|
164
|
if (subFamilyParent != null){
|
165
|
if (! compareTaxa(family, subFamilyParent)){
|
166
|
logger.warn("Current family and parent of subfamily are not equal: " + taxonId);
|
167
|
}
|
168
|
}else{
|
169
|
classification.addParentChild(family, subFamily, sourceRef, null);
|
170
|
}
|
171
|
nextHigherTaxon = subFamily;
|
172
|
|
173
|
//tribe
|
174
|
Taxon tribe = getTaxon(state, rs, tribeStr, null, Rank.TRIBE(), null, taxonMap);
|
175
|
if (tribe != null){
|
176
|
Taxon tribeParent = getParent(tribe, classification);
|
177
|
if (tribeParent != null){
|
178
|
if (! compareTaxa(subFamily, tribeParent)){
|
179
|
logger.warn("Current subFamily and parent of tribe are not equal: " + taxonId);
|
180
|
}
|
181
|
}else{
|
182
|
classification.addParentChild(subFamily, tribe, sourceRef, null);
|
183
|
}
|
184
|
nextHigherTaxon = tribe;
|
185
|
}
|
186
|
|
187
|
|
188
|
//genus
|
189
|
String genusStr = rs.getString("dtSpcGenusakt");
|
190
|
String genusAuthorStr = rs.getString("dtSpcGenusaktauthor");
|
191
|
Taxon genus = getTaxon(state, rs, genusStr, null, Rank.GENUS(), genusAuthorStr, taxonMap);
|
192
|
Taxon genusParent = getParent(genus, classification);
|
193
|
|
194
|
if (genusParent != null){
|
195
|
if (! compareTaxa(genusParent, nextHigherTaxon)){
|
196
|
logger.warn("Current tribe/subfamily and parent of genus are not equal: " + taxonId);
|
197
|
}
|
198
|
}else{
|
199
|
classification.addParentChild(nextHigherTaxon, genus, sourceRef, null);
|
200
|
}
|
201
|
nextHigherTaxon = genus;
|
202
|
|
203
|
//subgenus
|
204
|
String subGenusStr = CdmBase.deproxy(species.getName(), ZoologicalName.class).getInfraGenericEpithet();
|
205
|
String subGenusAuthorStr = rs.getString("dtSpcSubgenaktauthor");
|
206
|
boolean hasSubgenus = StringUtils.isNotBlank(subGenusStr) || StringUtils.isNotBlank(subGenusAuthorStr);
|
207
|
if (hasSubgenus){
|
208
|
Taxon subGenus = getTaxon(state, rs, genusStr, subGenusStr, Rank.SUBGENUS(), subGenusAuthorStr, taxonMap);
|
209
|
classification.addParentChild(nextHigherTaxon, subGenus, sourceRef, null);
|
210
|
nextHigherTaxon = subGenus;
|
211
|
}
|
212
|
|
213
|
classification.addParentChild(nextHigherTaxon, species, sourceRef, null);
|
214
|
|
215
|
handleCountries(state, rs, species);
|
216
|
|
217
|
handleCommonNames(state, rs, species);
|
218
|
|
219
|
this.doIdCreatedUpdatedNotes(state, species, rs, taxonId, TAXON_NAMESPACE);
|
220
|
|
221
|
objectsToSave.add(species);
|
222
|
|
223
|
|
224
|
} catch (Exception e) {
|
225
|
logger.warn("Exception in current_species: IDcurrentspec " + taxonId + ". " + e.getMessage());
|
226
|
// e.printStackTrace();
|
227
|
}
|
228
|
|
229
|
}
|
230
|
|
231
|
// logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
|
232
|
|
233
|
logger.warn(pluralString + " to save: " + objectsToSave.size());
|
234
|
getTaxonService().save(objectsToSave);
|
235
|
|
236
|
return success;
|
237
|
} catch (SQLException e) {
|
238
|
logger.error("SQLException:" + e);
|
239
|
return false;
|
240
|
}
|
241
|
}
|
242
|
|
243
|
private void handleCountries(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
|
244
|
String countriesStr = rs.getString("dtSpcCountries");
|
245
|
if (isBlank(countriesStr)){
|
246
|
return;
|
247
|
}
|
248
|
String[] countriesSplit = countriesStr.split(";");
|
249
|
for (String countryStr : countriesSplit){
|
250
|
if (isBlank(countryStr)){
|
251
|
continue;
|
252
|
}
|
253
|
countryStr = countryStr.trim();
|
254
|
|
255
|
//TODO use isComplete
|
256
|
boolean isComplete = countryStr.endsWith(".");
|
257
|
if (isComplete){
|
258
|
countryStr = countryStr.substring(0,countryStr.length() - 1).trim();
|
259
|
}
|
260
|
boolean isDoubtful = countryStr.endsWith("[?]");
|
261
|
if (isDoubtful){
|
262
|
countryStr = countryStr.substring(0,countryStr.length() - 3).trim();
|
263
|
}
|
264
|
if (countryStr.startsWith("?")){
|
265
|
isDoubtful = true;
|
266
|
countryStr = countryStr.substring(1).trim();
|
267
|
}
|
268
|
|
269
|
|
270
|
|
271
|
countryStr = normalizeCountry(countryStr);
|
272
|
|
273
|
WaterbodyOrCountry country = getCountry(state, countryStr);
|
274
|
|
275
|
PresenceTerm status;
|
276
|
if (isDoubtful){
|
277
|
status = PresenceTerm.PRESENT_DOUBTFULLY();
|
278
|
}else{
|
279
|
status = PresenceTerm.PRESENT();
|
280
|
}
|
281
|
|
282
|
if (country != null){
|
283
|
TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
|
284
|
Distribution distribution = Distribution.NewInstance(country, status);
|
285
|
desc.addElement(distribution);
|
286
|
}else{
|
287
|
logger.warn("Country string not recognized: " + countryStr);
|
288
|
}
|
289
|
}
|
290
|
}
|
291
|
|
292
|
|
293
|
|
294
|
/**
|
295
|
* @param countryStr
|
296
|
* @return
|
297
|
*/
|
298
|
private String normalizeCountry(String countryStr) {
|
299
|
String result = countryStr.trim();
|
300
|
if (result.endsWith(".")){
|
301
|
result = result.substring(0,result.length() - 1);
|
302
|
}
|
303
|
return result;
|
304
|
}
|
305
|
|
306
|
/**
 * Intentionally does nothing. Common names are not taken from the
 * current species record; the former implementation is kept below,
 * commented out, for reference only.
 *
 * @param state the import state (unused)
 * @param rs the result set positioned at the current record (unused)
 * @param species the species taxon (unused)
 * @throws SQLException never thrown by the current implementation
 */
private void handleCommonNames(GlobisImportState state, ResultSet rs, Taxon species) throws SQLException {
    //DON'T use, use separate common name tables instead

//  String commonNamesStr = rs.getString("vernacularnames");
//  if (isBlank(commonNamesStr)){
//      return;
//  }
//  String[] commonNamesSplit = commonNamesStr.split(";");
//  for (String commonNameStr : commonNamesSplit){
//      if (isBlank(commonNameStr)){
//          continue;
//      }
//      Language language = null; //TODO
//      CommonTaxonName commonName = CommonTaxonName.NewInstance(commonNameStr, language);
//      TaxonDescription desc = getTaxonDescription(species, state.getTransactionalSourceReference(), false, true);
//      desc.addElement(commonName);
//  }
}
|
324
|
|
325
|
|
326
|
|
327
|
|
328
|
/**
|
329
|
* Compares 2 taxa, returns true of both taxa look similar
|
330
|
* @param genus
|
331
|
* @param nextHigherTaxon
|
332
|
* @return
|
333
|
*/
|
334
|
private boolean compareTaxa(Taxon taxon1, Taxon taxon2) {
|
335
|
ZoologicalName name1 = CdmBase.deproxy(taxon1.getName(), ZoologicalName.class);
|
336
|
ZoologicalName name2 = CdmBase.deproxy(taxon2.getName(), ZoologicalName.class);
|
337
|
if (!name1.getRank().equals(name2.getRank())){
|
338
|
return false;
|
339
|
}
|
340
|
if (! name1.getTitleCache().equals(name2.getTitleCache())){
|
341
|
return false;
|
342
|
}
|
343
|
return true;
|
344
|
}
|
345
|
|
346
|
|
347
|
|
348
|
|
349
|
private Taxon getParent(Taxon child, Classification classification) {
|
350
|
for (TaxonNode node : child.getTaxonNodes()){
|
351
|
if (node.getClassification().equals(classification)){
|
352
|
if (node.getParent() != null){
|
353
|
return node.getParent().getTaxon();
|
354
|
}else{
|
355
|
return null;
|
356
|
}
|
357
|
}
|
358
|
}
|
359
|
return null;
|
360
|
}
|
361
|
|
362
|
|
363
|
|
364
|
|
365
|
private Taxon getTaxon(GlobisImportState state, ResultSet rs, String uninomial, String infraGenericEpi, Rank rank, String author, Map<String, Taxon> taxonMap) {
|
366
|
if (isBlank(uninomial)){
|
367
|
return null;
|
368
|
}
|
369
|
|
370
|
String keyEpithet = StringUtils.isNotBlank(infraGenericEpi)? infraGenericEpi : uninomial ;
|
371
|
|
372
|
String key = keyEpithet + "@" + CdmUtils.Nz(author) + "@" + rank.getTitleCache();
|
373
|
Taxon taxon = taxonMap.get(key);
|
374
|
if (taxon == null){
|
375
|
ZoologicalName name = ZoologicalName.NewInstance(rank);
|
376
|
name.setGenusOrUninomial(uninomial);
|
377
|
if (isNotBlank(infraGenericEpi)){
|
378
|
name.setInfraGenericEpithet(infraGenericEpi);
|
379
|
}
|
380
|
taxon = Taxon.NewInstance(name, state.getTransactionalSourceReference());
|
381
|
|
382
|
taxonMap.put(key, taxon);
|
383
|
handleAuthorAndYear(author, name);
|
384
|
getTaxonService().save(taxon);
|
385
|
}
|
386
|
|
387
|
return taxon;
|
388
|
}
|
389
|
|
390
|
|
391
|
//fast and dirty is enough here
|
392
|
private Classification classification;
|
393
|
|
394
|
private Classification getClassification(GlobisImportState state) {
|
395
|
if (this.classification == null){
|
396
|
String name = state.getConfig().getClassificationName();
|
397
|
Reference<?> reference = state.getTransactionalSourceReference();
|
398
|
this.classification = Classification.NewInstance(name, reference, Language.DEFAULT());
|
399
|
classification.setUuid(state.getConfig().getClassificationUuid());
|
400
|
getClassificationService().save(classification);
|
401
|
}
|
402
|
return this.classification;
|
403
|
|
404
|
}
|
405
|
|
406
|
/* (non-Javadoc)
|
407
|
* @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
|
408
|
*/
|
409
|
public Taxon createObject(ResultSet rs, GlobisImportState state)
|
410
|
throws SQLException {
|
411
|
String speciesEpi = rs.getString("dtSpcSpcakt");
|
412
|
String subGenusEpi = rs.getString("dtSpcSubgenakt");
|
413
|
String genusEpi = rs.getString("dtSpcGenusakt");
|
414
|
String author = rs.getString("dtSpcAutor");
|
415
|
|
416
|
|
417
|
ZoologicalName zooName = ZoologicalName.NewInstance(Rank.SPECIES());
|
418
|
zooName.setSpecificEpithet(speciesEpi);
|
419
|
if (StringUtils.isNotBlank(subGenusEpi)){
|
420
|
zooName.setInfraGenericEpithet(subGenusEpi);
|
421
|
}
|
422
|
zooName.setGenusOrUninomial(genusEpi);
|
423
|
handleAuthorAndYear(author, zooName);
|
424
|
|
425
|
Taxon taxon = Taxon.NewInstance(zooName, state.getTransactionalSourceReference());
|
426
|
|
427
|
return taxon;
|
428
|
}
|
429
|
|
430
|
|
431
|
|
432
|
|
433
|
|
434
|
/* (non-Javadoc)
|
435
|
* @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
|
436
|
*/
|
437
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
|
438
|
String nameSpace;
|
439
|
Class cdmClass;
|
440
|
Set<String> idSet;
|
441
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
|
442
|
try{
|
443
|
Set<String> taxonIdSet = new HashSet<String>();
|
444
|
|
445
|
while (rs.next()){
|
446
|
// handleForeignKey(rs, taxonIdSet, "taxonId");
|
447
|
}
|
448
|
|
449
|
//taxon map
|
450
|
nameSpace = TAXON_NAMESPACE;
|
451
|
cdmClass = Taxon.class;
|
452
|
idSet = taxonIdSet;
|
453
|
Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
454
|
result.put(nameSpace, objectMap);
|
455
|
|
456
|
|
457
|
} catch (SQLException e) {
|
458
|
throw new RuntimeException(e);
|
459
|
}
|
460
|
return result;
|
461
|
}
|
462
|
|
463
|
/* (non-Javadoc)
|
464
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
465
|
*/
|
466
|
@Override
|
467
|
protected boolean doCheck(GlobisImportState state){
|
468
|
IOValidator<GlobisImportState> validator = new GlobisCurrentSpeciesImportValidator();
|
469
|
return validator.validate(state);
|
470
|
}
|
471
|
|
472
|
|
473
|
/* (non-Javadoc)
|
474
|
* @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
|
475
|
*/
|
476
|
protected boolean isIgnore(GlobisImportState state){
|
477
|
return ! state.getConfig().isDoCurrentTaxa();
|
478
|
}
|
479
|
|
480
|
|
481
|
|
482
|
|
483
|
|
484
|
}
|