1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.berlinModel.in;
|
11
|
|
12
|
import java.sql.ResultSet;
|
13
|
import java.sql.SQLException;
|
14
|
import java.util.ArrayList;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Set;
|
20
|
|
21
|
import org.apache.commons.lang.StringUtils;
|
22
|
import org.apache.log4j.Logger;
|
23
|
import org.springframework.stereotype.Component;
|
24
|
|
25
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
26
|
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
|
27
|
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
|
28
|
import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelOccurrenceImportValidator;
|
29
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
30
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
31
|
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
|
32
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
33
|
import eu.etaxonomy.cdm.model.common.AnnotationType;
|
34
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
35
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
36
|
import eu.etaxonomy.cdm.model.common.Language;
|
37
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
38
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
39
|
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
|
40
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
41
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
42
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
43
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
44
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
45
|
import eu.etaxonomy.cdm.model.term.OrderedTermVocabulary;
|
46
|
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
|
47
|
|
48
|
|
49
|
/**
|
50
|
* @author a.mueller
|
51
|
* @since 20.03.2008
|
52
|
*/
|
53
|
@Component
|
54
|
public class BerlinModelOccurrenceImport extends BerlinModelImportBase {
|
55
|
|
56
|
private static final long serialVersionUID = -7918122767284077183L;
|
57
|
|
58
|
private static final Logger logger = Logger.getLogger(BerlinModelOccurrenceImport.class);
|
59
|
|
60
|
public static final String NAMESPACE = "Occurrence";
|
61
|
|
62
|
private static int modCount = 5000;
|
63
|
private static final String pluralString = "occurrences";
|
64
|
private static final String dbTableName = "emOccurrence"; //??
|
65
|
|
66
|
/**
 * Creates the occurrence import and passes {@link #dbTableName} and
 * {@link #pluralString} to the base class constructor.
 */
public BerlinModelOccurrenceImport() {
    super(dbTableName, pluralString);
}
|
69
|
|
70
|
@Override
|
71
|
protected String getIdQuery(BerlinModelImportState state) {
|
72
|
String result = " SELECT occurrenceId FROM " + getTableName();
|
73
|
if (StringUtils.isNotBlank(state.getConfig().getOccurrenceFilter())){
|
74
|
result += " WHERE " + state.getConfig().getOccurrenceFilter();
|
75
|
}
|
76
|
return result;
|
77
|
}
|
78
|
|
79
|
@Override
|
80
|
protected String getRecordQuery(BerlinModelImportConfigurator config) {
|
81
|
String emCode = config.isIncludesAreaEmCode()? ", ar.EMCode" : "";
|
82
|
String strQuery = //DISTINCT because otherwise emOccurrenceSource creates multiple records for a single distribution
|
83
|
" SELECT DISTINCT pt.RIdentifier AS taxonId, occ.OccurrenceId, occ.Native, occ.Introduced, " +
|
84
|
" occ.Cultivated, occ.StatusUnknown, occ.WorldDistCompl, occ.Notes occNotes, " +
|
85
|
" sumcat.emOccurSumCatId, sumcat.Short, sumcat.Description, " +
|
86
|
" sumcat.OutputCode, ar.AreaId, ar.TDWGCode " + emCode +
|
87
|
" FROM emOccurrence occ " +
|
88
|
" INNER JOIN emArea ar ON occ.AreaFk = ar.AreaId " +
|
89
|
" INNER JOIN PTaxon pt ON occ.PTNameFk = pt.PTNameFk AND occ.PTRefFk = pt.PTRefFk " +
|
90
|
" LEFT OUTER JOIN emOccurSumCat sumcat ON occ.SummaryStatus = sumcat.emOccurSumCatId " +
|
91
|
" LEFT OUTER JOIN emOccurrenceSource ocs ON occ.OccurrenceId = ocs.OccurrenceFk " +
|
92
|
" WHERE (occ.OccurrenceId IN (" + ID_LIST_TOKEN + ") )" +
|
93
|
" ORDER BY pt.RIdentifier";
|
94
|
return strQuery;
|
95
|
}
|
96
|
|
97
|
|
98
|
@Override
|
99
|
public void doInvoke(BerlinModelImportState state) {
|
100
|
super.doInvoke(state);
|
101
|
}
|
102
|
|
103
|
private NamedArea getAreaByAreaId(int areaId) {
|
104
|
NamedArea result = null;
|
105
|
String areaIdStr = String.valueOf(areaId);
|
106
|
OrderedTermVocabulary<NamedArea> voc = getAreaVoc();
|
107
|
for (NamedArea area : voc.getTerms()){
|
108
|
for (IdentifiableSource source : area.getSources()){
|
109
|
if (areaIdStr.equals(source.getIdInSource()) && BerlinModelAreaImport.NAMESPACE.equals(source.getIdNamespace())){
|
110
|
if (result != null){
|
111
|
logger.warn("Result for areaId already exists. areaId: " + areaId);
|
112
|
}
|
113
|
result = area;
|
114
|
}
|
115
|
}
|
116
|
}
|
117
|
return result;
|
118
|
}
|
119
|
|
120
|
private OrderedTermVocabulary<NamedArea> areaVoc;
|
121
|
@SuppressWarnings("unchecked")
|
122
|
private OrderedTermVocabulary<NamedArea> getAreaVoc(){
|
123
|
if (areaVoc == null){
|
124
|
areaVoc = (OrderedTermVocabulary<NamedArea>)getVocabularyService().find(BerlinModelTransformer.uuidVocEuroMedAreas);
|
125
|
}
|
126
|
return areaVoc;
|
127
|
}
|
128
|
|
129
|
|
130
|
private String nullSafeTrim(String string) {
|
131
|
if (string == null){
|
132
|
return null;
|
133
|
}else{
|
134
|
return string.trim();
|
135
|
}
|
136
|
}
|
137
|
|
138
|
@Override
|
139
|
public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
|
140
|
boolean success = true;
|
141
|
@SuppressWarnings("rawtypes")
|
142
|
Set<TaxonBase> taxaToSave = new HashSet<>();
|
143
|
|
144
|
@SuppressWarnings("unchecked")
|
145
|
Map<String, TaxonBase<?>> taxonMap = partitioner.getObjectMap(BerlinModelTaxonImport.NAMESPACE);
|
146
|
|
147
|
ResultSet rs = partitioner.getResultSet();
|
148
|
|
149
|
try {
|
150
|
//map to store the mapping of duplicate berlin model occurrences to their real distributions
|
151
|
//duplicated may occur due to area mappings from BM areas to TDWG areas
|
152
|
Map<Integer, String> duplicateMap = new HashMap<>();
|
153
|
int oldTaxonId = -1;
|
154
|
TaxonDescription oldDescription = null;
|
155
|
int i = 0;
|
156
|
int countDescriptions = 0;
|
157
|
int countDistributions = 0;
|
158
|
int countDuplicates = 0;
|
159
|
//for each reference
|
160
|
while (rs.next()){
|
161
|
|
162
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("Facts handled: " + (i-1));}
|
163
|
|
164
|
int occurrenceId = rs.getInt("OccurrenceId");
|
165
|
int newTaxonId = rs.getInt("taxonId");
|
166
|
String notes = nullSafeTrim(rs.getString("occNotes"));
|
167
|
|
168
|
Integer emStatusId = nullSafeInt(rs, "emOccurSumCatId");
|
169
|
|
170
|
try {
|
171
|
//area(s)
|
172
|
List<NamedArea> areas = makeAreaList(state, partitioner, rs, occurrenceId);
|
173
|
if (areas.size() != 1){
|
174
|
logger.warn("Exactly 1 area expected but was " + areas.size() + ". OccId: " + occurrenceId);
|
175
|
if (areas.isEmpty()){
|
176
|
continue;
|
177
|
}
|
178
|
}
|
179
|
|
180
|
//status
|
181
|
PresenceAbsenceTerm status = null;
|
182
|
String alternativeStatusString = null;
|
183
|
if (emStatusId != null){
|
184
|
status = BerlinModelTransformer.occStatus2PresenceAbsence(emStatusId);
|
185
|
}else{
|
186
|
//EM
|
187
|
if (state.getConfig().isEuroMed() && areas.get(0).getUuid().equals(BerlinModelTransformer.uuidEM)){
|
188
|
String complete = rs.getString("WorldDistCompl");
|
189
|
if (complete == null){
|
190
|
//FIXME
|
191
|
status = PresenceAbsenceTerm.ENDEMISM_UNKNOWN();
|
192
|
alternativeStatusString = getStatusAnnotation(rs);
|
193
|
}else if (complete.equals("C")){
|
194
|
status = PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA();
|
195
|
logger.warn("EmStatusId undefined though WorldDistCompl is 'C'. This is an unexpected state. OccID: " + occurrenceId);
|
196
|
}else if (complete.equals("I")){
|
197
|
status = PresenceAbsenceTerm.NOT_ENDEMIC_FOR_THE_RELEVANT_AREA();
|
198
|
}else{
|
199
|
status = PresenceAbsenceTerm.ENDEMISM_UNKNOWN();
|
200
|
alternativeStatusString = getStatusAnnotation(rs);
|
201
|
}
|
202
|
}else{ //other areas
|
203
|
alternativeStatusString = getStatusAnnotation(rs);
|
204
|
status = getPresenceTerm(state, BerlinModelTransformer.uuidStatusUndefined, "Undefined", "Undefined status as status was not computed in Berlin Model", "none", false, null);
|
205
|
}
|
206
|
}
|
207
|
|
208
|
Reference sourceRef = state.getTransactionalSourceReference();
|
209
|
|
210
|
|
211
|
//create description(elements)
|
212
|
TaxonDescription taxonDescription = getTaxonDescription(newTaxonId, oldTaxonId, oldDescription, taxonMap, occurrenceId, sourceRef);
|
213
|
for (NamedArea area : areas){
|
214
|
Distribution distribution = Distribution.NewInstance(area, status);
|
215
|
if (StringUtils.isNotBlank(alternativeStatusString)){
|
216
|
AnnotationType type = getAnnotationType(state, BerlinModelTransformer.uuidAnnoTypeDistributionStatus, "Original distribution status", "Original distribution status", null, null);
|
217
|
Annotation annotation = Annotation.NewInstance(alternativeStatusString, type, null);
|
218
|
distribution.addAnnotation(annotation);
|
219
|
}
|
220
|
|
221
|
// distribution.setCitation(sourceRef);
|
222
|
if (taxonDescription != null) {
|
223
|
Distribution duplicate = checkIsNoDuplicate(taxonDescription, distribution, duplicateMap , occurrenceId);
|
224
|
if (duplicate == null){
|
225
|
taxonDescription.addElement(distribution);
|
226
|
distribution.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
|
227
|
countDistributions++;
|
228
|
if (taxonDescription != oldDescription){
|
229
|
taxaToSave.add(taxonDescription.getTaxon());
|
230
|
oldDescription = taxonDescription;
|
231
|
countDescriptions++;
|
232
|
}
|
233
|
}else{
|
234
|
countDuplicates++;
|
235
|
duplicate.addImportSource(String.valueOf(occurrenceId), NAMESPACE, state.getTransactionalSourceReference(), null);
|
236
|
logger.info("Distribution is duplicate"); }
|
237
|
} else {
|
238
|
logger.warn("Distribution " + area.getLabel() + " ignored. OccurrenceId = " + occurrenceId);
|
239
|
success = false;
|
240
|
}
|
241
|
//notes
|
242
|
if (isNotBlank(notes)){
|
243
|
Annotation annotation = Annotation.NewInstance(notes, Language.DEFAULT());
|
244
|
distribution.addAnnotation(annotation);
|
245
|
}
|
246
|
}
|
247
|
} catch (UnknownCdmTypeException e) {
|
248
|
logger.error("Unknown presenceAbsence status id: " + emStatusId);
|
249
|
e.printStackTrace();
|
250
|
success = false;
|
251
|
}
|
252
|
}
|
253
|
|
254
|
logger.info("Distributions: " + countDistributions + ", Descriptions: " + countDescriptions );
|
255
|
logger.info("Duplicate occurrences: " + (countDuplicates));
|
256
|
|
257
|
logger.info("Taxa to save: " + taxaToSave.size());
|
258
|
getTaxonService().save(taxaToSave);
|
259
|
|
260
|
return success;
|
261
|
} catch (SQLException e) {
|
262
|
logger.error("SQLException:" + e);
|
263
|
return false;
|
264
|
}
|
265
|
}
|
266
|
|
267
|
/**
|
268
|
* @param rs
|
269
|
* @return
|
270
|
* @throws SQLException
|
271
|
*/
|
272
|
protected String getStatusAnnotation(ResultSet rs) throws SQLException {
|
273
|
String alternativeStatusString;
|
274
|
String[] stringArray = new String[]{"Native: " + rs.getString("Native"), "Introduced: "+ rs.getString("Introduced"),
|
275
|
"Cultivated: " + rs.getString("Cultivated"), "StatusUnknown: " + rs.getString("StatusUnknown"),
|
276
|
"WorldDistCompl: " + rs.getString("WorldDistCompl")};
|
277
|
alternativeStatusString = CdmUtils.concat("; ", stringArray);
|
278
|
return alternativeStatusString;
|
279
|
}
|
280
|
|
281
|
/**
|
282
|
* @param state
|
283
|
* @param partitioner
|
284
|
* @param rs
|
285
|
* @param occurrenceId
|
286
|
* @param tdwgCodeString
|
287
|
* @param emCodeString
|
288
|
* @return
|
289
|
* @throws SQLException
|
290
|
*/
|
291
|
//Create area list
|
292
|
private List<NamedArea> makeAreaList(BerlinModelImportState state,
|
293
|
@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner,
|
294
|
ResultSet rs, int occurrenceId) throws SQLException {
|
295
|
|
296
|
List<NamedArea> areas = new ArrayList<>();
|
297
|
|
298
|
if (state.getConfig().isUseEmAreaVocabulary()){
|
299
|
Integer areaId = rs.getInt("AreaId");
|
300
|
NamedArea area = getAreaByAreaId(areaId);
|
301
|
if (area == null){
|
302
|
logger.warn("Area for areaId " + areaId + " not found.");
|
303
|
}
|
304
|
areas.add(area);
|
305
|
}else{
|
306
|
String tdwgCodeString = rs.getString("TDWGCode");
|
307
|
String emCodeString = state.getConfig().isIncludesAreaEmCode() ? rs.getString("EMCode") : null;
|
308
|
|
309
|
if (tdwgCodeString != null){
|
310
|
|
311
|
String[] tdwgCodes = new String[]{tdwgCodeString};
|
312
|
if (state.getConfig().isSplitTdwgCodes()){
|
313
|
tdwgCodes = tdwgCodeString.split(";");
|
314
|
}
|
315
|
|
316
|
for (String tdwgCode : tdwgCodes){
|
317
|
NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode.trim());
|
318
|
if (area == null){
|
319
|
area = getOtherAreas(state, emCodeString, tdwgCodeString);
|
320
|
}
|
321
|
if (area != null){
|
322
|
areas.add(area);
|
323
|
}
|
324
|
}
|
325
|
}
|
326
|
|
327
|
if (areas.size()== 0){
|
328
|
NamedArea area = getOtherAreas(state, emCodeString, tdwgCodeString);
|
329
|
if (area != null){
|
330
|
areas.add(area);
|
331
|
}
|
332
|
}
|
333
|
if (areas.size() == 0){
|
334
|
String areaId = rs.getString("AreaId");
|
335
|
logger.warn("No areas defined for occurrence " + occurrenceId + ". EMCode: " + CdmUtils.Nz(emCodeString).trim() + ". AreaId: " + areaId );
|
336
|
}
|
337
|
}
|
338
|
return areas;
|
339
|
}
|
340
|
|
341
|
@Override
|
342
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
|
343
|
|
344
|
try{
|
345
|
|
346
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
|
347
|
Set<String> taxonIdSet = new HashSet<String>();
|
348
|
while (rs.next()){
|
349
|
handleForeignKey(rs, taxonIdSet, "taxonId");
|
350
|
}
|
351
|
|
352
|
//taxon map
|
353
|
String nameSpace = BerlinModelTaxonImport.NAMESPACE;
|
354
|
Class<?> cdmClass = TaxonBase.class;
|
355
|
Set<String> idSet = taxonIdSet;
|
356
|
@SuppressWarnings("unchecked")
|
357
|
Map<String, ? extends CdmBase> objectMap = (Map<String, TaxonBase<?>>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
|
358
|
result.put(nameSpace, objectMap);
|
359
|
|
360
|
return result;
|
361
|
} catch (SQLException e) {
|
362
|
throw new RuntimeException(e);
|
363
|
}
|
364
|
}
|
365
|
|
366
|
|
367
|
/**
|
368
|
* Tests if a distribution with the same tdwgArea and the same status already exists in the description.
|
369
|
* If so the old distribution is returned
|
370
|
* @param description
|
371
|
* @param tdwgArea
|
372
|
* @return false, if dupplicate exists. True otherwise.
|
373
|
*/
|
374
|
private Distribution checkIsNoDuplicate(TaxonDescription description, Distribution distribution, Map<Integer, String> duplicateMap, Integer bmDistributionId){
|
375
|
for (DescriptionElementBase descElBase : description.getElements()){
|
376
|
if (descElBase.isInstanceOf(Distribution.class)){
|
377
|
Distribution oldDistr = HibernateProxyHelper.deproxy(descElBase, Distribution.class);
|
378
|
NamedArea oldArea = oldDistr.getArea();
|
379
|
if (oldArea != null && oldArea.equals(distribution.getArea())){
|
380
|
PresenceAbsenceTerm oldStatus = oldDistr.getStatus();
|
381
|
if (oldStatus != null && oldStatus.equals(distribution.getStatus())){
|
382
|
duplicateMap.put(bmDistributionId, oldDistr.getSources().iterator().next().getIdInSource());
|
383
|
return oldDistr;
|
384
|
}
|
385
|
}
|
386
|
}
|
387
|
}
|
388
|
return null;
|
389
|
}
|
390
|
|
391
|
/**
|
392
|
* Use same TaxonDescription if two records belong to the same taxon
|
393
|
* @param newTaxonId
|
394
|
* @param oldTaxonId
|
395
|
* @param oldDescription
|
396
|
* @param taxonMap
|
397
|
* @return
|
398
|
*/
|
399
|
private TaxonDescription getTaxonDescription(int newTaxonId, int oldTaxonId, TaxonDescription oldDescription, Map<String, TaxonBase<?>> taxonMap, int occurrenceId, Reference sourceSec){
|
400
|
TaxonDescription result = null;
|
401
|
if (oldDescription == null || newTaxonId != oldTaxonId){
|
402
|
TaxonBase<?> taxonBase = taxonMap.get(String.valueOf(newTaxonId));
|
403
|
//TODO for testing
|
404
|
//TaxonBase taxonBase = Taxon.NewInstance(TaxonNameFactory.NewBotanicalInstance(Rank.SPECIES()), null);
|
405
|
Taxon taxon;
|
406
|
if ( taxonBase instanceof Taxon ) {
|
407
|
taxon = (Taxon) taxonBase;
|
408
|
} else if (taxonBase != null) {
|
409
|
logger.warn("TaxonBase for Occurrence " + occurrenceId + " was not of type Taxon but: " + taxonBase.getClass().getSimpleName());
|
410
|
return null;
|
411
|
} else {
|
412
|
logger.warn("TaxonBase for Occurrence " + occurrenceId + " is null.");
|
413
|
return null;
|
414
|
}
|
415
|
Set<TaxonDescription> descriptionSet= taxon.getDescriptions();
|
416
|
if (descriptionSet.size() > 0) {
|
417
|
result = descriptionSet.iterator().next();
|
418
|
}else{
|
419
|
result = TaxonDescription.NewInstance();
|
420
|
result.setTitleCache(sourceSec.getTitleCache(), true);
|
421
|
taxon.addDescription(result);
|
422
|
}
|
423
|
}else{
|
424
|
result = oldDescription;
|
425
|
}
|
426
|
return result;
|
427
|
}
|
428
|
|
429
|
@Override
|
430
|
protected boolean doCheck(BerlinModelImportState state){
|
431
|
IOValidator<BerlinModelImportState> validator = new BerlinModelOccurrenceImportValidator();
|
432
|
return validator.validate(state);
|
433
|
}
|
434
|
|
435
|
|
436
|
@Override
|
437
|
protected boolean isIgnore(BerlinModelImportState state){
|
438
|
if (! state.getConfig().isDoOccurrence()){
|
439
|
return true;
|
440
|
}else{
|
441
|
if (!this.checkSqlServerColumnExists(state.getConfig().getSource(), "emOccurrence", "OccurrenceId")){
|
442
|
logger.error("emOccurrence table or emOccurrenceId does not exist. Must ignore occurrence import");
|
443
|
return true;
|
444
|
}else{
|
445
|
return false;
|
446
|
}
|
447
|
}
|
448
|
}
|
449
|
|
450
|
}
|