1
|
// $Id$
|
2
|
/**
|
3
|
* Copyright (C) 2013 EDIT
|
4
|
* European Distributed Institute of Taxonomy
|
5
|
* http://www.e-taxonomy.eu
|
6
|
*
|
7
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
8
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
9
|
*/
|
10
|
package eu.etaxonomy.cdm.api.service.description;
|
11
|
|
12
|
import java.util.ArrayList;
|
13
|
import java.util.Arrays;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.HashSet;
|
16
|
import java.util.Iterator;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Set;
|
20
|
import java.util.UUID;
|
21
|
|
22
|
import org.apache.log4j.Level;
|
23
|
import org.apache.log4j.Logger;
|
24
|
import org.hibernate.FlushMode;
|
25
|
import org.hibernate.HibernateException;
|
26
|
import org.hibernate.Session;
|
27
|
import org.hibernate.engine.spi.SessionFactoryImplementor;
|
28
|
import org.hibernate.search.Search;
|
29
|
import org.springframework.beans.factory.annotation.Autowired;
|
30
|
import org.springframework.orm.hibernate5.HibernateTransactionManager;
|
31
|
import org.springframework.stereotype.Service;
|
32
|
import org.springframework.transaction.TransactionDefinition;
|
33
|
import org.springframework.transaction.TransactionStatus;
|
34
|
import org.springframework.transaction.support.DefaultTransactionDefinition;
|
35
|
|
36
|
import eu.etaxonomy.cdm.api.service.IClassificationService;
|
37
|
import eu.etaxonomy.cdm.api.service.IDescriptionService;
|
38
|
import eu.etaxonomy.cdm.api.service.INameService;
|
39
|
import eu.etaxonomy.cdm.api.service.ITaxonService;
|
40
|
import eu.etaxonomy.cdm.api.service.ITermService;
|
41
|
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
|
42
|
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
|
43
|
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
|
44
|
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
|
45
|
import eu.etaxonomy.cdm.model.common.Extension;
|
46
|
import eu.etaxonomy.cdm.model.common.ExtensionType;
|
47
|
import eu.etaxonomy.cdm.model.common.Marker;
|
48
|
import eu.etaxonomy.cdm.model.common.MarkerType;
|
49
|
import eu.etaxonomy.cdm.model.common.OrderedTermBase;
|
50
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
51
|
import eu.etaxonomy.cdm.model.description.Distribution;
|
52
|
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
|
53
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
54
|
import eu.etaxonomy.cdm.model.location.NamedArea;
|
55
|
import eu.etaxonomy.cdm.model.name.Rank;
|
56
|
import eu.etaxonomy.cdm.model.taxon.Classification;
|
57
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
58
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
59
|
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
|
60
|
import eu.etaxonomy.cdm.persistence.query.OrderHint;
|
61
|
|
62
|
/**
|
63
|
*
|
64
|
* <h2>GENERAL NOTES </h2>
|
65
|
* <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
|
66
|
* version 14 written in Visual Basic and still need to be
|
67
|
* adapted to the java version of the transmission engine!</em>
|
68
|
*
|
69
|
* <h3>summaryStatus</h3>
|
70
|
*
|
71
|
* Each distribution information has a summaryStatus, which is a summary of the status codes
|
72
|
* as stored in the fields of emOccurrence native, introduced, cultivated, ...
|
73
|
* The summaryStatus seems to be equivalent to the CDM DistributionStatus
|
74
|
*
|
75
|
* <h3>map generation</h3>
|
76
|
*
|
77
|
* When generating maps from the accumulated distribution information some special cases have to be handled:
|
78
|
* <ol>
|
79
|
* <li>if an entered or imported status information exists for the same area for which calculated (accumulated)
|
80
|
* data is available, the calculated data has to be given preference over other data.
|
81
|
* </li>
|
82
|
* <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
|
83
|
* status should be shown in the map, whereas the super area should be ignored.
|
84
|
* </li>
|
85
|
* </ol>
|
86
|
*
|
87
|
* @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
|
88
|
* @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
|
89
|
* @date Feb 22, 2013
|
90
|
*/
|
91
|
@Service
|
92
|
public class TransmissionEngineDistribution { //TODO extends IoBase?
|
93
|
|
94
|
// Prefix of the Extension value that carries a status term's priority, e.g.
// "transmissionEngineDistribution.priority:5" (parsed in getPriorityFor()).
public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";

public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);

/**
 * only used for performance testing: when true, accumulate() and the two
 * accumulate*() steps stop after their first batch/classification.
 */
final boolean ONLY_FISRT_BATCH = false; // NOTE(review): name has a typo ("FISRT"); kept because other code may reference it


// Property paths to initialize when loading taxa so that descriptions,
// their markers, areas and status terms are available without lazy loading.
protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
        "description.markers.markerType",
        "description.elements.markers.markerType",
        "description.elements.area",
        "description.elements.status",
        "description.elements.sources.citation.authorship",
        // "description.elements.sources.nameUsedInSource",
        // "description.elements.multilanguageText",
        // "name.status.type",
});


/**
 * A map which contains the status terms as key and the priority as value
 * The map will contain both, the PresenceTerms and the AbsenceTerms
 * (lazily built by initializeStatusPriorityMap()).
 */
private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;

@Autowired
private IDescriptionService descriptionService;

@Autowired
private ITermService termService;

@Autowired
private ITaxonService taxonService;

@Autowired
private IClassificationService classificationService;

@Autowired
private INameService mameService; // NOTE(review): likely meant "nameService"; only referenced from commented-out code here

@Autowired
private HibernateTransactionManager transactionManager;

// Status terms ignored in step 1 (area accumulation); see getByAreaIgnoreStatusList().
private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;

// Status terms ignored in step 2 (rank accumulation); see getByRankIgnoreStatusList().
private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;

// Cache of superArea -> included sub areas, filled lazily in getSubAreasFor().
private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();

// Reusable empty order-hint list for the various listByIds() calls.
private final List<OrderHint> emptyOrderHints = new ArrayList<OrderHint>(0);
|
147
|
|
148
|
|
149
|
/**
|
150
|
* byAreaIgnoreStatusList contains by default:
|
151
|
* <ul>
|
152
|
* <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
|
153
|
* <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
|
154
|
* <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
|
155
|
* <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
|
156
|
* <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
|
157
|
* </ul>
|
158
|
*
|
159
|
* @return the byAreaIgnoreStatusList
|
160
|
*/
|
161
|
public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
|
162
|
if(byAreaIgnoreStatusList == null ){
|
163
|
byAreaIgnoreStatusList = Arrays.asList(
|
164
|
new PresenceAbsenceTerm[] {
|
165
|
PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
|
166
|
PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
|
167
|
PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
|
168
|
PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
|
169
|
PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
|
170
|
// TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
|
171
|
});
|
172
|
}
|
173
|
return byAreaIgnoreStatusList;
|
174
|
}
|
175
|
|
176
|
/**
 * Overrides the default list of status terms skipped during the
 * by-area accumulation step, see {@link #getByAreaIgnoreStatusList()}.
 *
 * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
 */
public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
    this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
}
|
182
|
|
183
|
/**
|
184
|
* byRankIgnoreStatusList contains by default
|
185
|
* <ul>
|
186
|
* <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
|
187
|
* </ul>
|
188
|
*
|
189
|
* @return the byRankIgnoreStatusList
|
190
|
*/
|
191
|
public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {
|
192
|
|
193
|
if (byRankIgnoreStatusList == null) {
|
194
|
byRankIgnoreStatusList = Arrays.asList(
|
195
|
new PresenceAbsenceTerm[] {
|
196
|
PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
|
197
|
});
|
198
|
}
|
199
|
return byRankIgnoreStatusList;
|
200
|
}
|
201
|
|
202
|
/**
 * Overrides the default list of status terms skipped during the
 * by-rank accumulation step, see {@link #getByRankIgnoreStatusList()}.
 *
 * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
 */
public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
    this.byRankIgnoreStatusList = byRankIgnoreStatusList;
}
|
208
|
|
209
|
/**
 * Default constructor. All collaborators are injected by Spring via the
 * {@code @Autowired} fields, so no arguments are needed here.
 * (The previous javadoc documented a nonexistent {@code superAreas} parameter.)
 */
public TransmissionEngineDistribution() {
}
|
215
|
|
216
|
/**
|
217
|
* initializes the map which contains the status terms as key and the priority as value
|
218
|
* The map will contain both, the PresenceTerms and the AbsenceTerms
|
219
|
*/
|
220
|
private void initializeStatusPriorityMap() {
|
221
|
|
222
|
statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
|
223
|
Integer priority;
|
224
|
|
225
|
// PresenceTerms
|
226
|
for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
|
227
|
priority = getPriorityFor(term);
|
228
|
if(priority != null){
|
229
|
statusPriorityMap.put(term, priority);
|
230
|
}
|
231
|
}
|
232
|
}
|
233
|
|
234
|
/**
|
235
|
* Compares the PresenceAbsenceTermBase terms <code>a</code> and <code>b</code> after
|
236
|
* the priority as stored in the statusPriorityMap. The PresenceAbsenceTermBase with
|
237
|
* the higher priority is returned. a well be returned if a == b,
|
238
|
* If either a or b are null b or a is returned.
|
239
|
*
|
240
|
* @see initializeStatusPriorityMap()
|
241
|
*
|
242
|
* @param a
|
243
|
* @param b
|
244
|
* @return
|
245
|
*/
|
246
|
private PresenceAbsenceTerm choosePreferred(PresenceAbsenceTerm a, PresenceAbsenceTerm b){
|
247
|
|
248
|
if (statusPriorityMap == null) {
|
249
|
initializeStatusPriorityMap();
|
250
|
}
|
251
|
|
252
|
if (b == null) {
|
253
|
return a;
|
254
|
}
|
255
|
if (a == null) {
|
256
|
return b;
|
257
|
}
|
258
|
|
259
|
if (statusPriorityMap.get(a) == null) {
|
260
|
logger.warn("No priority found in map for " + a.getLabel());
|
261
|
return b;
|
262
|
}
|
263
|
if (statusPriorityMap.get(b) == null) {
|
264
|
logger.warn("No priority found in map for " + b.getLabel());
|
265
|
return a;
|
266
|
}
|
267
|
if(statusPriorityMap.get(a) < statusPriorityMap.get(b)){
|
268
|
return b;
|
269
|
} else {
|
270
|
return a;
|
271
|
}
|
272
|
}
|
273
|
|
274
|
/**
|
275
|
* reads the priority for the given status term from the extensions.
|
276
|
*
|
277
|
* @param term
|
278
|
* @return the priority value
|
279
|
*/
|
280
|
private Integer getPriorityFor(DefinedTermBase<?> term) {
|
281
|
Set<Extension> extensions = term.getExtensions();
|
282
|
for(Extension extension : extensions){
|
283
|
if(!extension.getType().equals(ExtensionType.ORDER())) {
|
284
|
continue;
|
285
|
}
|
286
|
int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
|
287
|
if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
|
288
|
try {
|
289
|
Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
|
290
|
return priority;
|
291
|
} catch (NumberFormatException e) {
|
292
|
logger.warn("Invalid number format in Extension:" + extension.getValue());
|
293
|
}
|
294
|
}
|
295
|
}
|
296
|
logger.warn("no priority defined for '" + term.getLabel() + "'");
|
297
|
return null;
|
298
|
}
|
299
|
|
300
|
/**
 * runs both steps
 * <ul>
 * <li>Step 1: Accumulate occurrence records by area</li>
 * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank,
 * the status of all children are accumulated on each rank starting from
 * lower rank to upper rank.</li>
 * </ul>
 *
 * @param mode
 *            selects which of the two steps to run (byAreas, byRanks, or both)
 * @param superAreas
 *            the areas to which the subordinate areas should be projected.
 * @param lowerRank
 * @param upperRank
 * @param classification
 *            limit the accumulation process to a specific classification
 *            (when null, all classifications are processed)
 * @param monitor
 *            the progress monitor to use for reporting progress to the
 *            user. It is the caller's responsibility to call done() on the
 *            given monitor. Accepts null, indicating that no progress
 *            should be reported and that the operation cannot be cancelled.
 */
public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
        Classification classification, IProgressMonitor monitor) {

    if (monitor == null) {
        monitor = new NullProgressMonitor();
    }


    // only for debugging:
    // NOTE(review): this forces DEBUG level at runtime for every caller — confirm this is intended in production
    logger.setLevel(Level.DEBUG); // TRACE will slow down a lot since it forces loading all term representations
    //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);

    logger.info("Hibernate JDBC Batch size: "
            + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());

    // resolve the set of classifications to process (all of them when none was given)
    Set<Classification> classifications = new HashSet<Classification>();
    if(classification == null) {
        classifications.addAll(classificationService.listClassifications(null, null, null, null));
    } else {
        classifications.add(classification);
    }

    // 200 ticks per accumulation step, so 400 when both steps run
    int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;

    // take start time for performance testing
    // NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
    double start = System.currentTimeMillis();

    // +1 tick for the updatePriorities() call below
    monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
    updatePriorities();
    monitor.worked(1);

    // the ordered list of ranks to aggregate over in step 2
    List<Rank> ranks = rankInterval(lowerRank, upperRank);

    for(Classification _classification : classifications) {

        ClassificationLookupDTO classificationLookupDao = classificationService.classificationLookup(_classification);
        classificationLookupDao.filterInclude(ranks);

        double end1 = System.currentTimeMillis();
        logger.info("Time elapsed for classificationLookup() : " + (end1 - start) / (1000) + "s");
        double start2 = System.currentTimeMillis();

        monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
        // step 1: project sub-area distributions onto the configured super areas
        if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
            accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200), true);
        }
        monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());

        double end2 = System.currentTimeMillis();
        logger.info("Time elapsed for accumulateByArea() : " + (end2 - start2) / (1000) + "s");

        double start3 = System.currentTimeMillis();
        // step 2: aggregate child-taxon distributions up the rank interval;
        // when running byRanks alone, existing computed descriptions are cleared first
        if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
            accumulateByRank(ranks, classificationLookupDao, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
        }

        double end3 = System.currentTimeMillis();
        logger.info("Time elapsed for accumulateByRank() : " + (end3 - start3) / (1000) + "s");
        logger.info("Time elapsed for accumulate(): " + (end3 - start) / (1000) + "s");

        // performance-testing short circuit: stop after the first classification
        if(ONLY_FISRT_BATCH) {
            monitor.done();
            break;
        }
    }
}
|
390
|
|
391
|
|
392
|
/**
 * Step 1: Accumulate occurrence records by area
 * <ul>
 * <li>areas are projected to super areas e.g.: HS &lt;-- HS(A), HS(G), HS(S)</li>
 * <li>super areas do initially not have a status set ==&gt; Prerequisite to check in CDM</li>
 * <li>areas having a summary status contained in {@link #getByAreaIgnoreStatusList()} are ignored</li>
 * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
 * <li>the source references of the accumulated distributions are also accumulated into the new distribution,</li>
 * <li>this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
 * </ul>
 *
 * @param superAreas
 *            the areas to which the subordinate areas should be projected
 * @param classificationLookupDao
 *            lookup providing the ids of all taxa to visit
 * @param subMonitor
 *            progress monitor; one tick is reported per taxon
 * @param doClearDescriptions
 *            when true, existing computed Distributions are removed from the
 *            reused description before new ones are added
 */
protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao, IProgressMonitor subMonitor, boolean doClearDescriptions) {

    int batchSize = 1000;

    TransactionStatus txStatus = startTransaction(false);

    // reload superAreas TODO is it faster to getSession().merge(object) ??
    // only the uuids are kept; the terms themselves are re-fetched per batch
    // because the session is cleared between batches
    Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
    for (NamedArea superArea : superAreas){
        superAreaUuids.add(superArea.getUuid());
    }

    // visit all accepted taxa
    subMonitor.beginTask("Accumulating by area ", classificationLookupDao.getTaxonIds().size());
    Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();

    while (taxonIdIterator.hasNext()) {

        if(txStatus == null) {
            // transaction has been comitted at the end of this batch, start a new one
            txStatus = startTransaction(false);
        }

        // the session is cleared after each batch, so load the superAreaList for each batch
        List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);

        // load taxa for this batch: drain up to batchSize ids from the iterator
        List<TaxonBase> taxa = null;
        Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
        while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
            taxonIds.add(taxonIdIterator.next());
        }

        // logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");

        taxa = taxonService.listByIds(taxonIds, null, null, emptyOrderHints, TAXONDESCRIPTION_INIT_STRATEGY);

        // iterate over the taxa and accumulate areas
        for(TaxonBase taxonBase : taxa) {
            if(logger.isDebugEnabled()){
                logger.debug("accumulateByArea() - taxon :" + taxonToString(taxonBase));
            }

            Taxon taxon = (Taxon)taxonBase;
            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
            List<Distribution> distributions = distributionsFor(taxon);

            // Step through superAreas for accumulation of subAreas
            for (NamedArea superArea : superAreaList){

                // accumulate all sub area status
                PresenceAbsenceTerm accumulatedStatus = null;
                // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
                Set<NamedArea> subAreas = getSubAreasFor(superArea);
                for(NamedArea subArea : subAreas){
                    if(logger.isTraceEnabled()){
                        logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
                    }
                    // step through all distributions for the given subArea
                    for(Distribution distribution : distributions){
                        if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
                            PresenceAbsenceTerm status = distribution.getStatus();
                            if(logger.isTraceEnabled()){
                                logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
                            }
                            // skip all distributions whose status IS contained in byAreaIgnoreStatusList
                            if (getByAreaIgnoreStatusList().contains(status)){
                                continue;
                            }
                            // keep the status with the highest priority seen so far
                            accumulatedStatus = choosePreferred(accumulatedStatus, status);
                        }
                    }
                } // next sub area
                if (accumulatedStatus != null) {
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));
                    }
                    // store new distribution element for superArea in taxon description
                    Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
                    newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                    description.addElement(newDistribitionElement);
                }

            } // next super area ....

            descriptionService.saveOrUpdate(description);
            taxonService.saveOrUpdate(taxon);
            subMonitor.worked(1);

        } // next taxon

        flushAndClear();

        // commit for every batch, otherwise the persistent context
        // may grow too much and eats up all the heap
        commitTransaction(txStatus);
        txStatus = null;

        // performance-testing short circuit: stop after the first batch
        if(ONLY_FISRT_BATCH) {
            break;
        }

    } // next batch of taxa

    subMonitor.done();
}
|
514
|
|
515
|
/**
 * Step 2: Accumulate by ranks starting from lower rank to upper rank, the status of all children
 * are accumulated on each rank starting from lower rank to upper rank.
 * <ul>
 * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
 *    up to upper rank (e.g. Genus)</li>
 *  <li>the accumulation is done for each distribution area found in the included taxa</li>
 *  <li>areas of subtaxa with status endemic are ignored</li>
 *  <li>the status with the highest priority determines the value for the accumulated distribution</li>
 *  <li>the source reference of the accumulated distributions are also accumulated into the new distribution,
 *    this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
 *</ul>
 *
 * @param rankInterval
 *            the ordered list of ranks to process, lowest first
 * @param classificationLookupDao
 *            lookup providing taxon ids per rank and the child-taxon map
 * @param subMonitor
 *            progress monitor; ticksPerRank ticks are reported per rank
 * @param doClearDescriptions
 *            when true, existing computed Distributions are removed from the
 *            reused description before new ones are added
 */
protected void accumulateByRank(List<Rank> rankInterval, ClassificationLookupDTO classificationLookupDao, IProgressMonitor subMonitor, boolean doClearDescriptions) {

    int batchSize = 500;

    TransactionStatus txStatus = startTransaction(false);

    // the loadRankSpecificRootNodes() method not only finds
    // taxa of the specified rank but also taxa of lower ranks
    // if no taxon of the specified rank exists, so we need to
    // remember which taxa have been processed already
    Set<Integer> taxaProcessedIds = new HashSet<Integer>();
    List<TaxonBase> taxa = null;
    List<TaxonBase> childTaxa = null;

    List<Rank> ranks = rankInterval;

    int ticksPerRank = 100;
    subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);

    for (Rank rank : ranks) {

        if(logger.isDebugEnabled()){
            logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
        }

        // created lazily once the size of the first batch is known
        SubProgressMonitor taxonSubMonitor = null;
        Set<Integer> taxonIdsPerRank = classificationLookupDao.getTaxonIdByRank().get(rank);
        if(taxonIdsPerRank == null || taxonIdsPerRank.isEmpty()) {
            continue;
        }
        Iterator<Integer> taxonIdIterator = taxonIdsPerRank.iterator();
        while (taxonIdIterator.hasNext()) {

            if(txStatus == null) {
                // transaction has been comitted at the end of this batch, start a new one
                txStatus = startTransaction(false);
            }

            // load taxa for this batch: drain up to batchSize ids from the iterator
            Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
            while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
                taxonIds.add(taxonIdIterator.next());
            }

            taxa = taxonService.listByIds(taxonIds, null, null, emptyOrderHints, null);

            if(taxonSubMonitor == null) {
                taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
                taxonSubMonitor.beginTask("Accumulating by rank " + termToString(rank), taxa.size());
            }

            // if(logger.isDebugEnabled()){
            //     logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
            // }

            for(TaxonBase taxonBase : taxa) {

                Taxon taxon = (Taxon)taxonBase;
                if (taxaProcessedIds.contains(taxon.getId())) {
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
                    }
                    continue;
                }
                taxaProcessedIds.add(taxon.getId());
                if(logger.isDebugEnabled()){
                    logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
                }

                // Step through direct taxonomic children for accumulation;
                // per area, the status with the highest priority wins
                Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();

                Set<Integer> childTaxonIds = classificationLookupDao.getChildTaxonMap().get(taxon.getId());
                if(childTaxonIds != null && !childTaxonIds.isEmpty()) {
                    childTaxa = taxonService.listByIds(childTaxonIds, null, null, emptyOrderHints, TAXONDESCRIPTION_INIT_STRATEGY);

                    for (TaxonBase childTaxonBase : childTaxa){

                        Taxon childTaxon = (Taxon) childTaxonBase;
                        // children are only read here, never modified
                        getSession().setReadOnly(childTaxon, true);
                        if(logger.isTraceEnabled()){
                            logger.trace(" subtaxon :" + taxonToString(childTaxon));
                        }

                        for(Distribution distribution : distributionsFor(childTaxon) ) {
                            PresenceAbsenceTerm status = distribution.getStatus();
                            NamedArea area = distribution.getArea();
                            // skip statuses in the by-rank ignore list (e.g. endemic)
                            if (status == null || getByRankIgnoreStatusList().contains(status)){
                                continue;
                            }
                            accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));
                        }
                    }

                    if(accumulatedStatusMap.size() > 0) {
                        TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                        for (NamedArea area : accumulatedStatusMap.keySet()) {
                            // store new distribution element in new Description
                            Distribution newDistribitionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
                            newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                            description.addElement(newDistribitionElement);
                        }
                        taxonService.saveOrUpdate(taxon);
                        descriptionService.saveOrUpdate(description);
                    }

                }
                taxonSubMonitor.worked(1); // one taxon worked

            } // next taxon ....

            flushAndClear();

            // commit for every batch, otherwise the persistent context
            // may grow too much and eats up all the heap
            commitTransaction(txStatus);
            txStatus = null;

            // performance-testing short circuit: stop after the first batch
            if(ONLY_FISRT_BATCH) {
                break;
            }
        } // next batch

        if(taxonSubMonitor != null) { // TODO taxonSubpager, this check should not be needed
            taxonSubMonitor.done();
        }
        subMonitor.worked(1);

        if(ONLY_FISRT_BATCH) {
            break;
        }
    } // next Rank

    subMonitor.done();
}
|
663
|
|
664
|
/**
|
665
|
* @param lowerRank
|
666
|
* @param upperRank
|
667
|
* @return
|
668
|
*/
|
669
|
private List<Rank> rankInterval(Rank lowerRank, Rank upperRank) {
|
670
|
|
671
|
TransactionStatus txStatus = startTransaction(false);
|
672
|
Rank currentRank = lowerRank;
|
673
|
List<Rank> ranks = new ArrayList<Rank>();
|
674
|
ranks.add(currentRank);
|
675
|
while (!currentRank.isHigher(upperRank)) {
|
676
|
currentRank = findNextHigherRank(currentRank);
|
677
|
ranks.add(currentRank);
|
678
|
}
|
679
|
commitTransaction(txStatus);
|
680
|
txStatus = null;
|
681
|
return ranks;
|
682
|
}
|
683
|
|
684
|
/**
 * Returns the current Hibernate session as exposed by the description service.
 *
 * @return the session used for flushing, clearing and read-only marking
 */
private Session getSession() {
    return descriptionService.getSession();
}
|
690
|
|
691
|
/**
|
692
|
*
|
693
|
*/
|
694
|
private void flushAndClear() {
|
695
|
logger.debug("flushing and clearing session ...");
|
696
|
getSession().flush();
|
697
|
try {
|
698
|
Search.getFullTextSession(getSession()).flushToIndexes();
|
699
|
} catch (HibernateException e) {
|
700
|
/* IGNORE - Hibernate Search Event listeners not configured ... */
|
701
|
if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
|
702
|
throw e;
|
703
|
}
|
704
|
}
|
705
|
getSession().clear();
|
706
|
}
|
707
|
|
708
|
|
709
|
// TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
|
710
|
public TransactionStatus startTransaction(Boolean readOnly) {
|
711
|
|
712
|
DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
|
713
|
defaultTxDef.setReadOnly(readOnly);
|
714
|
TransactionDefinition txDef = defaultTxDef;
|
715
|
|
716
|
// Log some transaction-related debug information.
|
717
|
if (logger.isTraceEnabled()) {
|
718
|
logger.trace("Transaction name = " + txDef.getName());
|
719
|
logger.trace("Transaction facets:");
|
720
|
logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
|
721
|
logger.trace("Isolation level = " + txDef.getIsolationLevel());
|
722
|
logger.trace("Timeout = " + txDef.getTimeout());
|
723
|
logger.trace("Read Only = " + txDef.isReadOnly());
|
724
|
// org.springframework.orm.hibernate5.HibernateTransactionManager
|
725
|
// provides more transaction/session-related debug information.
|
726
|
}
|
727
|
|
728
|
TransactionStatus txStatus = transactionManager.getTransaction(txDef);
|
729
|
|
730
|
getSession().setFlushMode(FlushMode.COMMIT);
|
731
|
|
732
|
return txStatus;
|
733
|
}
|
734
|
|
735
|
// TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
|
736
|
public void commitTransaction(TransactionStatus txStatus){
|
737
|
logger.debug("commiting transaction ...");
|
738
|
transactionManager.commit(txStatus);
|
739
|
return;
|
740
|
}
|
741
|
|
742
|
/**
|
743
|
* returns the next higher rank
|
744
|
*
|
745
|
* TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
|
746
|
*
|
747
|
* @param rank
|
748
|
* @return
|
749
|
*/
|
750
|
private Rank findNextHigherRank(Rank rank) {
|
751
|
rank = (Rank) termService.load(rank.getUuid());
|
752
|
return rank.getNextHigherTerm();
|
753
|
// OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();;
|
754
|
// return rankVocabulary.getNextHigherTerm(rank);
|
755
|
}
|
756
|
|
757
|
/**
|
758
|
* Either finds an existing taxon description of the given taxon or creates a new one.
|
759
|
* If the doClear is set all existing description elements will be cleared.
|
760
|
*
|
761
|
* @param taxon
|
762
|
* @param doClear will remove all existing Distributions if the taxon already
|
763
|
* has a MarkerType.COMPUTED() TaxonDescription
|
764
|
* @return
|
765
|
*/
|
766
|
private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {
|
767
|
|
768
|
String descriptionTitle = this.getClass().getSimpleName();
|
769
|
|
770
|
// find existing one
|
771
|
for (TaxonDescription description : taxon.getDescriptions()) {
|
772
|
if (description.hasMarker(MarkerType.COMPUTED(), true)) {
|
773
|
logger.debug("reusing description for " + taxon.getTitleCache());
|
774
|
if (doClear) {
|
775
|
int deleteCount = 0;
|
776
|
Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
|
777
|
for (DescriptionElementBase descriptionElement : description.getElements()) {
|
778
|
if(descriptionElement instanceof Distribution) {
|
779
|
deleteCandidates.add(descriptionElement);
|
780
|
}
|
781
|
}
|
782
|
if(deleteCandidates.size() > 0){
|
783
|
for(DescriptionElementBase descriptionElement : deleteCandidates) {
|
784
|
description.removeElement(descriptionElement);
|
785
|
descriptionService.deleteDescriptionElement(descriptionElement);
|
786
|
descriptionElement = null;
|
787
|
deleteCount++;
|
788
|
}
|
789
|
descriptionService.saveOrUpdate(description);
|
790
|
logger.debug("\t" + deleteCount +" distributions cleared");
|
791
|
}
|
792
|
|
793
|
}
|
794
|
return description;
|
795
|
}
|
796
|
}
|
797
|
|
798
|
// create a new one
|
799
|
logger.debug("creating new description for " + taxon.getTitleCache());
|
800
|
TaxonDescription description = TaxonDescription.NewInstance(taxon);
|
801
|
description.setTitleCache(descriptionTitle, true);
|
802
|
description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
|
803
|
return description;
|
804
|
}
|
805
|
|
806
|
/**
|
807
|
* @param superArea
|
808
|
* @return
|
809
|
*/
|
810
|
private Set<NamedArea> getSubAreasFor(NamedArea superArea) {
|
811
|
|
812
|
if(!subAreaMap.containsKey(superArea)) {
|
813
|
if(logger.isDebugEnabled()){
|
814
|
logger.debug("loading included areas for " + superArea.getLabel());
|
815
|
}
|
816
|
subAreaMap.put(superArea, superArea.getIncludes());
|
817
|
}
|
818
|
return subAreaMap.get(superArea);
|
819
|
}
|
820
|
|
821
|
/**
|
822
|
* @param taxon
|
823
|
* @return
|
824
|
*/
|
825
|
private List<Distribution> distributionsFor(Taxon taxon) {
|
826
|
List<Distribution> distributions = new ArrayList<Distribution>();
|
827
|
for(TaxonDescription description: taxon.getDescriptions()) {
|
828
|
for(DescriptionElementBase deb : description.getElements()) {
|
829
|
if(deb instanceof Distribution) {
|
830
|
distributions.add((Distribution)deb);
|
831
|
}
|
832
|
}
|
833
|
}
|
834
|
return distributions;
|
835
|
}
|
836
|
|
837
|
/**
|
838
|
* @param taxon
|
839
|
* @param logger2
|
840
|
* @return
|
841
|
*/
|
842
|
private String taxonToString(TaxonBase taxon) {
|
843
|
if(logger.isTraceEnabled()) {
|
844
|
return taxon.getTitleCache();
|
845
|
} else {
|
846
|
return taxon.toString();
|
847
|
}
|
848
|
}
|
849
|
|
850
|
/**
|
851
|
* @param taxon
|
852
|
* @param logger2
|
853
|
* @return
|
854
|
*/
|
855
|
private String termToString(OrderedTermBase<?> term) {
|
856
|
if(logger.isTraceEnabled()) {
|
857
|
return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
|
858
|
} else {
|
859
|
return term.getIdInVocabulary();
|
860
|
}
|
861
|
}
|
862
|
|
863
|
/**
|
864
|
* Sets the priorities for presence and absence terms, the priorities are stored in extensions.
|
865
|
* This method will start a new transaction and commits it after the work is done.
|
866
|
*/
|
867
|
public void updatePriorities() {
|
868
|
|
869
|
TransactionStatus txStatus = startTransaction(false);
|
870
|
|
871
|
Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
|
872
|
|
873
|
priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
|
874
|
priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
|
875
|
priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
|
876
|
priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
|
877
|
priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
|
878
|
priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
|
879
|
priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
|
880
|
priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
|
881
|
priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
|
882
|
priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
|
883
|
priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
|
884
|
priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
|
885
|
priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
|
886
|
priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
|
887
|
priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
|
888
|
priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);
|
889
|
|
890
|
for(PresenceAbsenceTerm term : priorityMap.keySet()) {
|
891
|
// load the term
|
892
|
term = (PresenceAbsenceTerm) termService.load(term.getUuid());
|
893
|
// find the extension
|
894
|
Extension priorityExtension = null;
|
895
|
Set<Extension> extensions = term.getExtensions();
|
896
|
for(Extension extension : extensions){
|
897
|
if (!extension.getType().equals(ExtensionType.ORDER())) {
|
898
|
continue;
|
899
|
}
|
900
|
int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
|
901
|
if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
|
902
|
priorityExtension = extension;
|
903
|
break;
|
904
|
}
|
905
|
}
|
906
|
if(priorityExtension == null) {
|
907
|
priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
|
908
|
}
|
909
|
priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));
|
910
|
|
911
|
// save the term
|
912
|
termService.saveOrUpdate(term);
|
913
|
if (logger.isDebugEnabled()) {
|
914
|
logger.debug("Priority updated for " + term.getLabel());
|
915
|
}
|
916
|
}
|
917
|
|
918
|
commitTransaction(txStatus);
|
919
|
}
|
920
|
|
921
|
/**
 * Modes controlling which dimensions the distribution aggregation runs over.
 * NOTE(review): semantics inferred from the constant names only — confirm
 * against the aggregation entry point that consumes this enum.
 */
public enum AggregationMode {
    /** aggregate over areas only (presumably sub area to super area) */
    byAreas,
    /** aggregate over ranks only (presumably lower rank to higher rank) */
    byRanks,
    /** aggregate over both areas and ranks */
    byAreasAndRanks

}
|
927
|
}
|