// $Id$
/**
* Copyright (C) 2013 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
package eu.etaxonomy.cdm.api.service.description;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.apache.log4j.Logger;
import org.hibernate.FlushMode;
import org.hibernate.HibernateException;
import org.hibernate.Session;
import org.hibernate.engine.spi.SessionFactoryImplementor;
import org.hibernate.search.Search;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate5.HibernateTransactionManager;
import org.springframework.stereotype.Service;
import org.springframework.transaction.TransactionDefinition;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.DefaultTransactionDefinition;

import eu.etaxonomy.cdm.api.service.IClassificationService;
import eu.etaxonomy.cdm.api.service.IDescriptionService;
import eu.etaxonomy.cdm.api.service.INameService;
import eu.etaxonomy.cdm.api.service.ITaxonService;
import eu.etaxonomy.cdm.api.service.ITermService;
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
import eu.etaxonomy.cdm.model.common.Extension;
import eu.etaxonomy.cdm.model.common.ExtensionType;
import eu.etaxonomy.cdm.model.common.Marker;
import eu.etaxonomy.cdm.model.common.MarkerType;
import eu.etaxonomy.cdm.model.common.OrderedTermBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
import eu.etaxonomy.cdm.model.description.Distribution;
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.location.NamedArea;
import eu.etaxonomy.cdm.model.name.Rank;
import eu.etaxonomy.cdm.model.taxon.Classification;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
import eu.etaxonomy.cdm.persistence.query.OrderHint;

/**
 * <h2>GENERAL NOTES</h2>
 * <em>TODO: These notes are taken directly from the original Transmission Engine Occurrence
 * version 14 written in Visual Basic and still need to be
 * adapted to the Java version of the transmission engine!</em>
 *
 * <h3>summaryStatus</h3>
 *
 *   Each distribution record has a summaryStatus, which is a summary of the status codes
 *   as stored in the emOccurrence fields native, introduced, cultivated, ...
 *   The summaryStatus appears to be equivalent to the CDM DistributionStatus.
 *
 * <h3>map generation</h3>
 *
 *   When generating maps from the accumulated distribution information, some special cases have to be handled:
 * <ol>
 *   <li>If entered or imported status information exists for the same area for which calculated (accumulated)
 *       data is available, the calculated data has to be given preference over the other data.
 *   </li>
 *   <li>If there is an area with a sub area and both areas have the same calculated status, only the sub area
 *       status should be shown in the map, whereas the super area should be ignored.
 *   </li>
 * </ol>
 *
 * @author Anton Güntsch (author of the original Transmission Engine Occurrence version 14 written in Visual Basic)
 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
 * @date Feb 22, 2013
 */
@Service
public class TransmissionEngineDistribution { //TODO extends IoBase?

    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";

    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);

    /**
     * only used for performance testing
     */
    final boolean ONLY_FIRST_BATCH = false;


    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
            "description.markers.markerType",
            "description.elements.markers.markerType",
            "description.elements.area",
            "description.elements.status",
            "description.elements.sources.citation.authorship",
//            "description.elements.sources.nameUsedInSource",
//            "description.elements.multilanguageText",
//            "name.status.type",
    });


    /**
     * A map which contains the status terms as keys and their priorities as values.
     * The map contains both the PresenceTerms and the AbsenceTerms.
     */
    private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;

    @Autowired
    private IDescriptionService descriptionService;

    @Autowired
    private ITermService termService;

    @Autowired
    private ITaxonService taxonService;

    @Autowired
    private IClassificationService classificationService;

    @Autowired
    private INameService nameService;

    @Autowired
    private HibernateTransactionManager transactionManager;

    private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;

    private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;

    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();

    private final List<OrderHint> emptyOrderHints = new ArrayList<OrderHint>(0);

    /**
     * byAreaIgnoreStatusList contains by default:
     *  <ul>
     *    <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
     *    <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
     *    <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
     *    <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
     *    <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
     *  </ul>
     *
     * @return the byAreaIgnoreStatusList
     */
    public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
        if(byAreaIgnoreStatusList == null ){
            byAreaIgnoreStatusList = Arrays.asList(
                    new PresenceAbsenceTerm[] {
                            PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
                            PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
                            // TODO should PresenceAbsenceTerm.ABSENT() also be ignored?
                    });
        }
        return byAreaIgnoreStatusList;
    }

    /**
     * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
     */
    public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
        this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
    }

    /**
     * byRankIgnoreStatusList contains by default:
     *  <ul>
     *    <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
     *  </ul>
     *
     * @return the byRankIgnoreStatusList
     */
    public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {

        if (byRankIgnoreStatusList == null) {
            byRankIgnoreStatusList = Arrays.asList(
                    new PresenceAbsenceTerm[] {
                            PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
                    });
        }
        return byRankIgnoreStatusList;
    }

    /**
     * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
     */
    public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
        this.byRankIgnoreStatusList = byRankIgnoreStatusList;
    }

    public TransmissionEngineDistribution() {
    }

    /**
     * Initializes the map which contains the status terms as keys and their priorities as values.
     * The map contains both the PresenceTerms and the AbsenceTerms.
     */
    private void initializeStatusPriorityMap() {

        statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
        Integer priority;

        // all PresenceAbsenceTerms (PresenceTerms and AbsenceTerms)
        for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
            priority = getPriorityFor(term);
            if(priority != null){
                statusPriorityMap.put(term, priority);
            }
        }
    }

    /**
     * Compares the PresenceAbsenceTerm terms contained in <code>a.status</code> and <code>b.status</code> according to
     * the priorities stored in the statusPriorityMap. The StatusAndSources object with
     * the higher priority is returned. If both priorities are equal, the sources of b are added to the sources
     * of a and a is returned.
     *
     * If either a or b or its status is null, the other object is returned.
     *
     * @see #initializeStatusPriorityMap()
     *
     * @param a
     * @param b
     * @param sourcesForWinnerB
     *  in the case when <code>b</code> is preferred over <code>a</code> this set of sources will be added to the sources of <code>b</code>
     * @return the StatusAndSources object with the higher status priority
     */
    private StatusAndSources choosePreferred(StatusAndSources a, StatusAndSources b, Set<DescriptionElementSource> sourcesForWinnerB){

        if (statusPriorityMap == null) {
            initializeStatusPriorityMap();
        }

        if (b == null || b.status == null) {
            return a;
        }
        if (a == null || a.status == null) {
            return b;
        }

        if (statusPriorityMap.get(a.status) == null) {
            logger.warn("No priority found in map for " + a.status.getLabel());
            return b;
        }
        if (statusPriorityMap.get(b.status) == null) {
            logger.warn("No priority found in map for " + b.status.getLabel());
            return a;
        }
        // compare the unboxed priority values; comparing the Integer objects with == would only compare references
        int priorityA = statusPriorityMap.get(a.status);
        int priorityB = statusPriorityMap.get(b.status);
        if(priorityA < priorityB){
            if(sourcesForWinnerB != null) {
                b.addSources(sourcesForWinnerB);
            }
            return b;
        } else if (priorityA == priorityB){
            a.addSources(b.sources);
            return a;
        } else {
            return a;
        }
    }
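
    /**
     * Illustrative sketch added for documentation only, it is not called by the engine:
     * with the default priorities written by {@link #updatePriorities()} a NATIVE record
     * (priority 130) outranks an INTRODUCED record (priority 90), so choosePreferred()
     * returns the NATIVE StatusAndSources object with its sources unchanged. The method
     * name and parameters are assumptions made for this example.
     */
    @SuppressWarnings("unused")
    private StatusAndSources choosePreferredExample(Set<DescriptionElementSource> nativeSources,
            Set<DescriptionElementSource> introducedSources) {
        StatusAndSources introduced = new StatusAndSources(PresenceAbsenceTerm.INTRODUCED(), introducedSources);
        StatusAndSources native_ = new StatusAndSources(PresenceAbsenceTerm.NATIVE(), nativeSources);
        return choosePreferred(introduced, native_, null); // returns the NATIVE record
    }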

    /**
     * Reads the priority for the given status term from its extensions.
     *
     * @param term
     * @return the priority value, or <code>null</code> if none is defined
     */
    private Integer getPriorityFor(DefinedTermBase<?> term) {
        Set<Extension> extensions = term.getExtensions();
        for(Extension extension : extensions){
            if(!extension.getType().equals(ExtensionType.ORDER())) {
                continue;
            }
            int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
            if(pos == 0){ // if it starts with EXTENSION_VALUE_PREFIX
                try {
                    Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
                    return priority;
                } catch (NumberFormatException e) {
                    logger.warn("Invalid number format in Extension: " + extension.getValue());
                }
            }
        }
        logger.warn("no priority defined for '" + term.getLabel() + "'");
        return null;
    }
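
    /**
     * Illustrative sketch added for documentation only, it is not called by the engine:
     * stores a priority in the extension format expected by {@link #getPriorityFor(DefinedTermBase)},
     * e.g. the value "transmissionEngineDistribution.priority:130". The method name is an
     * assumption made for this example; {@link #updatePriorities()} below does the same for
     * the full set of default priorities.
     */
    @SuppressWarnings("unused")
    private void setPriorityForExample(DefinedTermBase<?> term, int priority) {
        Extension.NewInstance(term, EXTENSION_VALUE_PREFIX + priority, ExtensionType.ORDER());
    }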

    /**
     * Runs both steps:
     * <ul>
     * <li>Step 1: Accumulate occurrence records by area</li>
     * <li>Step 2: Accumulate by ranks, starting from the lower rank and moving up to the
     * upper rank; on each rank level the status of all children is accumulated.</li>
     * </ul>
     *
     * @param mode
     *            selects which of the two steps are executed
     * @param superAreas
     *            the areas to which the subordinate areas should be projected.
     * @param lowerRank
     * @param upperRank
     * @param classification
     *            limit the accumulation process to a specific classification
     *            (not yet implemented)
     * @param monitor
     *            the progress monitor to use for reporting progress to the
     *            user. It is the caller's responsibility to call done() on the
     *            given monitor. Accepts null, indicating that no progress
     *            should be reported and that the operation cannot be cancelled.
     */
    public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
            Classification classification, IProgressMonitor monitor) {

        if (monitor == null) {
            monitor = new NullProgressMonitor();
        }

        // only for debugging:
        //logger.setLevel(Level.TRACE); // TRACE will slow down a lot since it forces loading all term representations
        //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);

        logger.info("Hibernate JDBC Batch size: "
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());

        Set<Classification> classifications = new HashSet<Classification>();
        if(classification == null) {
            classifications.addAll(classificationService.listClassifications(null, null, null, null));
        } else {
            classifications.add(classification);
        }

        int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;

        // take the start time for performance testing
        // NOTE: use ONLY_FIRST_BATCH = true to measure only one batch
        double start = System.currentTimeMillis();

        monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
        updatePriorities();
        monitor.worked(1);

        List<Rank> ranks = rankInterval(lowerRank, upperRank);

        for(Classification _classification : classifications) {

            ClassificationLookupDTO classificationLookupDao = classificationService.classificationLookup(_classification);
            classificationLookupDao.filterInclude(ranks);

            double end1 = System.currentTimeMillis();
            logger.info("Time elapsed for classificationLookup() : " + (end1 - start) / (1000) + "s");
            double start2 = System.currentTimeMillis();

            monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
            if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
                accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200), true);
            }
            monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());

            double end2 = System.currentTimeMillis();
            logger.info("Time elapsed for accumulateByArea() : " + (end2 - start2) / (1000) + "s");

            double start3 = System.currentTimeMillis();
            if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
                accumulateByRank(ranks, classificationLookupDao, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
            }

            double end3 = System.currentTimeMillis();
            logger.info("Time elapsed for accumulateByRank() : " + (end3 - start3) / (1000) + "s");
            logger.info("Time elapsed for accumulate(): " + (end3 - start) / (1000) + "s");

            if(ONLY_FIRST_BATCH) {
                monitor.done();
                break;
            }
        }
    }
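
    /**
     * Illustrative usage sketch added for documentation only, it is not called by the engine:
     * a typical invocation aggregates distributions from subspecies level up to genus level
     * over all classifications (a null classification). The method name and the chosen rank
     * bounds are assumptions made for this example.
     */
    @SuppressWarnings("unused")
    private void accumulateExample(List<NamedArea> superAreas) {
        accumulate(AggregationMode.byAreasAndRanks, superAreas,
                Rank.SUBSPECIES(), Rank.GENUS(), null, new NullProgressMonitor());
    }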


    /**
     * Step 1: Accumulate occurrence records by area
     * <ul>
     * <li>areas are projected to super areas, e.g.: HS <-- HS(A), HS(G), HS(S)</li>
     * <li>super areas initially do not have a status set ==> prerequisite to check in the CDM</li>
     * <li>sub area distributions having a status contained in {@link #getByAreaIgnoreStatusList()} are ignored</li>
     * <li>statuses have a priority value; the status with the highest priority determines the status of the super area</li>
     * <li>the source references of the accumulated distributions are also accumulated into the new distribution;
     *     this has been implemented especially for the Euro+Med Checklist Vol. 2 and might not be a general requirement</li>
     * </ul>
     *
     * @param superAreas
     *      the areas to which the subordinate areas should be projected
     * @param classificationLookupDao
     *
     */
    protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 1000;

        TransactionStatus txStatus = startTransaction(false);

        // reload superAreas TODO is it faster to getSession().merge(object) ??
        Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
        for (NamedArea superArea : superAreas){
            superAreaUuids.add(superArea.getUuid());
        }

        // visit all accepted taxa
        subMonitor.beginTask("Accumulating by area ",  classificationLookupDao.getTaxonIds().size());
        Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();

        while (taxonIdIterator.hasNext()) {

            if(txStatus == null) {
                // transaction has been committed at the end of this batch, start a new one
                txStatus = startTransaction(false);
            }

            // the session is cleared after each batch, so load the superAreaList for each batch
            List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);

            // load taxa for this batch
            List<TaxonBase> taxa = null;
            List<Integer> taxonIds = new ArrayList<Integer>(batchSize);
            while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
                taxonIds.add(taxonIdIterator.next());
            }

//            logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");

            taxa = taxonService.loadByIds(taxonIds, TAXONDESCRIPTION_INIT_STRATEGY);

            // iterate over the taxa and accumulate areas
            for(TaxonBase taxonBase : taxa) {
                if(logger.isDebugEnabled()){
                    logger.debug("accumulateByArea() - taxon :" + taxonToString(taxonBase));
                }

                Taxon taxon = (Taxon)taxonBase;
                TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                List<Distribution> distributions = distributionsFor(taxon);

                // step through the superAreas for accumulation of subAreas
                for (NamedArea superArea : superAreaList){

                    // accumulate all sub area statuses
                    StatusAndSources accumulatedStatusAndSources = null;
                    // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
                    Set<NamedArea> subAreas = getSubAreasFor(superArea);
                    for(NamedArea subArea : subAreas){
                        if(logger.isTraceEnabled()){
                            logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
                        }
                        // step through all distributions for the given subArea
                        for(Distribution distribution : distributions){
                            if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                if(logger.isTraceEnabled()){
                                    logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
                                }
                                // skip all having a status value contained in byAreaIgnoreStatusList
                                if (getByAreaIgnoreStatusList().contains(status)){
                                    continue;
                                }
                                StatusAndSources subStatusAndSources = new StatusAndSources(status, distribution.getSources());
                                accumulatedStatusAndSources = choosePreferred(accumulatedStatusAndSources, subStatusAndSources, null);
                            }
                        }
                    } // next sub area
                    if (accumulatedStatusAndSources != null) {
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatusAndSources.status));
                        }
                        // store a new distribution element for the superArea in the taxon description
                        Distribution newDistributionElement = Distribution.NewInstance(superArea, accumulatedStatusAndSources.status);
                        newDistributionElement.getSources().addAll(accumulatedStatusAndSources.sources);
                        newDistributionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                        description.addElement(newDistributionElement);
                    }

                } // next super area ....

                descriptionService.saveOrUpdate(description);
                taxonService.saveOrUpdate(taxon);
                subMonitor.worked(1);

            } // next taxon

            flushAndClear();

            // commit for every batch, otherwise the persistent context
            // may grow too much and eat up all the heap
            commitTransaction(txStatus);
            txStatus = null;

            if(ONLY_FIRST_BATCH) {
                break;
            }

        } // next batch of taxa

        subMonitor.done();
    }
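
    /**
     * Illustrative sketch added for documentation only, it is not called by the engine:
     * condenses the inner loop of accumulateByArea() for a single super area. Given the
     * distributions of one taxon, the highest-priority status found among the sub areas of
     * <code>superArea</code> (ignoring the statuses in {@link #getByAreaIgnoreStatusList()})
     * becomes the accumulated status of the super area. The method name is an assumption
     * made for this example.
     */
    @SuppressWarnings("unused")
    private StatusAndSources accumulateSingleSuperAreaExample(NamedArea superArea, List<Distribution> distributions) {
        StatusAndSources accumulated = null;
        for (NamedArea subArea : getSubAreasFor(superArea)) {
            for (Distribution distribution : distributions) {
                if (subArea.equals(distribution.getArea()) && distribution.getStatus() != null
                        && !getByAreaIgnoreStatusList().contains(distribution.getStatus())) {
                    accumulated = choosePreferred(accumulated,
                            new StatusAndSources(distribution.getStatus(), distribution.getSources()), null);
                }
            }
        }
        return accumulated;
    }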

    /**
     * Step 2: Accumulate by ranks, starting from the lower rank and moving up to the upper rank;
     * on each rank level the status of all children is accumulated.
     * <ul>
     * <li>aggregate the distributions of the included taxa of the next lower rank for any rank level,
     *    starting from the lower rank (e.g. subspecies) up to the upper rank (e.g. genus)</li>
     *  <li>the accumulation is done for each distribution area found in the included taxa</li>
     *  <li>areas of subtaxa with status endemic are ignored</li>
     *  <li>the status with the highest priority determines the value for the accumulated distribution</li>
     *  <li>the source references of the accumulated distributions are also accumulated into the new distribution;
     *    this has been implemented especially for the Euro+Med Checklist Vol. 2 and might not be a general requirement</li>
     * </ul>
     */
    protected void accumulateByRank(List<Rank> rankInterval, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 500;

        TransactionStatus txStatus = startTransaction(false);

        // the loadRankSpecificRootNodes() method not only finds
        // taxa of the specified rank but also taxa of lower ranks
        // if no taxon of the specified rank exists, so we need to
        // remember which taxa have been processed already
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();
        List<TaxonBase> taxa = null;
        List<TaxonBase> childTaxa = null;

        List<Rank> ranks = rankInterval;

        int ticksPerRank = 100;
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);

        for (Rank rank : ranks) {

            if(logger.isDebugEnabled()){
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
            }

            SubProgressMonitor taxonSubMonitor = null;
            Set<Integer> taxonIdsPerRank = classificationLookupDao.getTaxonIdByRank().get(rank);
            if(taxonIdsPerRank == null || taxonIdsPerRank.isEmpty()) {
                continue;
            }
            Iterator<Integer> taxonIdIterator = taxonIdsPerRank.iterator();
            while (taxonIdIterator.hasNext()) {

                if(txStatus == null) {
                    // transaction has been committed at the end of this batch, start a new one
                    txStatus = startTransaction(false);
                }

                // load taxa for this batch
                List<Integer> taxonIds = new ArrayList<Integer>(batchSize);
                while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
                    taxonIds.add(taxonIdIterator.next());
                }

                taxa = taxonService.loadByIds(taxonIds, null);

                if(taxonSubMonitor == null) {
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
                    taxonSubMonitor.beginTask("Accumulating by rank " + termToString(rank), taxa.size());
                }

//                if(logger.isDebugEnabled()){
//                           logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
//                }

                for(TaxonBase taxonBase : taxa) {

                    Taxon taxon = (Taxon)taxonBase;
                    if (taxaProcessedIds.contains(taxon.getId())) {
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
                        }
                        continue;
                    }
                    taxaProcessedIds.add(taxon.getId());
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
                    }

                    // step through the direct taxonomic children for accumulation
                    Map<NamedArea, StatusAndSources> accumulatedStatusMap = new HashMap<NamedArea, StatusAndSources>();

                    List<Integer> childTaxonIds = new ArrayList<>();
                    Set<Integer> childSet = classificationLookupDao.getChildTaxonMap().get(taxon.getId());
                    if(childSet != null) {
                        childTaxonIds.addAll(childSet);
                    }
                    if(!childTaxonIds.isEmpty()) {
                        childTaxa = taxonService.loadByIds(childTaxonIds, TAXONDESCRIPTION_INIT_STRATEGY);

                        for (TaxonBase childTaxonBase : childTaxa){

                            Taxon childTaxon = (Taxon) childTaxonBase;
                            getSession().setReadOnly(childTaxon, true);
                            if(logger.isTraceEnabled()){
                                logger.trace("                   subtaxon :" + taxonToString(childTaxon));
                            }

                            for(Distribution distribution : distributionsFor(childTaxon) ) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                NamedArea area = distribution.getArea();
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
                                    continue;
                                }

                                StatusAndSources subStatusAndSources = new StatusAndSources(status, distribution.getSources());
                                accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), subStatusAndSources, null));
                            }
                        }

                        if(accumulatedStatusMap.size() > 0) {
                            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                            for (NamedArea area : accumulatedStatusMap.keySet()) {
                                Distribution distribution = findDistribution(description, area, accumulatedStatusMap.get(area).status);
                                if(distribution == null) {
                                    // create a new distribution element
                                    distribution = Distribution.NewInstance(area, accumulatedStatusMap.get(area).status);
                                    distribution.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                                }
                                addSourcesDeduplicated(distribution.getSources(), accumulatedStatusMap.get(area).sources);

                                description.addElement(distribution);
                            }
                            taxonService.saveOrUpdate(taxon);
                            descriptionService.saveOrUpdate(description);
                        }

                    }
                    taxonSubMonitor.worked(1); // one taxon worked

                } // next taxon ....

                flushAndClear();

                // commit for every batch, otherwise the persistent context
                // may grow too much and eat up all the heap
                commitTransaction(txStatus);
                txStatus = null;

                if(ONLY_FIRST_BATCH) {
                    break;
                }
            } // next batch

            if(taxonSubMonitor != null) { // TODO taxonSubpager, this check should not be needed
                taxonSubMonitor.done();
            }
            subMonitor.worked(1);

            if(ONLY_FIRST_BATCH) {
                break;
            }
        } // next Rank

        subMonitor.done();
    }

    /**
     * @param description
     * @param area
     * @param status
     * @return the computed Distribution in <code>description</code> matching the given area and status,
     *         or <code>null</code> if there is none
     */
    private Distribution findDistribution(TaxonDescription description, NamedArea area, PresenceAbsenceTerm status) {
        for(DescriptionElementBase item : description.getElements()) {
            if(!(item instanceof Distribution)) {
                continue;
            }
            Distribution distribution = ((Distribution)item);
            // compare from the non-null side to avoid NPEs on distributions without area or status
            if(area.equals(distribution.getArea()) && status.equals(distribution.getStatus())) {
                return distribution;
            }
        }
        return null;
    }

    /**
     * @param lowerRank
     * @param upperRank
     * @return the ranks from <code>lowerRank</code> up to <code>upperRank</code> in ascending order
     */
    private List<Rank> rankInterval(Rank lowerRank, Rank upperRank) {

        TransactionStatus txStatus = startTransaction(false);
        Rank currentRank = lowerRank;
        List<Rank> ranks = new ArrayList<Rank>();
        ranks.add(currentRank);
        while (!currentRank.isHigher(upperRank)) {
            currentRank = findNextHigherRank(currentRank);
            ranks.add(currentRank);
        }
        commitTransaction(txStatus);
        txStatus = null;
        return ranks;
    }
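
    // Example (added note): rankInterval(Rank.SUBSPECIES(), Rank.GENUS()) starts at subspecies and
    // walks upwards via findNextHigherRank() until the upper bound is exceeded, yielding the rank
    // levels that accumulateByRank() iterates over in ascending order.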

    /**
     * @return the session of the underlying description service
     */
    private Session getSession() {
        return descriptionService.getSession();
    }

    /**
     * Flushes the session (and the full text indexes, if Hibernate Search is configured)
     * and clears the persistence context.
     */
    private void flushAndClear() {
        logger.debug("flushing and clearing session ...");
        getSession().flush();
        try {
            Search.getFullTextSession(getSession()).flushToIndexes();
        } catch (HibernateException e) {
            /* IGNORE - Hibernate Search Event listeners not configured ... */
            if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
                throw e;
            }
        }
        getSession().clear();
    }


    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into a common base class
    public TransactionStatus startTransaction(Boolean readOnly) {

        DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
        defaultTxDef.setReadOnly(readOnly);
        TransactionDefinition txDef = defaultTxDef;

        // Log some transaction-related debug information.
        if (logger.isTraceEnabled()) {
            logger.trace("Transaction name = " + txDef.getName());
            logger.trace("Transaction facets:");
            logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
            logger.trace("Isolation level = " + txDef.getIsolationLevel());
            logger.trace("Timeout = " + txDef.getTimeout());
            logger.trace("Read Only = " + txDef.isReadOnly());
            // org.springframework.orm.hibernate5.HibernateTransactionManager
            // provides more transaction/session-related debug information.
        }

        TransactionStatus txStatus = transactionManager.getTransaction(txDef);

        getSession().setFlushMode(FlushMode.COMMIT);

        return txStatus;
    }

    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into a common base class
    public void commitTransaction(TransactionStatus txStatus){
        logger.debug("committing transaction ...");
        transactionManager.commit(txStatus);
    }

    /**
     * returns the next higher rank
     *
     * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
     *
     * @param rank
     * @return the next higher rank
     */
    private Rank findNextHigherRank(Rank rank) {
        rank = (Rank) termService.load(rank.getUuid());
        return rank.getNextHigherTerm();
//        OrderedTermVocabulary<Rank> rankVocabulary = nameService.getRankVocabulary();
//        return rankVocabulary.getNextHigherTerm(rank);
    }

    /**
     * Either finds an existing computed taxon description of the given taxon or creates a new one.
     * If doClear is set, all existing Distribution elements will be removed.
     *
     * @param taxon
     * @param doClear will remove all existing Distributions if the taxon already
     * has a MarkerType.COMPUTED() TaxonDescription
     * @return the existing or newly created computed TaxonDescription
     */
    private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {

        String descriptionTitle = this.getClass().getSimpleName();

        // find an existing one
        for (TaxonDescription description : taxon.getDescriptions()) {
            if (description.hasMarker(MarkerType.COMPUTED(), true)) {
                logger.debug("reusing description for " + taxon.getTitleCache());
                if (doClear) {
                    int deleteCount = 0;
                    Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
                    for (DescriptionElementBase descriptionElement : description.getElements()) {
                        if(descriptionElement instanceof Distribution) {
                            deleteCandidates.add(descriptionElement);
                        }
                    }
                    if(deleteCandidates.size() > 0){
                        for(DescriptionElementBase descriptionElement : deleteCandidates) {
                            description.removeElement(descriptionElement);
                            descriptionService.deleteDescriptionElement(descriptionElement);
                            descriptionElement = null;
                            deleteCount++;
                        }
                        descriptionService.saveOrUpdate(description);
                        logger.debug("\t" + deleteCount +" distributions cleared");
                    }

                }
                return description;
            }
        }

        // create a new one
        logger.debug("creating new description for " + taxon.getTitleCache());
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
        description.setTitleCache(descriptionTitle, true);
        description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
        return description;
    }

    /**
     * @param superArea
     * @return the sub areas included in the given super area
     */
    private Set<NamedArea> getSubAreasFor(NamedArea superArea) {

        if(!subAreaMap.containsKey(superArea)) {
            if(logger.isDebugEnabled()){
                logger.debug("loading included areas for " + superArea.getLabel());
            }
            subAreaMap.put(superArea, superArea.getIncludes());
        }
        return subAreaMap.get(superArea);
    }

    /**
     * @param taxon
     * @return all Distribution elements found in the descriptions of the given taxon
     */
    private List<Distribution> distributionsFor(Taxon taxon) {
        List<Distribution> distributions = new ArrayList<Distribution>();
        for(TaxonDescription description: taxon.getDescriptions()) {
            for(DescriptionElementBase deb : description.getElements()) {
                if(deb instanceof Distribution) {
                    distributions.add((Distribution)deb);
                }
            }
        }
        return distributions;
    }

    /**
     * @param taxon
     * @return a string representation of the taxon for logging
     */
    private String taxonToString(TaxonBase taxon) {
        if(logger.isTraceEnabled()) {
            return taxon.getTitleCache();
        } else {
            return taxon.toString();
        }
    }

    /**
     * @param term
     * @return a string representation of the term for logging
     */
    private String termToString(OrderedTermBase<?> term) {
        if(logger.isTraceEnabled()) {
            return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
        } else {
            return term.getIdInVocabulary();
        }
    }

    /**
     * Sets the priorities for presence and absence terms; the priorities are stored in extensions.
     * This method starts a new transaction and commits it after the work is done.
     */
    public void updatePriorities() {

        TransactionStatus txStatus = startTransaction(false);

        Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();

        priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
        priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
        priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);

        for(PresenceAbsenceTerm term : priorityMap.keySet()) {
            // load the term
            term = (PresenceAbsenceTerm) termService.load(term.getUuid());
            // find the existing priority extension
            Extension priorityExtension = null;
            Set<Extension> extensions = term.getExtensions();
            for(Extension extension : extensions){
                if (!extension.getType().equals(ExtensionType.ORDER())) {
                    continue;
                }
                int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
                if(pos == 0){ // if it starts with EXTENSION_VALUE_PREFIX
                    priorityExtension = extension;
                    break;
                }
            }
            if(priorityExtension == null) {
                priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
            }
            priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));

            // save the term
            termService.saveOrUpdate(term);
            if (logger.isDebugEnabled()) {
                logger.debug("Priority updated for " + term.getLabel());
            }
        }

        commitTransaction(txStatus);
    }

    public static void addSourcesDeduplicated(Set<DescriptionElementSource> target, Set<DescriptionElementSource> sources) {
        for(DescriptionElementSource source : sources) {
            boolean contained = false;
            for(DescriptionElementSource existingSource: target) {
                if(existingSource.equalsByShallowCompare(source)) {
                    contained = true;
                    break;
                }
            }
            if(!contained) {
                try {
                    target.add((DescriptionElementSource)source.clone());
                } catch (CloneNotSupportedException e) {
                    // should never happen
                    throw new RuntimeException(e);
                }
            }
        }
    }

    public enum AggregationMode {
        byAreas,
        byRanks,
        byAreasAndRanks
    }

    /**
     * Simple data holder which pairs a status with the (deduplicated) set of sources it
     * originates from.
     */
    private class StatusAndSources {

        private final PresenceAbsenceTerm status;

        private final Set<DescriptionElementSource> sources = new HashSet<>();

        public StatusAndSources(PresenceAbsenceTerm status, Set<DescriptionElementSource> sources) {
            this.status = status;
            addSourcesDeduplicated(this.sources, sources);
        }

        /**
         * @param sources the sources to add, deduplicated via {@link TransmissionEngineDistribution#addSourcesDeduplicated(Set, Set)}
         */
        public void addSources(Set<DescriptionElementSource> sources) {
            addSourcesDeduplicated(this.sources, sources);
        }

    }
}