Project

General

Profile

Download (37.5 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2013 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.api.service.description;
11

    
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Iterator;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20
import java.util.UUID;
21

    
22
import org.apache.log4j.Level;
23
import org.apache.log4j.Logger;
24
import org.hibernate.FlushMode;
25
import org.hibernate.HibernateException;
26
import org.hibernate.Session;
27
import org.hibernate.engine.spi.SessionFactoryImplementor;
28
import org.hibernate.search.Search;
29
import org.springframework.beans.factory.annotation.Autowired;
30
import org.springframework.orm.hibernate5.HibernateTransactionManager;
31
import org.springframework.stereotype.Service;
32
import org.springframework.transaction.TransactionDefinition;
33
import org.springframework.transaction.TransactionStatus;
34
import org.springframework.transaction.support.DefaultTransactionDefinition;
35

    
36
import eu.etaxonomy.cdm.api.service.IClassificationService;
37
import eu.etaxonomy.cdm.api.service.IDescriptionService;
38
import eu.etaxonomy.cdm.api.service.INameService;
39
import eu.etaxonomy.cdm.api.service.ITaxonService;
40
import eu.etaxonomy.cdm.api.service.ITermService;
41
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
42
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
43
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
44
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
45
import eu.etaxonomy.cdm.model.common.Extension;
46
import eu.etaxonomy.cdm.model.common.ExtensionType;
47
import eu.etaxonomy.cdm.model.common.Marker;
48
import eu.etaxonomy.cdm.model.common.MarkerType;
49
import eu.etaxonomy.cdm.model.common.OrderedTermBase;
50
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
51
import eu.etaxonomy.cdm.model.description.Distribution;
52
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
53
import eu.etaxonomy.cdm.model.description.TaxonDescription;
54
import eu.etaxonomy.cdm.model.location.NamedArea;
55
import eu.etaxonomy.cdm.model.name.Rank;
56
import eu.etaxonomy.cdm.model.taxon.Classification;
57
import eu.etaxonomy.cdm.model.taxon.Taxon;
58
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
59
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
60
import eu.etaxonomy.cdm.persistence.query.OrderHint;
61

    
62
/**
63
 *
64
 * <h2>GENERAL NOTES </h2>
65
 * <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
66
 * version 14 written in Visual Basic and still need to be
67
 * adapted to the java version of the transmission engine!</em>
68
 *
69
 * <h3>summaryStatus</h3>
70
 *
71
 *   Each distribution information has a summaryStatus; this is a summary of the status codes
72
 *   as stored in the fields of emOccurrence native, introduced, cultivated, ...
73
 *   The summaryStatus seems to be equivalent to  the CDM DistributionStatus
74
 *
75
 * <h3>map generation</h3>
76
 *
77
 *   When generating maps from the accumulated distribution information some special cases have to be handled:
78
 * <ol>
79
 *   <li>If entered or imported status information exists for the same area for which calculated (accumulated)
80
 *       data is available, the calculated data has to be given preference over other data.
81
 *   </li>
82
 *   <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
83
 *       status should be shown in the map, whereas the super area should be ignored.
84
 *   </li>
85
 * </ol>
86
 *
87
 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
88
 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
89
 * @date Feb 22, 2013
90
 */
91
@Service
92
public class TransmissionEngineDistribution { //TODO extends IoBase?
93

    
94
    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";
95

    
96
    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);
97

    
98
    /**
99
     * only used for performance testing
100
     */
101
    final boolean ONLY_FISRT_BATCH = false;
102

    
103

    
104
    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
105
            "description.markers.markerType",
106
            "description.elements.markers.markerType",
107
            "description.elements.area",
108
            "description.elements.status",
109
            "description.elements.sources.citation.authorship",
110
//            "description.elements.sources.nameUsedInSource",
111
//            "description.elements.multilanguageText",
112
//            "name.status.type",
113
    });
114

    
115

    
116
    /**
117
     * A map which contains the status terms as key and the priority as value
118
     * The map will contain both, the PresenceTerms and the AbsenceTerms
119
     */
120
    private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;
121

    
122
    @Autowired
123
    private IDescriptionService descriptionService;
124

    
125
    @Autowired
126
    private ITermService termService;
127

    
128
    @Autowired
129
    private ITaxonService taxonService;
130

    
131
    @Autowired
132
    private IClassificationService classificationService;
133

    
134
    @Autowired
135
    private INameService mameService;
136

    
137
    @Autowired
138
    private HibernateTransactionManager transactionManager;
139

    
140
    private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;
141

    
142
    private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;
143

    
144
    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();
145

    
146
    private final List<OrderHint> emptyOrderHints = new ArrayList<OrderHint>(0);
147

    
148

    
149
    /**
150
     * byAreaIgnoreStatusList contains by default:
151
     *  <ul>
152
     *    <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
153
     *    <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
154
     *    <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
155
     *    <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
156
     *    <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
157
     *  </ul>
158
     *
159
     * @return the byAreaIgnoreStatusList
160
     */
161
    public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
162
        if(byAreaIgnoreStatusList == null ){
163
            byAreaIgnoreStatusList = Arrays.asList(
164
                    new PresenceAbsenceTerm[] {
165
                    		PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
166
                    		PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
167
                    		PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
168
                    		PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
169
                    		PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
170
                            // TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
171
                    });
172
        }
173
        return byAreaIgnoreStatusList;
174
    }
175

    
176
    /**
177
     * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
178
     */
179
    public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
180
        this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
181
    }
182

    
183
    /**
184
     * byRankIgnoreStatusList contains by default
185
     *  <ul>
186
     *    <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
187
     *  </ul>
188
     *
189
     * @return the byRankIgnoreStatusList
190
     */
191
    public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {
192

    
193
        if (byRankIgnoreStatusList == null) {
194
            byRankIgnoreStatusList = Arrays.asList(
195
                    new PresenceAbsenceTerm[] {
196
                    		PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
197
                    });
198
        }
199
        return byRankIgnoreStatusList;
200
    }
201

    
202
    /**
203
     * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
204
     */
205
    public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
206
        this.byRankIgnoreStatusList = byRankIgnoreStatusList;
207
    }
208

    
209
    /**
210
     *
211
     * Creates the engine; the required services are injected by Spring via the {@code @Autowired} fields.
212
     */
213
    public TransmissionEngineDistribution() {
        // nothing to initialize here: the ignore lists and the priority map are built lazily on first use
    }
215

    
216
    /**
217
     * initializes the map which contains the status terms as key and the priority as value
218
     * The map will contain both, the PresenceTerms and the AbsenceTerms
219
     */
220
    private void initializeStatusPriorityMap() {
221

    
222
        statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
223
        Integer priority;
224

    
225
        // PresenceTerms
226
        for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
227
            priority = getPriorityFor(term);
228
            if(priority != null){
229
                statusPriorityMap.put(term, priority);
230
            }
231
        }
232
    }
233

    
234
    /**
235
     * Compares the PresenceAbsenceTermBase terms <code>a</code> and <code>b</code>  and
236
     * returns the PresenceAbsenceTermBase with the higher priority as stored in the statusPriorityMap.
237
     * If either <code>a</code> or <code>b</code> is <code>null</code>, the other one is returned.
238
     *
239
     * @see initializeStatusPriorityMap()
240
     *
241
     * @param a
242
     * @param b
243
     * @return
244
     */
245
    private PresenceAbsenceTerm choosePreferred(PresenceAbsenceTerm a, PresenceAbsenceTerm b){
246

    
247
        if (statusPriorityMap == null) {
248
            initializeStatusPriorityMap();
249
        }
250

    
251
        if (b == null) {
252
            return a;
253
        }
254
        if (a == null) {
255
            return b;
256
        }
257

    
258
        if (statusPriorityMap.get(a) == null) {
259
            logger.warn("No priority found in map for " + a.getLabel());
260
            return b;
261
        }
262
        if (statusPriorityMap.get(b) == null) {
263
            logger.warn("No priority found in map for " + b.getLabel());
264
            return a;
265
        }
266
        if(statusPriorityMap.get(a) > statusPriorityMap.get(b)){
267
            return a;
268
        } else {
269
            return b;
270
        }
271
    }
272

    
273
    /**
274
     * reads the priority for the given status term from the extensions.
275
     *
276
     * @param term
277
     * @return the priority value
278
     */
279
    private Integer getPriorityFor(DefinedTermBase<?> term) {
280
        Set<Extension> extensions = term.getExtensions();
281
        for(Extension extension : extensions){
282
            if(!extension.getType().equals(ExtensionType.ORDER())) {
283
                continue;
284
            }
285
            int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
286
            if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
287
                try {
288
                    Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
289
                    return priority;
290
                } catch (NumberFormatException e) {
291
                    logger.warn("Invalid number format in Extension:" + extension.getValue());
292
                }
293
            }
294
        }
295
        logger.warn("no priority defined for '" + term.getLabel() + "'");
296
        return null;
297
    }
298

    
299
    /**
300
     * runs both steps
301
     * <ul>
302
     * <li>Step 1: Accumulate occurrence records by area</li>
303
     * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank,
304
     * the status of all children are accumulated on each rank starting from
305
     * lower rank to upper rank.</li>
306
     * </ul>
307
     *
308
     * @param superAreas
309
     *            the areas to which the subordinate areas should be projected.
310
     * @param lowerRank
311
     * @param upperRank
312
     * @param mode
     *            selects which of the two steps are run (by areas, by ranks or both)
313
     * @param classification
314
     *            limit the accumulation process to a specific classification
315
     *            (not yet implemented)
316
     * @param monitor
317
     *            the progress monitor to use for reporting progress to the
318
     *            user. It is the caller's responsibility to call done() on the
319
     *            given monitor. Accepts null, indicating that no progress
320
     *            should be reported and that the operation cannot be cancelled.
321
     */
322
    public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
323
            Classification classification, IProgressMonitor monitor) {
324

    
325
        if (monitor == null) {
326
            monitor = new NullProgressMonitor();
327
        }
328

    
329

    
330
        // only for debugging:
331
        logger.setLevel(Level.DEBUG); // TRACE will slow down a lot since it forces loading all term representations
332
        //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);
333

    
334
        logger.info("Hibernate JDBC Batch size: "
335
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
336

    
337
        Set<Classification> classifications = new HashSet<Classification>();
338
        if(classification == null) {
339
            classifications.addAll(classificationService.listClassifications(null, null, null, null));
340
        } else {
341
            classifications.add(classification);
342
        }
343

    
344
        int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;
345

    
346
        // take start time for performance testing
347
        // NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
348
        double start = System.currentTimeMillis();
349

    
350
        monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
351
        updatePriorities();
352
        monitor.worked(1);
353

    
354
        List<Rank> ranks = rankInterval(lowerRank, upperRank);
355

    
356
        for(Classification _classification : classifications) {
357

    
358
            ClassificationLookupDTO classificationLookupDao = classificationService.classificationLookup(_classification);
359
            classificationLookupDao.filter(ranks);
360

    
361
            double end1 = System.currentTimeMillis();
362
            logger.info("Time elapsed for classificationLookup() : " + (end1 - start) / (1000) + "s");
363
            double start2 = System.currentTimeMillis();
364

    
365
            monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
366
            if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
367
                accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200), true);
368
            }
369
            monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());
370

    
371
            double end2 = System.currentTimeMillis();
372
            logger.info("Time elapsed for accumulateByArea() : " + (end2 - start2) / (1000) + "s");
373

    
374
            double start3 = System.currentTimeMillis();
375
            if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
376
                accumulateByRank(ranks, classificationLookupDao, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
377
            }
378

    
379
            double end3 = System.currentTimeMillis();
380
            logger.info("Time elapsed for accumulateByRank() : " + (end3 - start3) / (1000) + "s");
381
            logger.info("Time elapsed for accumulate(): " + (end3 - start) / (1000) + "s");
382

    
383
            if(ONLY_FISRT_BATCH) {
384
                monitor.done();
385
                break;
386
            }
387
        }
388
    }
389

    
390

    
391
    /**
392
     * Step 1: Accumulate occurrence records by area
393
     * <ul>
394
     * <li>areas are projected to super areas e.g.:  HS <-- HS(A), HS(G), HS(S)</li>
395
     * <li>super areas do initially not have a status set ==> Prerequisite to check in CDM</li>
396
     * <li>distributions having a status contained in {@link #getByAreaIgnoreStatusList()} are ignored</li>
397
     * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
398
     * <li>the source references of the accumulated distributions are also accumulated into the new distribution,,</li>
399
     * <li>this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
400
     * </ul>
401
     *
402
     * @param superAreas
403
     *      the areas to which the subordinate areas should be projected
404
     * @param classificationLookupDao
405
     *
406
     */
407
    protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
408

    
409
        int batchSize = 1000;
410

    
411
        TransactionStatus txStatus = startTransaction(false);
412

    
413
        // reload superAreas TODO is it faster to getSession().merge(object) ??
414
        Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
415
        for (NamedArea superArea : superAreas){
416
            superAreaUuids.add(superArea.getUuid());
417
        }
418

    
419
        // visit all accepted taxa
420
        subMonitor.beginTask("Accumulating by area ",  classificationLookupDao.getTaxonIds().size());
421
        Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();
422

    
423
        while (taxonIdIterator.hasNext()) {
424

    
425
            if(txStatus == null) {
426
                // transaction has been comitted at the end of this batch, start a new one
427
                txStatus = startTransaction(false);
428
            }
429

    
430
            // the session is cleared after each batch, so load the superAreaList for each batch
431
            List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);
432

    
433
            // load taxa for this batch
434
            List<TaxonBase> taxa = null;
435
            Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
436
            while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
437
                taxonIds.add(taxonIdIterator.next());
438
            }
439

    
440
//            logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
441

    
442
            taxa = taxonService.listByIds(taxonIds, null, null, emptyOrderHints, TAXONDESCRIPTION_INIT_STRATEGY);
443

    
444
            // iterate over the taxa and accumulate areas
445
            for(TaxonBase taxonBase : taxa) {
446
                if(logger.isDebugEnabled()){
447
                    logger.debug("accumulateByArea() - taxon :" + taxonToString(taxonBase));
448
                }
449

    
450
                Taxon taxon = (Taxon)taxonBase;
451
                TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
452
                List<Distribution> distributions = distributionsFor(taxon);
453

    
454

    
455
                // Step through superAreas for accumulation of subAreas
456
                for (NamedArea superArea : superAreaList){
457

    
458
                    // accumulate all sub area status
459
                    PresenceAbsenceTerm accumulatedStatus = null;
460
                    // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
461
                    Set<NamedArea> subAreas = getSubAreasFor(superArea);
462
                    for(NamedArea subArea : subAreas){
463
                        if(logger.isTraceEnabled()){
464
                            logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
465
                        }
466
                        // step through all distributions for the given subArea
467
                        for(Distribution distribution : distributions){
468
                            if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
469
                                PresenceAbsenceTerm status = distribution.getStatus();
470
                                if(logger.isTraceEnabled()){
471
                                    logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
472
                                }
473
                                // skip all having a status value different of those in byAreaIgnoreStatusList
474
                                if (getByAreaIgnoreStatusList().contains(status)){
475
                                    continue;
476
                                }
477
                                accumulatedStatus = choosePreferred(accumulatedStatus, status);
478
                            }
479
                        }
480
                    } // next sub area
481
                    if (accumulatedStatus != null) {
482
                        if(logger.isDebugEnabled()){
483
                            logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));
484
                        }
485
                        // store new distribution element for superArea in taxon description
486
                        Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
487
                        newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
488
                        description.addElement(newDistribitionElement);
489
                    }
490

    
491
                } // next super area ....
492

    
493
                descriptionService.saveOrUpdate(description);
494
                taxonService.saveOrUpdate(taxon);
495
                subMonitor.worked(1);
496

    
497
            } // next taxon
498

    
499
            flushAndClear();
500

    
501
            // commit for every batch, otherwise the persistent context
502
            // may grow too much and eats up all the heap
503
            commitTransaction(txStatus);
504
            txStatus = null;
505

    
506
            if(ONLY_FISRT_BATCH) {
507
                break;
508
            }
509

    
510
        } // next batch of taxa
511

    
512
        subMonitor.done();
513
    }
514

    
515
   /**
516
    * Step 2: Accumulate by ranks starting from lower rank to upper rank, the status of all children
517
    * are accumulated on each rank starting from lower rank to upper rank.
518
    * <ul>
519
    * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
520
    *    up to upper rank (e.g. Genus)</li>
521
    *  <li>the accumulation is done for each distribution area found in the included taxa</li>
522
    *  <li>areas of subtaxa with status endemic are ignored</li>
523
    *  <li>the status with the highest priority determines the value for the accumulated distribution</li>
524
    *  <li>the source reference of the accumulated distributions are also accumulated into the new distribution,
525
    *    this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
526
    *</ul>
527
    */
528
    protected void accumulateByRank(List<Rank> rankInterval, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
529

    
530
        int batchSize = 500;
531

    
532
        TransactionStatus txStatus = startTransaction(false);
533

    
534
        // the loadRankSpecificRootNodes() method not only finds
535
        // taxa of the specified rank but also taxa of lower ranks
536
        // if no taxon of the specified rank exists, so we need to
537
        // remember which taxa have been processed already
538
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();
539
        List<TaxonBase> taxa = null;
540
        List<TaxonBase> childTaxa = null;
541

    
542
        List<Rank> ranks = rankInterval;
543

    
544
        int ticksPerRank = 100;
545
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);
546

    
547
        for (Rank rank : ranks) {
548

    
549
            if(logger.isDebugEnabled()){
550
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
551
            }
552

    
553
            SubProgressMonitor taxonSubMonitor = null;
554
            Set<Integer> taxonIdsPerRank = classificationLookupDao.getTaxonIdByRank().get(rank);
555
            if(taxonIdsPerRank == null || taxonIdsPerRank.isEmpty()) {
556
                continue;
557
            }
558
            Iterator<Integer> taxonIdIterator = taxonIdsPerRank.iterator();
559
            while (taxonIdIterator.hasNext()) {
560

    
561
                if(txStatus == null) {
562
                    // transaction has been comitted at the end of this batch, start a new one
563
                    txStatus = startTransaction(false);
564
                }
565

    
566
                // load taxa for this batch
567
                Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
568
                while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
569
                    taxonIds.add(taxonIdIterator.next());
570
                }
571

    
572
                taxa = taxonService.listByIds(taxonIds, null, null, emptyOrderHints, null);
573

    
574
                if(taxonSubMonitor == null) {
575
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
576
                    taxonSubMonitor.beginTask("Accumulating by rank " + termToString(rank), taxa.size());
577
                }
578

    
579
//                if(logger.isDebugEnabled()){
580
//                           logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
581
//                }
582

    
583
                for(TaxonBase taxonBase : taxa) {
584

    
585
                    Taxon taxon = (Taxon)taxonBase;
586
                    if (taxaProcessedIds.contains(taxon.getId())) {
587
                        if(logger.isDebugEnabled()){
588
                            logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
589
                        }
590
                        continue;
591
                    }
592
                    taxaProcessedIds.add(taxon.getId());
593
                    if(logger.isDebugEnabled()){
594
                        logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
595
                    }
596

    
597
                    // Step through direct taxonomic children for accumulation
598
                    Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();
599

    
600
                    Set<Integer> childTaxonIds = classificationLookupDao.getChildTaxonMap().get(taxon.getId());
601
                    if(childTaxonIds != null && !childTaxonIds.isEmpty()) {
602
                        childTaxa = taxonService.listByIds(childTaxonIds, null, null, emptyOrderHints, TAXONDESCRIPTION_INIT_STRATEGY);
603

    
604
                        for (TaxonBase childTaxonBase : childTaxa){
605

    
606
                            Taxon childTaxon = (Taxon) childTaxonBase;
607
                            getSession().setReadOnly(childTaxon, true);
608
                            if(logger.isTraceEnabled()){
609
                                logger.trace("                   subtaxon :" + taxonToString(childTaxon));
610
                            }
611

    
612
                            for(Distribution distribution : distributionsFor(childTaxon) ) {
613
                                PresenceAbsenceTerm status = distribution.getStatus();
614
                                NamedArea area = distribution.getArea();
615
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
616
                                  continue;
617
                                }
618
                                accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));
619
                             }
620
                        }
621

    
622
                        if(accumulatedStatusMap.size() > 0) {
623
                            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
624
                            for (NamedArea area : accumulatedStatusMap.keySet()) {
625
                                // store new distribution element in new Description
626
                                Distribution newDistribitionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
627
                                newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
628
                                description.addElement(newDistribitionElement);
629
                            }
630
                            taxonService.saveOrUpdate(taxon);
631
                            descriptionService.saveOrUpdate(description);
632
                        }
633

    
634
                    }
635
                    taxonSubMonitor.worked(1); // one taxon worked
636

    
637
                } // next taxon ....
638

    
639
                flushAndClear();
640

    
641
                // commit for every batch, otherwise the persistent context
642
                // may grow too much and eats up all the heap
643
                commitTransaction(txStatus);
644
                txStatus = null;
645

    
646
                if(ONLY_FISRT_BATCH) {
647
                    break;
648
                }
649
            } // next batch
650

    
651
            if(taxonSubMonitor != null) { // TODO taxonSubpager, this check should not be needed
652
                taxonSubMonitor.done();
653
            }
654
            subMonitor.worked(1);
655

    
656
            if(ONLY_FISRT_BATCH) {
657
                break;
658
            }
659
        } // next Rank
660

    
661
        subMonitor.done();
662
    }
663

    
664
/**
665
 * @param lowerRank
666
 * @param upperRank
667
 * @return
668
 */
669
private List<Rank> rankInterval(Rank lowerRank, Rank upperRank) {
670
    Rank currentRank = lowerRank;
671
    List<Rank> ranks = new ArrayList<Rank>();
672
    ranks.add(currentRank);
673
    while (!currentRank.isHigher(upperRank)) {
674
        currentRank = findNextHigherRank(currentRank);
675
        ranks.add(currentRank);
676
    }
677
    return ranks;
678
}
679

    
680
    /**
681
     * @return
682
     */
683
    private Session getSession() {
684
        return descriptionService.getSession();
685
    }
686

    
687
    /**
688
     *
689
     */
690
    private void flushAndClear() {
691
        logger.debug("flushing and clearing session ...");
692
        getSession().flush();
693
        try {
694
            Search.getFullTextSession(getSession()).flushToIndexes();
695
        } catch (HibernateException e) {
696
            /* IGNORE - Hibernate Search Event listeners not configured ... */
697
            if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
698
                throw e;
699
            }
700
        }
701
        getSession().clear();
702
    }
703

    
704

    
705
    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
706
    public TransactionStatus startTransaction(Boolean readOnly) {
707

    
708
        DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
709
        defaultTxDef.setReadOnly(readOnly);
710
        TransactionDefinition txDef = defaultTxDef;
711

    
712
        // Log some transaction-related debug information.
713
        if (logger.isTraceEnabled()) {
714
            logger.trace("Transaction name = " + txDef.getName());
715
            logger.trace("Transaction facets:");
716
            logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
717
            logger.trace("Isolation level = " + txDef.getIsolationLevel());
718
            logger.trace("Timeout = " + txDef.getTimeout());
719
            logger.trace("Read Only = " + txDef.isReadOnly());
720
            // org.springframework.orm.hibernate5.HibernateTransactionManager
721
            // provides more transaction/session-related debug information.
722
        }
723

    
724
        TransactionStatus txStatus = transactionManager.getTransaction(txDef);
725

    
726
        getSession().setFlushMode(FlushMode.COMMIT);
727

    
728
        return txStatus;
729
    }
730

    
731
    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
732
    public void commitTransaction(TransactionStatus txStatus){
733
        logger.debug("commiting transaction ...");
734
        transactionManager.commit(txStatus);
735
        return;
736
    }
737

    
738
    /**
739
     * returns the next higher rank
740
     *
741
     * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
742
     *
743
     * @param rank
744
     * @return
745
     */
746
    private Rank findNextHigherRank(Rank rank) {
747
        rank = (Rank) termService.load(rank.getUuid());
748
        return rank.getNextHigherTerm();
749
//        OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();;
750
//        return rankVocabulary.getNextHigherTerm(rank);
751
    }
752

    
753
    /**
754
     * Either finds an existing taxon description of the given taxon or creates a new one.
755
     * If the doClear is set all existing description elements will be cleared.
756
     *
757
     * @param taxon
758
     * @param doClear will remove all existing Distributions if the taxon already
759
     * has a MarkerType.COMPUTED() TaxonDescription
760
     * @return
761
     */
762
    private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {
763

    
764
        String descriptionTitle = this.getClass().getSimpleName();
765

    
766
        // find existing one
767
        for (TaxonDescription description : taxon.getDescriptions()) {
768
            if (description.hasMarker(MarkerType.COMPUTED(), true)) {
769
                logger.debug("reusing description for " + taxon.getTitleCache());
770
                if (doClear) {
771
                    int deleteCount = 0;
772
                    Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
773
                    for (DescriptionElementBase descriptionElement : description.getElements()) {
774
                        if(descriptionElement instanceof Distribution) {
775
                            deleteCandidates.add(descriptionElement);
776
                        }
777
                    }
778
                    if(deleteCandidates.size() > 0){
779
                        for(DescriptionElementBase descriptionElement : deleteCandidates) {
780
                            description.removeElement(descriptionElement);
781
                            descriptionService.deleteDescriptionElement(descriptionElement);
782
                            descriptionElement = null;
783
                            deleteCount++;
784
                        }
785
                        descriptionService.saveOrUpdate(description);
786
                        logger.debug("\t" + deleteCount +" distributions cleared");
787
                    }
788

    
789
                }
790
                return description;
791
            }
792
        }
793

    
794
        // create a new one
795
        logger.debug("creating new description for " + taxon.getTitleCache());
796
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
797
        description.setTitleCache(descriptionTitle, true);
798
        description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
799
        return description;
800
    }
801

    
802
    /**
803
     * @param superArea
804
     * @return
805
     */
806
    private Set<NamedArea> getSubAreasFor(NamedArea superArea) {
807

    
808
        if(!subAreaMap.containsKey(superArea)) {
809
            if(logger.isDebugEnabled()){
810
                logger.debug("loading included areas for " + superArea.getLabel());
811
            }
812
            subAreaMap.put(superArea, superArea.getIncludes());
813
        }
814
        return subAreaMap.get(superArea);
815
    }
816

    
817
    /**
818
     * @param taxon
819
     * @return
820
     */
821
    private List<Distribution> distributionsFor(Taxon taxon) {
822
        List<Distribution> distributions = new ArrayList<Distribution>();
823
        for(TaxonDescription description: taxon.getDescriptions()) {
824
            for(DescriptionElementBase deb : description.getElements()) {
825
                if(deb instanceof Distribution) {
826
                    distributions.add((Distribution)deb);
827
                }
828
            }
829
        }
830
        return distributions;
831
    }
832

    
833
    /**
834
     * @param taxon
835
     * @param logger2
836
     * @return
837
     */
838
    private String taxonToString(TaxonBase taxon) {
839
        if(logger.isTraceEnabled()) {
840
            return taxon.getTitleCache();
841
        } else {
842
            return taxon.toString();
843
        }
844
    }
845

    
846
    /**
847
     * @param taxon
848
     * @param logger2
849
     * @return
850
     */
851
    private String termToString(OrderedTermBase<?> term) {
852
        if(logger.isTraceEnabled()) {
853
            return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
854
        } else {
855
            return term.getIdInVocabulary();
856
        }
857
    }
858

    
859
    /**
860
     * Sets the priorities for presence and absence terms, the priorities are stored in extensions.
861
     * This method will start a new transaction and commits it after the work is done.
862
     */
863
    public void updatePriorities() {
864

    
865
        TransactionStatus txStatus = startTransaction(false);
866

    
867
        Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
868

    
869
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
870
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
871
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
872
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
873
        priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
874
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
875
        priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
876
        priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
877
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
878
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
879
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
880
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
881
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
882
        priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
883
        priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
884
        priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);
885

    
886
        for(PresenceAbsenceTerm term : priorityMap.keySet()) {
887
            // load the term
888
            term = (PresenceAbsenceTerm) termService.load(term.getUuid());
889
            // find the extension
890
            Extension priorityExtension = null;
891
            Set<Extension> extensions = term.getExtensions();
892
            for(Extension extension : extensions){
893
                if (!extension.getType().equals(ExtensionType.ORDER())) {
894
                    continue;
895
                }
896
                int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
897
                if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
898
                    priorityExtension = extension;
899
                    break;
900
                }
901
            }
902
            if(priorityExtension == null) {
903
                priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
904
            }
905
            priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));
906

    
907
            // save the term
908
            termService.saveOrUpdate(term);
909
            if (logger.isDebugEnabled()) {
910
                logger.debug("Priority updated for " + term.getLabel());
911
            }
912
        }
913

    
914
        commitTransaction(txStatus);
915
    }
916

    
917
    /**
     * The available aggregation modes.
     * <p>
     * NOTE(review): the code evaluating these constants is not part of this
     * section; judging by the names they presumably select aggregation over
     * areas, over ranks, or both - confirm against the aggregation entry point.
     */
    public enum AggregationMode {

        byAreas,

        byRanks,

        byAreasAndRanks
    }
923
}
    (1-1/1)