Project

General

Profile

Download (36.8 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2013 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.api.service.description;
11

    
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Iterator;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20
import java.util.UUID;
21

    
22
import org.apache.log4j.Level;
23
import org.apache.log4j.Logger;
24
import org.hibernate.FlushMode;
25
import org.hibernate.HibernateException;
26
import org.hibernate.Session;
27
import org.hibernate.engine.spi.SessionFactoryImplementor;
28
import org.hibernate.search.Search;
29
import org.springframework.beans.factory.annotation.Autowired;
30
import org.springframework.orm.hibernate5.HibernateTransactionManager;
31
import org.springframework.stereotype.Service;
32
import org.springframework.transaction.TransactionDefinition;
33
import org.springframework.transaction.TransactionStatus;
34
import org.springframework.transaction.support.DefaultTransactionDefinition;
35

    
36
import eu.etaxonomy.cdm.api.service.IClassificationService;
37
import eu.etaxonomy.cdm.api.service.IDescriptionService;
38
import eu.etaxonomy.cdm.api.service.INameService;
39
import eu.etaxonomy.cdm.api.service.ITaxonService;
40
import eu.etaxonomy.cdm.api.service.ITermService;
41
import eu.etaxonomy.cdm.api.service.pager.Pager;
42
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
43
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
44
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
45
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
46
import eu.etaxonomy.cdm.model.common.Extension;
47
import eu.etaxonomy.cdm.model.common.ExtensionType;
48
import eu.etaxonomy.cdm.model.common.Marker;
49
import eu.etaxonomy.cdm.model.common.MarkerType;
50
import eu.etaxonomy.cdm.model.common.OrderedTermBase;
51
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
52
import eu.etaxonomy.cdm.model.description.Distribution;
53
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
54
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55
import eu.etaxonomy.cdm.model.location.NamedArea;
56
import eu.etaxonomy.cdm.model.name.Rank;
57
import eu.etaxonomy.cdm.model.taxon.Classification;
58
import eu.etaxonomy.cdm.model.taxon.Taxon;
59
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
60
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
61
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
62

    
63
/**
64
 *
65
 * <h2>GENERAL NOTES </h2>
66
 * <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
67
 * version 14 written in Visual Basic and still need to be
68
 * adapted to the java version of the transmission engine!</em>
69
 *
70
 * <h3>summaryStatus</h3>
71
 *
72
 *   Each distribution information has a summaryStatus, this is a summary of the status codes
73
 *   as stored in the fields of emOccurrence native, introduced, cultivated, ...
74
 *   The summaryStatus seems to be equivalent to  the CDM DistributionStatus
75
 *
76
 * <h3>map generation</h3>
77
 *
78
 *   When generating maps from the accumulated distribution information some special cases have to be handled:
79
 * <ol>
80
 *   <li>if an entered or imported status information exists for the same area for which calculated (accumulated)
81
 *       data is available, the calculated data has to be given preference over other data.
82
 *   </li>
83
 *   <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
84
 *       status should be shown in the map, whereas the super area should be ignored.
85
 *   </li>
86
 * </ol>
87
 *
88
 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
89
 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
90
 * @date Feb 22, 2013
91
 */
92
@Service
93
public class TransmissionEngineDistribution { //TODO extends IoBase?
94

    
95
    // Extension values of type ExtensionType.ORDER() which start with this prefix
    // carry the priority of a status term; the remainder is parsed as integer in getPriorityFor().
    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";
96

    
97
    // Class logger. Note that accumulate() sets the level of this logger to INFO at runtime.
    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);
98

    
99
    /**
100
     * only used for performance testing
101
     */
102
    final boolean ONLY_FISRT_BATCH = true;
103

    
104

    
105
    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
106
            "description.markers.markerType",
107
            "description.elements.markers.markerType",
108
            "description.elements.area",
109
            "description.elements.status",
110
            "description.elements.sources.citation.authorship",
111
//            "description.elements.sources.nameUsedInSource",
112
//            "description.elements.multilanguageText",
113
//            "name.status.type",
114
    });
115

    
116

    
117
    /**
118
     * A map which contains the status terms as key and the priority as value
119
     * The map will contain both, the PresenceTerms and the AbsenceTerms
120
     */
121
    // Stays null until choosePreferred() triggers initializeStatusPriorityMap().
    private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;
122

    
123
    @Autowired
124
    // Provides the Hibernate session (see getSession()) and persists the computed descriptions.
    private IDescriptionService descriptionService;
125

    
126
    @Autowired
127
    // Used to list the PresenceAbsenceTerms, to reload the super areas by UUID and to reload ranks.
    private ITermService termService;
128

    
129
    @Autowired
130
    // Loads the taxa of a batch by their ids and saves the taxa after accumulation.
    private ITaxonService taxonService;
131

    
132
    @Autowired
133
    // Lists the classifications, creates the classification lookup and pages the rank specific root nodes.
    private IClassificationService classificationService;
134

    
135
    @Autowired
136
    // NOTE(review): the field name looks like a typo of 'nameService'. Within the visible part of
    // this file it is only mentioned in commented-out code of findNextHigherRank().
    private INameService mameService;
137

    
138
    @Autowired
139
    // Opens and commits the per-batch transactions, see startTransaction() and commitTransaction().
    private HibernateTransactionManager transactionManager;
140

    
141
    // Filled with defaults on first call of getByAreaIgnoreStatusList() unless set explicitly before.
    private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;
142

    
143
    // Filled with defaults on first call of getByRankIgnoreStatusList() unless set explicitly before.
    private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;
144

    
145
    // NOTE(review): not accessed within the visible part of this file; presumably the cache
    // behind getSubAreasFor(superArea) - confirm against the rest of the class.
    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();
146

    
147

    
148
    /**
149
     * byAreaIgnoreStatusList contains by default:
150
     *  <ul>
151
     *    <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
152
     *    <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
153
     *    <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
154
     *    <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
155
     *    <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
156
     *  </ul>
157
     *
158
     * @return the byAreaIgnoreStatusList
159
     */
160
    public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
161
        if(byAreaIgnoreStatusList == null ){
162
            byAreaIgnoreStatusList = Arrays.asList(
163
                    new PresenceAbsenceTerm[] {
164
                    		PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
165
                    		PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
166
                    		PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
167
                    		PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
168
                    		PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
169
                            // TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
170
                    });
171
        }
172
        return byAreaIgnoreStatusList;
173
    }
174

    
175
    /**
176
     * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
177
     */
178
    public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
179
        this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
180
    }
181

    
182
    /**
183
     * byRankIgnoreStatusList contains by default
184
     *  <ul>
185
     *    <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
186
     *  </ul>
187
     *
188
     * @return the byRankIgnoreStatusList
189
     */
190
    public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {
191

    
192
        if (byRankIgnoreStatusList == null) {
193
            byRankIgnoreStatusList = Arrays.asList(
194
                    new PresenceAbsenceTerm[] {
195
                    		PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
196
                    });
197
        }
198
        return byRankIgnoreStatusList;
199
    }
200

    
201
    /**
202
     * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
203
     */
204
    public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
205
        this.byRankIgnoreStatusList = byRankIgnoreStatusList;
206
    }
207

    
208
    /**
209
     *
210
     * @param superAreas
211
     */
212
    public TransmissionEngineDistribution() {
213
    }
214

    
215
    /**
216
     * initializes the map which contains the status terms as key and the priority as value
217
     * The map will contain both, the PresenceTerms and the AbsenceTerms
218
     */
219
    private void initializeStatusPriorityMap() {
220

    
221
        statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
222
        Integer priority;
223

    
224
        // PresenceTerms
225
        for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
226
            priority = getPriorityFor(term);
227
            if(priority != null){
228
                statusPriorityMap.put(term, priority);
229
            }
230
        }
231
    }
232

    
233
    /**
234
     * Compares the PresenceAbsenceTermBase terms <code>a</code> and <code>b</code>  and
235
     * returns the PresenceAbsenceTermBase with the higher priority as stored in the statusPriorityMap.
236
     * If either a or b are null b or a is returned.
237
     *
238
     * @see initializeStatusPriorityMap()
239
     *
240
     * @param a
241
     * @param b
242
     * @return
243
     */
244
    private PresenceAbsenceTerm choosePreferred(PresenceAbsenceTerm a, PresenceAbsenceTerm b){
245

    
246
        if (statusPriorityMap == null) {
247
            initializeStatusPriorityMap();
248
        }
249

    
250
        if (b == null) {
251
            return a;
252
        }
253
        if (a == null) {
254
            return b;
255
        }
256

    
257
        if (statusPriorityMap.get(a) == null) {
258
            logger.warn("No priority found in map for " + a.getLabel());
259
            return b;
260
        }
261
        if (statusPriorityMap.get(b) == null) {
262
            logger.warn("No priority found in map for " + b.getLabel());
263
            return a;
264
        }
265
        if(statusPriorityMap.get(a) > statusPriorityMap.get(b)){
266
            return a;
267
        } else {
268
            return b;
269
        }
270
    }
271

    
272
    /**
273
     * reads the priority for the given status term from the extensions.
274
     *
275
     * @param term
276
     * @return the priority value
277
     */
278
    private Integer getPriorityFor(DefinedTermBase<?> term) {
279
        Set<Extension> extensions = term.getExtensions();
280
        for(Extension extension : extensions){
281
            if(!extension.getType().equals(ExtensionType.ORDER())) {
282
                continue;
283
            }
284
            int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
285
            if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
286
                try {
287
                    Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
288
                    return priority;
289
                } catch (NumberFormatException e) {
290
                    logger.warn("Invalid number format in Extension:" + extension.getValue());
291
                }
292
            }
293
        }
294
        logger.warn("no priority defined for '" + term.getLabel() + "'");
295
        return null;
296
    }
297

    
298
    /**
299
     * runs both steps
300
     * <ul>
301
     * <li>Step 1: Accumulate occurrence records by area</li>
302
     * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank,
303
     * the status of all children are accumulated on each rank starting from
304
     * lower rank to upper rank.</li>
305
     * </ul>
306
     *
307
     * @param superAreas
308
     *            the areas to which the subordinate areas should be projected.
309
     * @param lowerRank
310
     * @param upperRank
311
     * @param classification
312
     * @param classification
313
     *            limit the accumulation process to a specific classification
314
     *            (not yet implemented)
315
     * @param monitor
316
     *            the progress monitor to use for reporting progress to the
317
     *            user. It is the caller's responsibility to call done() on the
318
     *            given monitor. Accepts null, indicating that no progress
319
     *            should be reported and that the operation cannot be cancelled.
320
     */
321
    public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
322
            Classification classification, IProgressMonitor monitor) {
323

    
324
        if (monitor == null) {
325
            monitor = new NullProgressMonitor();
326
        }
327

    
328
        logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations
329

    
330
        logger.info("Hibernate JDBC Batch size: "
331
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
332

    
333
        // only for debugging:
334
        logger.setLevel(Level.INFO);
335
        //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);
336

    
337
        Set<Classification> classifications = new HashSet<Classification>();
338
        if(classification == null) {
339
            classifications.addAll(classificationService.listClassifications(null, null, null, null));
340
        } else {
341
            classifications.add(classification);
342
        }
343

    
344
        int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;
345

    
346
        // take start time for performance testing
347
        // NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
348
        double start = System.currentTimeMillis();
349

    
350
        monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
351
        updatePriorities();
352
        monitor.worked(1);
353

    
354
        for(Classification _classification : classifications) {
355

    
356
            ClassificationLookupDTO classificationLookupDao = classificationService.classificationLookup(_classification);
357

    
358
            double end1 = System.currentTimeMillis();
359
            logger.info("Time elapsed for classificationLookup() : " + (end1 - start) / (1000) + "s");
360
            double start2 = System.currentTimeMillis();
361

    
362
            monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
363
            if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
364
                accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200), true);
365
            }
366
            monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());
367

    
368
            double end2 = System.currentTimeMillis();
369
            logger.info("Time elapsed for accumulateByArea() : " + (end2 - start2) / (1000) + "s");
370

    
371
            double start3 = System.currentTimeMillis();
372
            if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
373
                accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
374
            }
375

    
376
            double end3 = System.currentTimeMillis();
377
            logger.info("Time elapsed for accumulateByRank() : " + (end3 - start3) / (1000) + "s");
378
            logger.info("Time elapsed for accumulate(): " + (end3 - start) / (1000) + "s");
379

    
380
            if(ONLY_FISRT_BATCH) {
381
                monitor.done();
382
                break;
383
            }
384
        }
385
    }
386

    
387
    /**
388
     * @return
389
     */
390
    private Session getSession() {
391
        return descriptionService.getSession();
392
    }
393

    
394
    /**
395
     * Step 1: Accumulate occurrence records by area
396
     * <ul>
397
     * <li>areas are projected to super areas e.g.:  HS <-- HS(A), HS(G), HS(S)</li>
398
     * <li>super areas do initially not have a status set ==> Prerequisite to check in CDM</li>
399
     * <li>areas having a summary status of summary value different from {@link #getByAreaIgnoreStatusList()} are ignored</li>
400
     * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
401
     * <li>the source references of the accumulated distributions are also accumulated into the new distribution,,</li>
402
     * <li>this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
403
     * </ul>
404
     *
405
     * @param superAreas
406
     *      the areas to which the subordinate areas should be projected
407
     * @param classificationLookupDao
408
     *
409
     */
410
    protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
411

    
412
        int batchSize = 1000;
413

    
414
        TransactionStatus txStatus = startTransaction(false);
415

    
416
        // reload superAreas TODO is it faster to getSession().merge(object) ??
417
        Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
418
        for (NamedArea superArea : superAreas){
419
            superAreaUuids.add(superArea.getUuid());
420
        }
421
        List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);
422

    
423
        // visit all accepted taxa
424
        subMonitor.beginTask("Accumulating by area ",  classificationLookupDao.getTaxonIds().size());
425
        Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();
426

    
427
        while (taxonIdIterator.hasNext()) {
428

    
429
            if(txStatus == null) {
430
                // transaction has been comitted at the end of this batch, start a new one
431
                txStatus = startTransaction(false);
432
            }
433

    
434
            // load taxa for this batch
435
            List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
436
            Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
437
            while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
438
                taxonIds.add(taxonIdIterator.next());
439
            }
440

    
441
//            logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
442

    
443
            taxa = taxonService.listByIds(taxonIds, null, null, null, TAXONDESCRIPTION_INIT_STRATEGY);
444

    
445
            // iterate over the taxa and accumulate areas
446
            for(TaxonBase taxon : taxa) {
447
                if(logger.isDebugEnabled()){
448
                    logger.debug("accumulateByArea() - taxon :" + taxonToString(taxon));
449
                }
450

    
451
                TaxonDescription description = findComputedDescription((Taxon)taxon, doClearDescriptions);
452
                List<Distribution> distributions = distributionsFor((Taxon)taxon);
453

    
454
                // Step through superAreas for accumulation of subAreas
455
                for (NamedArea superArea : superAreaList){
456

    
457
                    // accumulate all sub area status
458
                    PresenceAbsenceTerm accumulatedStatus = null;
459
                    // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
460
                    Set<NamedArea> subAreas = getSubAreasFor(superArea);
461
                    for(NamedArea subArea : subAreas){
462
                        if(logger.isTraceEnabled()){
463
                            logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
464
                        }
465
                        // step through all distributions for the given subArea
466
                        for(Distribution distribution : distributions){
467
                            if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
468
                                PresenceAbsenceTerm status = distribution.getStatus();
469
                                if(logger.isTraceEnabled()){
470
                                    logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
471
                                }
472
                                // skip all having a status value different of those in byAreaIgnoreStatusList
473
                                if (getByAreaIgnoreStatusList().contains(status)){
474
                                    continue;
475
                                }
476
                                accumulatedStatus = choosePreferred(accumulatedStatus, status);
477
                            }
478
                        }
479
                    } // next sub area
480
                    if (accumulatedStatus != null) {
481
                        if(logger.isDebugEnabled()){
482
                            logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));
483
                        }
484
                        // store new distribution element for superArea in taxon description
485
                        Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
486
                        newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
487
                        description.addElement(newDistribitionElement);
488
                    }
489

    
490
                } // next super area ....
491

    
492
                descriptionService.saveOrUpdate(description);
493
                taxonService.saveOrUpdate(taxon);
494
                subMonitor.worked(1);
495

    
496
            } // next taxon
497

    
498
            flushAndClear();
499

    
500
            // commit for every batch, otherwise the persistent context
501
            // may grow too much and eats up all the heap
502
            commitTransaction(txStatus);
503
            txStatus = null;
504

    
505
            if(ONLY_FISRT_BATCH) {
506
                break;
507
            }
508

    
509
        } // next batch of taxa
510

    
511
        subMonitor.done();
512
    }
513

    
514
    /**
515
     * @param taxon
516
     * @param logger2
517
     * @return
518
     */
519
    private String taxonToString(TaxonBase taxon) {
520
        if(logger.isTraceEnabled()) {
521
            return taxon.getTitleCache();
522
        } else {
523
            return taxon.toString();
524
        }
525
    }
526

    
527
    /**
528
     * @param taxon
529
     * @param logger2
530
     * @return
531
     */
532
    private String termToString(OrderedTermBase<?> term) {
533
        if(logger.isTraceEnabled()) {
534
            return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
535
        } else {
536
            return term.getIdInVocabulary();
537
        }
538
    }
539

    
540
   /**
541
    * Step 2: Accumulate by ranks staring from lower rank to upper rank, the status of all children
542
    * are accumulated on each rank starting from lower rank to upper rank.
543
    * <ul>
544
    * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
545
    *    up to upper rank (e.g. Genus)</li>
546
    *  <li>the accumulation id done for each distribution area found in the included taxa</li>
547
    *  <li>areas of subtaxa with status endemic are ignored</li>
548
    *  <li>the status with the highest priority determines the value for the accumulated distribution</li>
549
    *  <li>the source reference of the accumulated distributions are also accumulated into the new distribution,
550
    *    this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
551
    *</ul>
552
    */
553
    protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
554

    
555
        int batchSize = 500;
556

    
557
        TransactionStatus txStatus = startTransaction(false);
558

    
559
        // the loadRankSpecificRootNodes() method not only finds
560
        // taxa of the specified rank but also taxa of lower ranks
561
        // if no taxon of the specified rank exists, so we need to
562
        // remember which taxa have been processed already
563
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();
564

    
565
        Rank currentRank = lowerRank;
566
        List<Rank> ranks = new ArrayList<Rank>();
567
        ranks.add(currentRank);
568
        while (!currentRank.isHigher(upperRank)) {
569
            currentRank = findNextHigherRank(currentRank);
570
            ranks.add(currentRank);
571
        }
572

    
573
        int ticksPerRank = 100;
574
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);
575

    
576
        for (Rank rank : ranks) {
577

    
578
            if(logger.isDebugEnabled()){
579
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
580
            }
581

    
582
            Pager<TaxonNode> taxonPager = null;
583
            int pageIndex = 0;
584
            boolean isLastPage = false;
585
            SubProgressMonitor taxonSubMonitor = null;
586
            while (!isLastPage) {
587

    
588
                if(txStatus == null) {
589
                    // transaction has been comitted at the end of this batch, start a new one
590
                    txStatus = startTransaction(false);
591
                }
592

    
593
                taxonPager = classificationService
594
                        .pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null);
595

    
596
                if(taxonSubMonitor == null) {
597
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
598
                    taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount().intValue());
599
                }
600

    
601
                if(taxonPager != null){
602
                    if(logger.isDebugEnabled()){
603
                               logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
604
                    }
605
                } else {
606
                    logger.error("accumulateByRank() - taxonNode pager was NULL");
607
                }
608

    
609
                if(taxonPager != null){
610
                    isLastPage = taxonPager.getRecords().size() < batchSize;
611
                    if (taxonPager.getRecords().size() == 0){
612
                        break;
613
                    }
614

    
615
                    for(TaxonNode taxonNode : taxonPager.getRecords()) {
616

    
617
                        Taxon taxon = taxonNode.getTaxon();
618
                        if (taxaProcessedIds.contains(taxon.getId())) {
619
                            if(logger.isDebugEnabled()){
620
                                logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
621
                            }
622
                            continue;
623
                        }
624
                        taxaProcessedIds.add(taxon.getId());
625
                        if(logger.isDebugEnabled()){
626
                            logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
627
                        }
628

    
629
                        // Step through direct taxonomic children for accumulation
630
                        Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();
631

    
632
                        for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){
633

    
634
                            getSession().setReadOnly(taxonNode, true);
635
                            if(logger.isTraceEnabled()){
636
                                logger.trace("                   subtaxon :" + taxonToString(subTaxonNode.getTaxon()));
637
                            }
638

    
639
                            for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
640
                                PresenceAbsenceTerm status = distribution.getStatus();
641
                                NamedArea area = distribution.getArea();
642
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
643
                                  continue;
644
                                }
645
                                accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));
646
                             }
647
                        }
648

    
649
                        if(accumulatedStatusMap.size() > 0) {
650
                            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
651
                            for (NamedArea area : accumulatedStatusMap.keySet()) {
652
                                // store new distribution element in new Description
653
                                Distribution newDistribitionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
654
                                newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
655
                                description.addElement(newDistribitionElement);
656
                            }
657
                            taxonService.saveOrUpdate(taxon);
658
                            descriptionService.saveOrUpdate(description);
659
                        }
660
                        taxonSubMonitor.worked(1); // one taxon worked
661

    
662
                    } // next taxon node ....
663
                }
664
                taxonPager = null;
665
                flushAndClear();
666

    
667
                // commit for every batch, otherwise the persistent context
668
                // may grow too much and eats up all the heap
669
                commitTransaction(txStatus);
670
                txStatus = null;
671

    
672
                if(ONLY_FISRT_BATCH) {
673
                    break;
674
                }
675
            } // next batch
676

    
677
            if(taxonSubMonitor != null) { // TODO taxonSubpager, this check should not be needed
678
                taxonSubMonitor.done();
679
            }
680
            subMonitor.worked(1);
681

    
682
            if(ONLY_FISRT_BATCH) {
683
                break;
684
            }
685
        } // next Rank
686

    
687
        subMonitor.done();
688
    }
689

    
690
    /**
691
     *
692
     */
693
    private void flushAndClear() {
694
        logger.debug("flushing and clearing session ...");
695
        getSession().flush();
696
        try {
697
            Search.getFullTextSession(getSession()).flushToIndexes();
698
        } catch (HibernateException e) {
699
            /* IGNORE - Hibernate Search Event listeners not configured ... */
700
            if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
701
                throw e;
702
            }
703
        }
704
        getSession().clear();
705
    }
706

    
707

    
708
    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
709
    public TransactionStatus startTransaction(Boolean readOnly) {
710

    
711
        DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
712
        defaultTxDef.setReadOnly(readOnly);
713
        TransactionDefinition txDef = defaultTxDef;
714

    
715
        // Log some transaction-related debug information.
716
        if (logger.isTraceEnabled()) {
717
            logger.trace("Transaction name = " + txDef.getName());
718
            logger.trace("Transaction facets:");
719
            logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
720
            logger.trace("Isolation level = " + txDef.getIsolationLevel());
721
            logger.trace("Timeout = " + txDef.getTimeout());
722
            logger.trace("Read Only = " + txDef.isReadOnly());
723
            // org.springframework.orm.hibernate5.HibernateTransactionManager
724
            // provides more transaction/session-related debug information.
725
        }
726

    
727
        TransactionStatus txStatus = transactionManager.getTransaction(txDef);
728

    
729
        getSession().setFlushMode(FlushMode.COMMIT);
730

    
731
        return txStatus;
732
    }
733

    
734
    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
735
    public void commitTransaction(TransactionStatus txStatus){
736
        logger.debug("commiting transaction ...");
737
        transactionManager.commit(txStatus);
738
        return;
739
    }
740

    
741
    /**
742
     * returns the next higher rank
743
     *
744
     * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
745
     *
746
     * @param rank
747
     * @return
748
     */
749
    private Rank findNextHigherRank(Rank rank) {
750
        rank = (Rank) termService.load(rank.getUuid());
751
        return rank.getNextHigherTerm();
752
//        OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();;
753
//        return rankVocabulary.getNextHigherTerm(rank);
754
    }
755

    
756
    /**
757
     * Either finds an existing taxon description of the given taxon or creates a new one.
758
     * If the doClear is set all existing description elements will be cleared.
759
     *
760
     * @param taxon
761
     * @param doClear will remove all existing Distributions if the taxon already
762
     * has a MarkerType.COMPUTED() TaxonDescription
763
     * @return
764
     */
765
    private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {
766

    
767
        String descriptionTitle = this.getClass().getSimpleName();
768

    
769
        // find existing one
770
        for (TaxonDescription description : taxon.getDescriptions()) {
771
            if (description.hasMarker(MarkerType.COMPUTED(), true)) {
772
                logger.debug("reusing description for " + taxon.getTitleCache());
773
                if (doClear) {
774
                    int deleteCount = 0;
775
                    Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
776
                    for (DescriptionElementBase descriptionElement : description.getElements()) {
777
                        if(descriptionElement instanceof Distribution) {
778
                            deleteCandidates.add(descriptionElement);
779
                        }
780
                    }
781
                    if(deleteCandidates.size() > 0){
782
                        for(DescriptionElementBase descriptionElement : deleteCandidates) {
783
                            description.removeElement(descriptionElement);
784
                            descriptionService.deleteDescriptionElement(descriptionElement);
785
                            descriptionElement = null;
786
                            deleteCount++;
787
                        }
788
                        descriptionService.saveOrUpdate(description);
789
                        logger.debug("\t" + deleteCount +" distributions cleared");
790
                    }
791

    
792
                }
793
                return description;
794
            }
795
        }
796

    
797
        // create a new one
798
        logger.debug("creating new description for " + taxon.getTitleCache());
799
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
800
        description.setTitleCache(descriptionTitle, true);
801
        description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
802
        return description;
803
    }
804

    
805
    /**
806
     * @param superArea
807
     * @return
808
     */
809
    private Set<NamedArea> getSubAreasFor(NamedArea superArea) {
810

    
811
        if(!subAreaMap.containsKey(superArea)) {
812
            if(logger.isDebugEnabled()){
813
                logger.debug("loading included areas for " + superArea.getLabel());
814
            }
815
            subAreaMap.put(superArea, superArea.getIncludes());
816
        }
817
        return subAreaMap.get(superArea);
818
    }
819

    
820
    /**
821
     * @param taxon
822
     * @return
823
     */
824
    private List<Distribution> distributionsFor(Taxon taxon) {
825
        List<Distribution> distributions = new ArrayList<Distribution>();
826
        for(TaxonDescription description: taxon.getDescriptions()) {
827
            for(DescriptionElementBase deb : description.getElements()) {
828
                if(deb instanceof Distribution) {
829
                    distributions.add((Distribution)deb);
830
                }
831
            }
832
        }
833
        return distributions;
834
    }
835

    
836
    /**
837
     * Sets the priorities for presence and absence terms, the priorities are stored in extensions.
838
     * This method will start a new transaction and commits it after the work is done.
839
     */
840
    public void updatePriorities() {
841

    
842
        TransactionStatus txStatus = startTransaction(false);
843

    
844
        Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
845

    
846
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
847
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
848
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
849
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
850
        priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
851
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
852
        priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
853
        priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
854
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
855
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
856
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
857
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
858
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
859
        priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
860
        priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
861
        priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);
862

    
863
        for(PresenceAbsenceTerm term : priorityMap.keySet()) {
864
            // load the term
865
            term = (PresenceAbsenceTerm) termService.load(term.getUuid());
866
            // find the extension
867
            Extension priorityExtension = null;
868
            Set<Extension> extensions = term.getExtensions();
869
            for(Extension extension : extensions){
870
                if (!extension.getType().equals(ExtensionType.ORDER())) {
871
                    continue;
872
                }
873
                int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
874
                if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
875
                    priorityExtension = extension;
876
                    break;
877
                }
878
            }
879
            if(priorityExtension == null) {
880
                priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
881
            }
882
            priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));
883

    
884
            // save the term
885
            termService.saveOrUpdate(term);
886
            if (logger.isDebugEnabled()) {
887
                logger.debug("Priority updated for " + term.getLabel());
888
            }
889
        }
890

    
891
        commitTransaction(txStatus);
892
    }
893

    
894
    /**
     * The available aggregation modes.
     * NOTE(review): the meaning of the constants is inferred from their names
     * only - confirm against the code which evaluates them.
     */
    public enum AggregationMode {

        /** presumably aggregation by areas only */
        byAreas,

        /** presumably aggregation by ranks only */
        byRanks,

        /** presumably both kinds of aggregation */
        byAreasAndRanks
    }
900
}
    (1-1/1)