// $Id$
/**
 * Copyright (C) 2013 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
package eu.etaxonomy.cdm.api.service.description;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.hibernate.FlushMode;
import org.hibernate.HibernateException;
import org.hibernate.Session;
import org.hibernate.engine.spi.SessionFactoryImplementor;
import org.hibernate.search.Search;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate5.HibernateTransactionManager;
import org.springframework.stereotype.Service;
import org.springframework.transaction.TransactionDefinition;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.DefaultTransactionDefinition;

import eu.etaxonomy.cdm.api.service.IClassificationService;
import eu.etaxonomy.cdm.api.service.IDescriptionService;
import eu.etaxonomy.cdm.api.service.INameService;
import eu.etaxonomy.cdm.api.service.ITaxonService;
import eu.etaxonomy.cdm.api.service.ITermService;
import eu.etaxonomy.cdm.api.service.pager.Pager;
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
import eu.etaxonomy.cdm.model.common.Extension;
import eu.etaxonomy.cdm.model.common.ExtensionType;
import eu.etaxonomy.cdm.model.common.Marker;
import eu.etaxonomy.cdm.model.common.MarkerType;
import eu.etaxonomy.cdm.model.common.OrderedTermBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
import eu.etaxonomy.cdm.model.description.Distribution;
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.location.NamedArea;
import eu.etaxonomy.cdm.model.name.Rank;
import eu.etaxonomy.cdm.model.taxon.Classification;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
import eu.etaxonomy.cdm.persistence.dao.taxon.IClassificationDao;
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;

/**
 * The TransmissionEngineDistribution is meant to be used from within a service class.
 *
 * <h2>GENERAL NOTES</h2>
 * <em>TODO: These notes are directly taken from the original Transmission Engine Occurrence
 * version 14 written in Visual Basic and still need to be
 * adapted to the java version of the transmission engine!</em>
 *
 * <h3>summaryStatus</h3>
 *
 *   Each piece of distribution information has a summaryStatus, which is a summary of the status codes
 *   as stored in the fields of emOccurrence: native, introduced, cultivated, ...
 *   The summaryStatus seems to be equivalent to the CDM DistributionStatus.
 *
 * <h3>map generation</h3>
 *
 *   When generating maps from the accumulated distribution information some special cases have to be handled:
 * <ol>
 *   <li>if entered or imported status information exists for the same area for which calculated (accumulated)
 *       data is available, the calculated data has to be given preference over other data.
 *   </li>
 *   <li>if there is an area with a sub area and both areas have the same calculated status, only the sub area
 *       status should be shown in the map, whereas the super area should be ignored.
 *   </li>
 * </ol>
 *
 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
 * @date Feb 22, 2013
 */
@Service

public class TransmissionEngineDistribution { //TODO extends IoBase?
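
    /*
     * Usage sketch (illustrative only): a consumer would typically obtain this Spring
     * service via dependency injection and trigger the aggregation for a set of super
     * areas. The concrete areas, ranks and monitor used below are assumptions made for
     * the example, not requirements of the class.
     *
     *   @Autowired
     *   private TransmissionEngineDistribution transmissionEngine;
     *
     *   List<NamedArea> superAreas = ...; // e.g. the super areas to project onto
     *   transmissionEngine.accumulate(AggregationMode.byAreasAndRanks, superAreas,
     *           Rank.SUBSPECIES(), Rank.GENUS(), null, new NullProgressMonitor());
     */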

    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";

    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);

    /**
     * only used for performance testing
     */
    final boolean ONLY_FISRT_BATCH = false;


    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
            "description.markers.markerType",
            "description.elements.markers.markerType",
            "description.elements.area",
            "description.elements.sources.citation.authorship",
            "description.elements.sources.nameUsedInSource",
            "description.elements.multilanguageText",
            "name.status.type",
    });


    /**
     * A map which contains the status terms as key and the priority as value.
     * The map will contain both the PresenceTerms and the AbsenceTerms.
     */
    private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;

    @Autowired
    private IDescriptionService descriptionService;

    @Autowired
    private ITermService termService;

    @Autowired
    private ITaxonService taxonService;

    @Autowired
    private IClassificationService classificationService;

    @Autowired
    private IClassificationDao classificationDao;

    @Autowired
    private INameService mameService;

    @Autowired
    private HibernateTransactionManager transactionManager;

    private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;

    private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;

    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();


    /**
     * byAreaIgnoreStatusList contains by default:
     *  <ul>
     *    <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
     *    <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
     *    <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
     *    <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
     *    <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
     *  </ul>
     *
     * @return the byAreaIgnoreStatusList
     */
    public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
        if(byAreaIgnoreStatusList == null ){
            byAreaIgnoreStatusList = Arrays.asList(
                    new PresenceAbsenceTerm[] {
                            PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
                            PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
                            // TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
                    });
        }
        return byAreaIgnoreStatusList;
    }

    /**
     * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
     */
    public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
        this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
    }

    /**
     * byRankIgnoreStatusList contains by default
     *  <ul>
     *    <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
     *  </ul>
     *
     * @return the byRankIgnoreStatusList
     */
    public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {

        if (byRankIgnoreStatusList == null) {
            byRankIgnoreStatusList = Arrays.asList(
                    new PresenceAbsenceTerm[] {
                            PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
                    });
        }
        return byRankIgnoreStatusList;
    }

    /**
     * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
     */
    public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
        this.byRankIgnoreStatusList = byRankIgnoreStatusList;
    }

    /**
     * Default constructor.
     */
    public TransmissionEngineDistribution() {
    }

    /**
     * Initializes the map which contains the status terms as key and the priority as value.
     * The map will contain both the PresenceTerms and the AbsenceTerms.
     */
    private void initializeStatusPriorityMap() {

        statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
        Integer priority;

        // PresenceTerms
        for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
            priority = getPriorityFor(term);
            if(priority != null){
                statusPriorityMap.put(term, priority);
            }
        }
    }

    /**
     * Compares the PresenceAbsenceTerm terms <code>a</code> and <code>b</code> and
     * returns the term with the higher priority as stored in the statusPriorityMap.
     * If either a or b is null, the other one is returned.
     *
     * @see #initializeStatusPriorityMap()
     *
     * @param a
     * @param b
     * @return the term with the higher priority
     */
    private PresenceAbsenceTerm choosePreferred(PresenceAbsenceTerm a, PresenceAbsenceTerm b){

        if (statusPriorityMap == null) {
            initializeStatusPriorityMap();
        }

        if (b == null) {
            return a;
        }
        if (a == null) {
            return b;
        }

        if (statusPriorityMap.get(a) == null) {
            logger.warn("No priority found in map for " + a.getLabel());
            return b;
        }
        if (statusPriorityMap.get(b) == null) {
            logger.warn("No priority found in map for " + b.getLabel());
            return a;
        }
        if(statusPriorityMap.get(a) > statusPriorityMap.get(b)){
            return a;
        } else {
            return b;
        }
    }
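
    /*
     * Example (priority values as assigned by updatePriorities() below, assuming the
     * priority extensions have already been written to the terms): NATIVE has priority
     * 130 and INTRODUCED has priority 90, so choosePreferred(PresenceAbsenceTerm.INTRODUCED(),
     * PresenceAbsenceTerm.NATIVE()) would return NATIVE().
     */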

    /**
     * Reads the priority for the given status term from the extensions.
     *
     * @param term
     * @return the priority value
     */
    private Integer getPriorityFor(DefinedTermBase<?> term) {
        Set<Extension> extensions = term.getExtensions();
        for(Extension extension : extensions){
            if(!extension.getType().equals(ExtensionType.ORDER())) {
                continue;
            }
            int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
            if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
                try {
                    Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
                    return priority;
                } catch (NumberFormatException e) {
                    logger.warn("Invalid number format in Extension: " + extension.getValue());
                }
            }
        }
        logger.warn("no priority defined for '" + term.getLabel() + "'");
        return null;
    }
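
    /*
     * The priority is read from an Extension of type ExtensionType.ORDER() whose value
     * starts with EXTENSION_VALUE_PREFIX; such values are written by updatePriorities()
     * below. The concrete number is illustrative:
     *
     *   "transmissionEngineDistribution.priority:130"   // e.g. the priority for NATIVE()
     */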

    /**
     * Runs both steps:
     * <ul>
     * <li>Step 1: Accumulate occurrence records by area</li>
     * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank;
     * the status of all children is accumulated on each rank starting from
     * lower rank to upper rank.</li>
     * </ul>
     *
     * @param mode
     *            the aggregation mode: by areas, by ranks, or both
     * @param superAreas
     *            the areas to which the subordinate areas should be projected.
     * @param lowerRank
     * @param upperRank
     * @param classification
     *            limit the accumulation process to a specific classification
     *            (not yet implemented)
     * @param monitor
     *            the progress monitor to use for reporting progress to the
     *            user. It is the caller's responsibility to call done() on the
     *            given monitor. Accepts null, indicating that no progress
     *            should be reported and that the operation cannot be cancelled.
     */
    public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
            Classification classification, IProgressMonitor monitor) {

        if (monitor == null) {
            monitor = new NullProgressMonitor();
        }

        logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations

        logger.info("Hibernate JDBC Batch size: "
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());

        // only for debugging:
        logger.setLevel(Level.INFO);
        //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);

        Set<Classification> classifications = new HashSet<Classification>();
        if(classification == null) {
            classifications.addAll(classificationService.listClassifications(null, null, null, null));
        } else {
            classifications.add(classification);
        }

        int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;

        // take start time for performance testing
        // NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
        double start = System.currentTimeMillis();

        monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
        updatePriorities();
        monitor.worked(1);

        for(Classification _classification : classifications) {

            ClassificationLookupDTO classificationLookupDao = classificationDao.classificationLookup(_classification);

            monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
            if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
                accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200),
                        mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks));
            }
            monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());

            double end1 = System.currentTimeMillis();

            logger.info("Time elapsed for accumulateByArea() : " + (end1 - start) / (1000) + "s");

            double start2 = System.currentTimeMillis();
            if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
                accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200),
                        mode.equals(AggregationMode.byRanks));
            }

            double end2 = System.currentTimeMillis();
            logger.info("Time elapsed for accumulateByRank() : " + (end2 - start2) / (1000) + "s");
            logger.info("Time elapsed for accumulate(): " + (end2 - start) / (1000) + "s");

            if(ONLY_FISRT_BATCH) {
                break;
            }
        }
    }

    /**
     * @return the current Hibernate session, obtained via the description service
     */
    private Session getSession() {
        return descriptionService.getSession();
    }

    /**
     * Step 1: Accumulate occurrence records by area
     * <ul>
     * <li>areas are projected to super areas, e.g.: HS <-- HS(A), HS(G), HS(S)</li>
     * <li>super areas do initially not have a status set ==> Prerequisite to check in CDM</li>
     * <li>distributions having a status value listed in {@link #getByAreaIgnoreStatusList()} are ignored</li>
     * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
     * <li>the source references of the accumulated distributions are also accumulated into the new distribution;
     *     this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
     * </ul>
     *
     * @param superAreas
     *      the areas to which the subordinate areas should be projected
     * @param classificationLookupDao
     *
     */
    protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 1000;

        TransactionStatus txStatus = startTransaction(false);

        // reload superAreas TODO is it faster to getSession().merge(object) ??
        Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
        for (NamedArea superArea : superAreas){
            superAreaUuids.add(superArea.getUuid());
        }
        List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);

        // visit all accepted taxa
        subMonitor.beginTask("Accumulating by area ",  classificationLookupDao.getTaxonIds().size());
        Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();

        while (taxonIdIterator.hasNext()) {

            if(txStatus == null) {
                // transaction has been committed at the end of this batch, start a new one
                txStatus = startTransaction(false);
            }

            // load taxa for this batch
            List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
            Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
            while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
                taxonIds.add(taxonIdIterator.next());
            }

//            logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");

            taxa = taxonService.listByIds(taxonIds, null, null, null, TAXONDESCRIPTION_INIT_STRATEGY);

            // iterate over the taxa and accumulate areas
            for(TaxonBase taxon : taxa) {
                if(logger.isDebugEnabled()){
                    logger.debug("accumulateByArea() - taxon :" + taxonToString(taxon));
                }

                TaxonDescription description = findComputedDescription((Taxon)taxon, doClearDescriptions);
                List<Distribution> distributions = distributionsFor((Taxon)taxon);

                // Step through superAreas for accumulation of subAreas
                for (NamedArea superArea : superAreaList){

                    // accumulate all sub area status
                    PresenceAbsenceTerm accumulatedStatus = null;
                    // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
                    Set<NamedArea> subAreas = getSubAreasFor(superArea);
                    for(NamedArea subArea : subAreas){
                        if(logger.isTraceEnabled()){
                            logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
                        }
                        // step through all distributions for the given subArea
                        for(Distribution distribution : distributions){
                            if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                if(logger.isTraceEnabled()){
                                    logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
                                }
                                // skip all distributions having a status value listed in byAreaIgnoreStatusList
                                if (getByAreaIgnoreStatusList().contains(status)){
                                    continue;
                                }
                                accumulatedStatus = choosePreferred(accumulatedStatus, status);
                            }
                        }
                    } // next sub area
                    if (accumulatedStatus != null) {
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));
                        }
                        // store new distribution element for superArea in taxon description
                        Distribution newDistributionElement = Distribution.NewInstance(superArea, accumulatedStatus);
                        newDistributionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                        description.addElement(newDistributionElement);
                    }

                } // next super area ....

                descriptionService.saveOrUpdate(description);
                taxonService.saveOrUpdate(taxon);
                subMonitor.worked(1);

            } // next taxon

            flushAndClear();

            // commit for every batch, otherwise the persistent context
            // may grow too much and eat up all the heap
            commitTransaction(txStatus);
            txStatus = null;

            if(ONLY_FISRT_BATCH) {
                break;
            }

        } // next batch of taxa

        subMonitor.done();
    }

    /**
     * @param taxon
     * @return a string representation of the taxon for logging
     */
    private String taxonToString(TaxonBase taxon) {
        if(logger.isTraceEnabled()) {
            return taxon.getTitleCache();
        } else {
            return taxon.toString();
        }
    }

    /**
     * @param term
     * @return a string representation of the term for logging
     */
    private String termToString(OrderedTermBase<?> term) {
        if(logger.isTraceEnabled()) {
            return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
        } else {
            return term.getIdInVocabulary();
        }
    }

   /**
    * Step 2: Accumulate by ranks starting from lower rank to upper rank; the status of all children
    * is accumulated on each rank starting from lower rank to upper rank.
    * <ul>
    * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
    *    up to upper rank (e.g. Genus)</li>
    *  <li>the accumulation is done for each distribution area found in the included taxa</li>
    *  <li>areas of subtaxa with status endemic are ignored</li>
    *  <li>the status with the highest priority determines the value for the accumulated distribution</li>
    *  <li>the source references of the accumulated distributions are also accumulated into the new distribution,
    *    this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
    *</ul>
    */
    protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification,  IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 500;

        TransactionStatus txStatus = startTransaction(false);

        // the loadRankSpecificRootNodes() method not only finds
        // taxa of the specified rank but also taxa of lower ranks
        // if no taxon of the specified rank exists, so we need to
        // remember which taxa have been processed already
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();

        Rank currentRank = lowerRank;
        List<Rank> ranks = new ArrayList<Rank>();
        ranks.add(currentRank);
        while (!currentRank.isHigher(upperRank)) {
            currentRank = findNextHigherRank(currentRank);
            ranks.add(currentRank);
        }

        int ticksPerRank = 100;
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);

        for (Rank rank : ranks) {

            if(logger.isDebugEnabled()){
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
            }

            Pager<TaxonNode> taxonPager = null;
            int pageIndex = 0;
            boolean isLastPage = false;
            SubProgressMonitor taxonSubMonitor = null;
            while (!isLastPage) {

                if(txStatus == null) {
                    // transaction has been committed at the end of this batch, start a new one
                    txStatus = startTransaction(false);
                }

                taxonPager = classificationService
                        .pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null);

                if(taxonSubMonitor == null) {
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
                    taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount().intValue());
                }

                if(taxonPager != null){
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
                    }
                } else {
                    logger.error("accumulateByRank() - taxonNode pager was NULL");
                }

                if(taxonPager != null){
                    isLastPage = taxonPager.getRecords().size() < batchSize;
                    if (taxonPager.getRecords().size() == 0){
                        break;
                    }

                    for(TaxonNode taxonNode : taxonPager.getRecords()) {

                        Taxon taxon = taxonNode.getTaxon();
                        if (taxaProcessedIds.contains(taxon.getId())) {
                            if(logger.isDebugEnabled()){
                                logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
                            }
                            continue;
                        }
                        taxaProcessedIds.add(taxon.getId());
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
                        }

                        // Step through direct taxonomic children for accumulation
                        Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();

                        for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){

                            getSession().setReadOnly(taxonNode, true);
                            if(logger.isTraceEnabled()){
                                logger.trace("                   subtaxon :" + taxonToString(subTaxonNode.getTaxon()));
                            }

                            for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                NamedArea area = distribution.getArea();
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
                                    continue;
                                }
                                accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));
                            }
                        }

                        if(accumulatedStatusMap.size() > 0) {
                            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                            for (NamedArea area : accumulatedStatusMap.keySet()) {
                                // store new distribution element in new Description
                                Distribution newDistributionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
                                newDistributionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                                description.addElement(newDistributionElement);
                            }
                            taxonService.saveOrUpdate(taxon);
                            descriptionService.saveOrUpdate(description);
                        }
                        taxonSubMonitor.worked(1); // one taxon worked

                    } // next taxon node ....
                }
                taxonPager = null;
                flushAndClear();

                // commit for every batch, otherwise the persistent context
                // may grow too much and eat up all the heap
                commitTransaction(txStatus);
                txStatus = null;

                if(ONLY_FISRT_BATCH) {
                    break;
                }
            } // next batch

            taxonSubMonitor.done();
            subMonitor.worked(1);

            if(ONLY_FISRT_BATCH) {
                break;
            }
        } // next Rank

        subMonitor.done();
    }

    /**
     * Flushes and clears the current session (and the full text indexes, if configured).
     */
    private void flushAndClear() {
        logger.debug("flushing and clearing session ...");
        getSession().flush();
        try {
            Search.getFullTextSession(getSession()).flushToIndexes();
        } catch (HibernateException e) {
            /* IGNORE - Hibernate Search Event listeners not configured ... */
            if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
                throw e;
            }
        }
        getSession().clear();
    }


    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
    public TransactionStatus startTransaction(Boolean readOnly) {

        DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
        defaultTxDef.setReadOnly(readOnly);
        TransactionDefinition txDef = defaultTxDef;

        // Log some transaction-related debug information.
        if (logger.isTraceEnabled()) {
            logger.trace("Transaction name = " + txDef.getName());
            logger.trace("Transaction facets:");
            logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
            logger.trace("Isolation level = " + txDef.getIsolationLevel());
            logger.trace("Timeout = " + txDef.getTimeout());
            logger.trace("Read Only = " + txDef.isReadOnly());
            // org.springframework.orm.hibernate5.HibernateTransactionManager
            // provides more transaction/session-related debug information.
        }

        TransactionStatus txStatus = transactionManager.getTransaction(txDef);

        getSession().setFlushMode(FlushMode.COMMIT);

        return txStatus;
    }

    // TODO merge with CdmApplicationDefaultConfiguration#commitTransaction() into common base class
    public void commitTransaction(TransactionStatus txStatus){
        logger.debug("committing transaction ...");
        transactionManager.commit(txStatus);
        return;
    }

    /**
     * Returns the next higher rank.
     *
     * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
     *
     * @param rank
     * @return the next higher rank
     */
    private Rank findNextHigherRank(Rank rank) {
        rank = (Rank) termService.load(rank.getUuid());
        return rank.getNextHigherTerm();
//        OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();
//        return rankVocabulary.getNextHigherTerm(rank);
    }

    /**
     * Either finds an existing computed taxon description of the given taxon or creates a new one.
     * If doClear is set, all existing Distribution elements will be cleared.
     *
     * @param taxon
     * @param doClear will remove all existing Distributions if the taxon already
     * has a MarkerType.COMPUTED() TaxonDescription
     * @return the computed TaxonDescription
     */
    private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {

        String descriptionTitle = this.getClass().getSimpleName();

        // find existing one
        for (TaxonDescription description : taxon.getDescriptions()) {
            if (description.hasMarker(MarkerType.COMPUTED(), true)) {
                logger.debug("reusing description for " + taxon.getTitleCache());
                if (doClear) {
                    int deleteCount = 0;
                    Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
                    for (DescriptionElementBase descriptionElement : description.getElements()) {
                        if(descriptionElement instanceof Distribution) {
                            deleteCandidates.add(descriptionElement);
                        }
                    }
                    if(deleteCandidates.size() > 0){
                        for(DescriptionElementBase descriptionElement : deleteCandidates) {
                            description.removeElement(descriptionElement);
                            descriptionService.deleteDescriptionElement(descriptionElement);
                            descriptionElement = null;
                            deleteCount++;
                        }
                        descriptionService.saveOrUpdate(description);
                        logger.debug("\t" + deleteCount +" distributions cleared");
                    }

                }
                return description;
            }
        }

        // create a new one
        logger.debug("creating new description for " + taxon.getTitleCache());
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
        description.setTitleCache(descriptionTitle, true);
        description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
        return description;
    }

    /**
     * @param superArea
     * @return the set of NamedAreas included in the given super area
     */
    private Set<NamedArea> getSubAreasFor(NamedArea superArea) {

        if(!subAreaMap.containsKey(superArea)) {
            if(logger.isDebugEnabled()){
                logger.debug("loading included areas for " + superArea.getLabel());
            }
            subAreaMap.put(superArea, superArea.getIncludes());
        }
        return subAreaMap.get(superArea);
    }

    /**
     * @param taxon
     * @return all Distribution elements found in the descriptions of the given taxon
     */
    private List<Distribution> distributionsFor(Taxon taxon) {
        List<Distribution> distributions = new ArrayList<Distribution>();
        for(TaxonDescription description: taxon.getDescriptions()) {
            for(DescriptionElementBase deb : description.getElements()) {
                if(deb instanceof Distribution) {
                    distributions.add((Distribution)deb);
                }
            }
        }
        return distributions;
    }

    /**
     * Sets the priorities for presence and absence terms; the priorities are stored in extensions.
     * This method starts a new transaction and commits it after the work is done.
     */
    public void updatePriorities() {

        TransactionStatus txStatus = startTransaction(false);

        Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();

        priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
        priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
        priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);

        for(PresenceAbsenceTerm term : priorityMap.keySet()) {
            // load the term
            term = (PresenceAbsenceTerm) termService.load(term.getUuid());
            // find the extension
            Extension priorityExtension = null;
            Set<Extension> extensions = term.getExtensions();
            for(Extension extension : extensions){
                if (!extension.getType().equals(ExtensionType.ORDER())) {
                    continue;
                }
                int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
                if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
                    priorityExtension = extension;
                    break;
                }
            }
            if(priorityExtension == null) {
                priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
            }
            priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));

            // save the term
            termService.saveOrUpdate(term);
            if (logger.isDebugEnabled()) {
                logger.debug("Priority updated for " + term.getLabel());
            }
        }

        commitTransaction(txStatus);
    }

    public enum AggregationMode {
        byAreas,
        byRanks,
        byAreasAndRanks
    }
}