Project

General

Profile

Download (37 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2013 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.api.service.description;
11

    
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.Iterator;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20
import java.util.UUID;
21

    
22
import org.apache.log4j.Level;
23
import org.apache.log4j.Logger;
24
import org.hibernate.FlushMode;
25
import org.hibernate.HibernateException;
26
import org.hibernate.Session;
27
import org.hibernate.engine.spi.SessionFactoryImplementor;
28
import org.hibernate.search.Search;
29
import org.springframework.beans.factory.annotation.Autowired;
30
import org.springframework.orm.hibernate5.HibernateTransactionManager;
31
import org.springframework.stereotype.Service;
32
import org.springframework.transaction.TransactionDefinition;
33
import org.springframework.transaction.TransactionStatus;
34
import org.springframework.transaction.support.DefaultTransactionDefinition;
35

    
36
import eu.etaxonomy.cdm.api.service.IClassificationService;
37
import eu.etaxonomy.cdm.api.service.IDescriptionService;
38
import eu.etaxonomy.cdm.api.service.INameService;
39
import eu.etaxonomy.cdm.api.service.ITaxonService;
40
import eu.etaxonomy.cdm.api.service.ITermService;
41
import eu.etaxonomy.cdm.api.service.pager.Pager;
42
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
43
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
44
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
45
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
46
import eu.etaxonomy.cdm.model.common.Extension;
47
import eu.etaxonomy.cdm.model.common.ExtensionType;
48
import eu.etaxonomy.cdm.model.common.Marker;
49
import eu.etaxonomy.cdm.model.common.MarkerType;
50
import eu.etaxonomy.cdm.model.common.OrderedTermBase;
51
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
52
import eu.etaxonomy.cdm.model.description.Distribution;
53
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
54
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55
import eu.etaxonomy.cdm.model.location.NamedArea;
56
import eu.etaxonomy.cdm.model.name.Rank;
57
import eu.etaxonomy.cdm.model.taxon.Classification;
58
import eu.etaxonomy.cdm.model.taxon.Taxon;
59
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
60
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
61
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
62
import eu.etaxonomy.cdm.persistence.query.OrderHint;
63

    
64
/**
65
 *
66
 * <h2>GENERAL NOTES </h2>
67
 * <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
68
 * version 14 written in Visual Basic and still need to be
69
 * adapted to the java version of the transmission engine!</em>
70
 *
71
 * <h3>summaryStatus</h3>
72
 *
73
 *   Each distribution information has a summaryStatus, this is an summary of the status codes
74
 *   as stored in the fields of emOccurrence native, introduced, cultivated, ...
75
 *   The summaryStatus seems to be equivalent to  the CDM DistributionStatus
76
 *
77
 * <h3>map generation</h3>
78
 *
79
 *   When generating maps from the accumulated distribution information some special cases have to be handled:
80
 * <ol>
81
 *   <li>if a entered or imported status information exist for the same area for which calculated (accumulated)
82
 *       data is available, the calculated data has to be given preference over other data.
83
 *   </li>
84
 *   <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
85
 *       status should be shown in the map, whereas the super area should be ignored.
86
 *   </li>
87
 * </ol>
88
 *
89
 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
90
 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
91
 * @date Feb 22, 2013
92
 */
93
@Service
94
public class TransmissionEngineDistribution { //TODO extends IoBase?
95

    
96
    /** Prefix of the extension value that encodes a status term's priority, e.g. "transmissionEngineDistribution.priority:5". */
    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";

    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);

    /**
     * only used for performance testing: when <code>true</code> the accumulation
     * loops stop after the first batch / first classification.
     * NOTE(review): name contains a typo ("FISRT" instead of "FIRST"); kept as is
     * here because other methods of this class reference it by this name.
     */
    final boolean ONLY_FISRT_BATCH = true;

    // property paths to initialize when loading taxa, so that the accumulation
    // loops do not trigger per-element lazy loading of markers, areas, status
    // and citation authors
    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
            "description.markers.markerType",
            "description.elements.markers.markerType",
            "description.elements.area",
            "description.elements.status",
            "description.elements.sources.citation.authorship",
//            "description.elements.sources.nameUsedInSource",
//            "description.elements.multilanguageText",
//            "name.status.type",
    });

    /**
     * A map which contains the status terms as key and the priority as value
     * The map will contain both, the PresenceTerms and the AbsenceTerms
     */
    private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;

    @Autowired
    private IDescriptionService descriptionService;

    @Autowired
    private ITermService termService;

    @Autowired
    private ITaxonService taxonService;

    @Autowired
    private IClassificationService classificationService;

    // NOTE(review): field name looks like a typo for "nameService"; only referenced
    // from commented-out code in this file, but renaming is out of scope here.
    @Autowired
    private INameService mameService;

    @Autowired
    private HibernateTransactionManager transactionManager;

    // lazily initialized, see getByAreaIgnoreStatusList()
    private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;

    // lazily initialized, see getByRankIgnoreStatusList()
    private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;

    // cache of super area -> sub areas; presumably filled by getSubAreasFor() — TODO confirm, filler not in view
    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();

    // shared empty order-hint list to avoid re-allocating it for every service call
    private final List<OrderHint> emptyOrderHints = new ArrayList<OrderHint>(0);
149

    
150

    
151
    /**
152
     * byAreaIgnoreStatusList contains by default:
153
     *  <ul>
154
     *    <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
155
     *    <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
156
     *    <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
157
     *    <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
158
     *    <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
159
     *  </ul>
160
     *
161
     * @return the byAreaIgnoreStatusList
162
     */
163
    public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
164
        if(byAreaIgnoreStatusList == null ){
165
            byAreaIgnoreStatusList = Arrays.asList(
166
                    new PresenceAbsenceTerm[] {
167
                    		PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
168
                    		PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
169
                    		PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
170
                    		PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
171
                    		PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
172
                            // TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
173
                    });
174
        }
175
        return byAreaIgnoreStatusList;
176
    }
177

    
178
    /**
179
     * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
180
     */
181
    public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
182
        this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
183
    }
184

    
185
    /**
186
     * byRankIgnoreStatusList contains by default
187
     *  <ul>
188
     *    <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
189
     *  </ul>
190
     *
191
     * @return the byRankIgnoreStatusList
192
     */
193
    public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {
194

    
195
        if (byRankIgnoreStatusList == null) {
196
            byRankIgnoreStatusList = Arrays.asList(
197
                    new PresenceAbsenceTerm[] {
198
                    		PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
199
                    });
200
        }
201
        return byRankIgnoreStatusList;
202
    }
203

    
204
    /**
205
     * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
206
     */
207
    public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
208
        this.byRankIgnoreStatusList = byRankIgnoreStatusList;
209
    }
210

    
211
    /**
     * Default constructor. Dependencies are injected by Spring; the engine is
     * configured through the setters of this class.
     * (The former javadoc documented a {@code superAreas} parameter that this
     * constructor does not have.)
     */
    public TransmissionEngineDistribution() {
    }
217

    
218
    /**
219
     * initializes the map which contains the status terms as key and the priority as value
220
     * The map will contain both, the PresenceTerms and the AbsenceTerms
221
     */
222
    private void initializeStatusPriorityMap() {
223

    
224
        statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
225
        Integer priority;
226

    
227
        // PresenceTerms
228
        for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
229
            priority = getPriorityFor(term);
230
            if(priority != null){
231
                statusPriorityMap.put(term, priority);
232
            }
233
        }
234
    }
235

    
236
    /**
237
     * Compares the PresenceAbsenceTermBase terms <code>a</code> and <code>b</code>  and
238
     * returns the PresenceAbsenceTermBase with the higher priority as stored in the statusPriorityMap.
239
     * If either a or b are null b or a is returned.
240
     *
241
     * @see initializeStatusPriorityMap()
242
     *
243
     * @param a
244
     * @param b
245
     * @return
246
     */
247
    private PresenceAbsenceTerm choosePreferred(PresenceAbsenceTerm a, PresenceAbsenceTerm b){
248

    
249
        if (statusPriorityMap == null) {
250
            initializeStatusPriorityMap();
251
        }
252

    
253
        if (b == null) {
254
            return a;
255
        }
256
        if (a == null) {
257
            return b;
258
        }
259

    
260
        if (statusPriorityMap.get(a) == null) {
261
            logger.warn("No priority found in map for " + a.getLabel());
262
            return b;
263
        }
264
        if (statusPriorityMap.get(b) == null) {
265
            logger.warn("No priority found in map for " + b.getLabel());
266
            return a;
267
        }
268
        if(statusPriorityMap.get(a) > statusPriorityMap.get(b)){
269
            return a;
270
        } else {
271
            return b;
272
        }
273
    }
274

    
275
    /**
276
     * reads the priority for the given status term from the extensions.
277
     *
278
     * @param term
279
     * @return the priority value
280
     */
281
    private Integer getPriorityFor(DefinedTermBase<?> term) {
282
        Set<Extension> extensions = term.getExtensions();
283
        for(Extension extension : extensions){
284
            if(!extension.getType().equals(ExtensionType.ORDER())) {
285
                continue;
286
            }
287
            int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
288
            if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
289
                try {
290
                    Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
291
                    return priority;
292
                } catch (NumberFormatException e) {
293
                    logger.warn("Invalid number format in Extension:" + extension.getValue());
294
                }
295
            }
296
        }
297
        logger.warn("no priority defined for '" + term.getLabel() + "'");
298
        return null;
299
    }
300

    
301
    /**
302
     * runs both steps
303
     * <ul>
304
     * <li>Step 1: Accumulate occurrence records by area</li>
305
     * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank,
306
     * the status of all children are accumulated on each rank starting from
307
     * lower rank to upper rank.</li>
308
     * </ul>
309
     *
310
     * @param superAreas
311
     *            the areas to which the subordinate areas should be projected.
312
     * @param lowerRank
313
     * @param upperRank
314
     * @param classification
315
     * @param classification
316
     *            limit the accumulation process to a specific classification
317
     *            (not yet implemented)
318
     * @param monitor
319
     *            the progress monitor to use for reporting progress to the
320
     *            user. It is the caller's responsibility to call done() on the
321
     *            given monitor. Accepts null, indicating that no progress
322
     *            should be reported and that the operation cannot be cancelled.
323
     */
324
    public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
325
            Classification classification, IProgressMonitor monitor) {
326

    
327
        if (monitor == null) {
328
            monitor = new NullProgressMonitor();
329
        }
330

    
331
        logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations
332

    
333
        logger.info("Hibernate JDBC Batch size: "
334
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
335

    
336
        // only for debugging:
337
        logger.setLevel(Level.INFO);
338
        //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);
339

    
340
        Set<Classification> classifications = new HashSet<Classification>();
341
        if(classification == null) {
342
            classifications.addAll(classificationService.listClassifications(null, null, null, null));
343
        } else {
344
            classifications.add(classification);
345
        }
346

    
347
        int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;
348

    
349
        // take start time for performance testing
350
        // NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
351
        double start = System.currentTimeMillis();
352

    
353
        monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
354
        updatePriorities();
355
        monitor.worked(1);
356

    
357
        for(Classification _classification : classifications) {
358

    
359
            ClassificationLookupDTO classificationLookupDao = classificationService.classificationLookup(_classification);
360

    
361
            double end1 = System.currentTimeMillis();
362
            logger.info("Time elapsed for classificationLookup() : " + (end1 - start) / (1000) + "s");
363
            double start2 = System.currentTimeMillis();
364

    
365
            monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
366
            if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
367
                accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200), true);
368
            }
369
            monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());
370

    
371
            double end2 = System.currentTimeMillis();
372
            logger.info("Time elapsed for accumulateByArea() : " + (end2 - start2) / (1000) + "s");
373

    
374
            double start3 = System.currentTimeMillis();
375
            if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
376
                accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
377
            }
378

    
379
            double end3 = System.currentTimeMillis();
380
            logger.info("Time elapsed for accumulateByRank() : " + (end3 - start3) / (1000) + "s");
381
            logger.info("Time elapsed for accumulate(): " + (end3 - start) / (1000) + "s");
382

    
383
            if(ONLY_FISRT_BATCH) {
384
                monitor.done();
385
                break;
386
            }
387
        }
388
    }
389

    
390
    /**
     * Convenience accessor for the current Hibernate {@link Session}, as
     * exposed by the description service.
     *
     * @return the current session
     */
    private Session getSession() {
        return descriptionService.getSession();
    }
396

    
397
    /**
398
     * Step 1: Accumulate occurrence records by area
399
     * <ul>
400
     * <li>areas are projected to super areas e.g.:  HS <-- HS(A), HS(G), HS(S)</li>
401
     * <li>super areas do initially not have a status set ==> Prerequisite to check in CDM</li>
402
     * <li>areas having a summary status of summary value different from {@link #getByAreaIgnoreStatusList()} are ignored</li>
403
     * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
404
     * <li>the source references of the accumulated distributions are also accumulated into the new distribution,,</li>
405
     * <li>this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
406
     * </ul>
407
     *
408
     * @param superAreas
409
     *      the areas to which the subordinate areas should be projected
410
     * @param classificationLookupDao
411
     *
412
     */
413
    protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
414

    
415
        int batchSize = 1000;
416

    
417
        TransactionStatus txStatus = startTransaction(false);
418

    
419
        // reload superAreas TODO is it faster to getSession().merge(object) ??
420
        Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
421
        for (NamedArea superArea : superAreas){
422
            superAreaUuids.add(superArea.getUuid());
423
        }
424
        List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);
425

    
426
        // visit all accepted taxa
427
        subMonitor.beginTask("Accumulating by area ",  classificationLookupDao.getTaxonIds().size());
428
        Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();
429

    
430
        while (taxonIdIterator.hasNext()) {
431

    
432
            if(txStatus == null) {
433
                // transaction has been comitted at the end of this batch, start a new one
434
                txStatus = startTransaction(false);
435
            }
436

    
437
            // load taxa for this batch
438
            List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
439
            Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
440
            while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
441
                taxonIds.add(taxonIdIterator.next());
442
            }
443

    
444
//            logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
445

    
446
            taxa = taxonService.listByIds(taxonIds, null, null, emptyOrderHints, TAXONDESCRIPTION_INIT_STRATEGY);
447

    
448
            // iterate over the taxa and accumulate areas
449
            for(TaxonBase taxonBase : taxa) {
450
                if(logger.isDebugEnabled()){
451
                    logger.debug("accumulateByArea() - taxon :" + taxonToString(taxonBase));
452
                }
453

    
454
                Taxon taxon = (Taxon)taxonBase;
455
                TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
456
                List<Distribution> distributions = distributionsFor(taxon);
457

    
458

    
459
                // Step through superAreas for accumulation of subAreas
460
                for (NamedArea superArea : superAreaList){
461

    
462
                    // accumulate all sub area status
463
                    PresenceAbsenceTerm accumulatedStatus = null;
464
                    // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
465
                    Set<NamedArea> subAreas = getSubAreasFor(superArea);
466
                    for(NamedArea subArea : subAreas){
467
                        if(logger.isTraceEnabled()){
468
                            logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
469
                        }
470
                        // step through all distributions for the given subArea
471
                        for(Distribution distribution : distributions){
472
                            if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
473
                                PresenceAbsenceTerm status = distribution.getStatus();
474
                                if(logger.isTraceEnabled()){
475
                                    logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
476
                                }
477
                                // skip all having a status value different of those in byAreaIgnoreStatusList
478
                                if (getByAreaIgnoreStatusList().contains(status)){
479
                                    continue;
480
                                }
481
                                accumulatedStatus = choosePreferred(accumulatedStatus, status);
482
                            }
483
                        }
484
                    } // next sub area
485
                    if (accumulatedStatus != null) {
486
                        if(logger.isDebugEnabled()){
487
                            logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));
488
                        }
489
                        // store new distribution element for superArea in taxon description
490
                        Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
491
                        newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
492
                        description.addElement(newDistribitionElement);
493
                    }
494

    
495
                } // next super area ....
496

    
497
                descriptionService.saveOrUpdate(description);
498
                taxonService.saveOrUpdate(taxon);
499
                subMonitor.worked(1);
500

    
501
            } // next taxon
502

    
503
            flushAndClear();
504

    
505
            // commit for every batch, otherwise the persistent context
506
            // may grow too much and eats up all the heap
507
            commitTransaction(txStatus);
508
            txStatus = null;
509

    
510
            if(ONLY_FISRT_BATCH) {
511
                break;
512
            }
513

    
514
        } // next batch of taxa
515

    
516
        subMonitor.done();
517
    }
518

    
519
   /**
    * Step 2: Accumulate by ranks starting from lower rank to upper rank, the status of all children
    * are accumulated on each rank starting from lower rank to upper rank.
    * <ul>
    * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
    *    up to upper rank (e.g. Genus)</li>
    *  <li>the accumulation is done for each distribution area found in the included taxa</li>
    *  <li>areas of subtaxa with status endemic are ignored</li>
    *  <li>the status with the highest priority determines the value for the accumulated distribution</li>
    *  <li>the source reference of the accumulated distributions are also accumulated into the new distribution,
    *    this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
    *</ul>
    *
    * @param lowerRank rank at which the accumulation starts
    * @param upperRank rank up to which the accumulation runs
    * @param classification the classification whose rank-specific root nodes are paged over
    * @param subMonitor progress monitor for this step
    * @param doClearDescriptions if <code>true</code> existing computed distributions are removed first
    */
    protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification,  IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 500;

        TransactionStatus txStatus = startTransaction(false);

        // the loadRankSpecificRootNodes() method not only finds
        // taxa of the specified rank but also taxa of lower ranks
        // if no taxon of the specified rank exists, so we need to
        // remember which taxa have been processed already
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();

        // build the ordered list of ranks to visit, from lowerRank upwards
        // until a rank higher than upperRank would be reached
        Rank currentRank = lowerRank;
        List<Rank> ranks = new ArrayList<Rank>();
        ranks.add(currentRank);
        while (!currentRank.isHigher(upperRank)) {
            currentRank = findNextHigherRank(currentRank);
            ranks.add(currentRank);
        }

        int ticksPerRank = 100;
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);

        for (Rank rank : ranks) {

            if(logger.isDebugEnabled()){
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
            }

            // page through all root nodes of the current rank, batch by batch
            Pager<TaxonNode> taxonPager = null;
            int pageIndex = 0;
            boolean isLastPage = false;
            SubProgressMonitor taxonSubMonitor = null;
            while (!isLastPage) {

                if(txStatus == null) {
                    // transaction has been committed at the end of the previous batch, start a new one
                    txStatus = startTransaction(false);
                }

                taxonPager = classificationService
                        .pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null);

                // NOTE(review): taxonPager is dereferenced here before the null
                // checks below — if the service could really return null this
                // would already NPE; the later null handling is inconsistent.
                if(taxonSubMonitor == null) {
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
                    taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount().intValue());
                }

                if(taxonPager != null){
                    if(logger.isDebugEnabled()){
                               logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
                    }
                } else {
                    logger.error("accumulateByRank() - taxonNode pager was NULL");
                }

                if(taxonPager != null){
                    // a short page means we have reached the last one
                    isLastPage = taxonPager.getRecords().size() < batchSize;
                    if (taxonPager.getRecords().size() == 0){
                        break;
                    }

                    for(TaxonNode taxonNode : taxonPager.getRecords()) {

                        Taxon taxon = taxonNode.getTaxon();
                        // skip taxa already seen at a lower rank (see comment on taxaProcessedIds above)
                        if (taxaProcessedIds.contains(taxon.getId())) {
                            if(logger.isDebugEnabled()){
                                logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
                            }
                            continue;
                        }
                        taxaProcessedIds.add(taxon.getId());
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
                        }

                        // Step through direct taxonomic children for accumulation
                        Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();

                        for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){

                            getSession().setReadOnly(taxonNode, true);
                            if(logger.isTraceEnabled()){
                                logger.trace("                   subtaxon :" + taxonToString(subTaxonNode.getTaxon()));
                            }

                            // fold each child distribution into the per-area preferred status
                            for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                NamedArea area = distribution.getArea();
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
                                  continue;
                                }
                                accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));
                             }
                        }

                        if(accumulatedStatusMap.size() > 0) {
                            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                            for (NamedArea area : accumulatedStatusMap.keySet()) {
                                // store new distribution element in new Description
                                Distribution newDistribitionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
                                newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                                description.addElement(newDistribitionElement);
                            }
                            taxonService.saveOrUpdate(taxon);
                            descriptionService.saveOrUpdate(description);
                        }
                        taxonSubMonitor.worked(1); // one taxon worked

                    } // next taxon node ....
                }
                taxonPager = null;
                flushAndClear();

                // commit for every batch, otherwise the persistent context
                // may grow too much and eats up all the heap
                commitTransaction(txStatus);
                txStatus = null;

                if(ONLY_FISRT_BATCH) {
                    break;
                }
            } // next batch

            if(taxonSubMonitor != null) { // TODO taxonSubpager, this check should not be needed
                taxonSubMonitor.done();
            }
            subMonitor.worked(1);

            if(ONLY_FISRT_BATCH) {
                break;
            }
        } // next Rank

        subMonitor.done();
    }
668

    
669
    /**
670
     *
671
     */
672
    private void flushAndClear() {
673
        logger.debug("flushing and clearing session ...");
674
        getSession().flush();
675
        try {
676
            Search.getFullTextSession(getSession()).flushToIndexes();
677
        } catch (HibernateException e) {
678
            /* IGNORE - Hibernate Search Event listeners not configured ... */
679
            if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
680
                throw e;
681
            }
682
        }
683
        getSession().clear();
684
    }
685

    
686

    
687
    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
688
    public TransactionStatus startTransaction(Boolean readOnly) {
689

    
690
        DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
691
        defaultTxDef.setReadOnly(readOnly);
692
        TransactionDefinition txDef = defaultTxDef;
693

    
694
        // Log some transaction-related debug information.
695
        if (logger.isTraceEnabled()) {
696
            logger.trace("Transaction name = " + txDef.getName());
697
            logger.trace("Transaction facets:");
698
            logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
699
            logger.trace("Isolation level = " + txDef.getIsolationLevel());
700
            logger.trace("Timeout = " + txDef.getTimeout());
701
            logger.trace("Read Only = " + txDef.isReadOnly());
702
            // org.springframework.orm.hibernate5.HibernateTransactionManager
703
            // provides more transaction/session-related debug information.
704
        }
705

    
706
        TransactionStatus txStatus = transactionManager.getTransaction(txDef);
707

    
708
        getSession().setFlushMode(FlushMode.COMMIT);
709

    
710
        return txStatus;
711
    }
712

    
713
    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
714
    public void commitTransaction(TransactionStatus txStatus){
715
        logger.debug("commiting transaction ...");
716
        transactionManager.commit(txStatus);
717
        return;
718
    }
719

    
720
    /**
721
     * returns the next higher rank
722
     *
723
     * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
724
     *
725
     * @param rank
726
     * @return
727
     */
728
    private Rank findNextHigherRank(Rank rank) {
729
        rank = (Rank) termService.load(rank.getUuid());
730
        return rank.getNextHigherTerm();
731
//        OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();;
732
//        return rankVocabulary.getNextHigherTerm(rank);
733
    }
734

    
735
    /**
736
     * Either finds an existing taxon description of the given taxon or creates a new one.
737
     * If the doClear is set all existing description elements will be cleared.
738
     *
739
     * @param taxon
740
     * @param doClear will remove all existing Distributions if the taxon already
741
     * has a MarkerType.COMPUTED() TaxonDescription
742
     * @return
743
     */
744
    private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {
745

    
746
        String descriptionTitle = this.getClass().getSimpleName();
747

    
748
        // find existing one
749
        for (TaxonDescription description : taxon.getDescriptions()) {
750
            if (description.hasMarker(MarkerType.COMPUTED(), true)) {
751
                logger.debug("reusing description for " + taxon.getTitleCache());
752
                if (doClear) {
753
                    int deleteCount = 0;
754
                    Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
755
                    for (DescriptionElementBase descriptionElement : description.getElements()) {
756
                        if(descriptionElement instanceof Distribution) {
757
                            deleteCandidates.add(descriptionElement);
758
                        }
759
                    }
760
                    if(deleteCandidates.size() > 0){
761
                        for(DescriptionElementBase descriptionElement : deleteCandidates) {
762
                            description.removeElement(descriptionElement);
763
                            descriptionService.deleteDescriptionElement(descriptionElement);
764
                            descriptionElement = null;
765
                            deleteCount++;
766
                        }
767
                        descriptionService.saveOrUpdate(description);
768
                        logger.debug("\t" + deleteCount +" distributions cleared");
769
                    }
770

    
771
                }
772
                return description;
773
            }
774
        }
775

    
776
        // create a new one
777
        logger.debug("creating new description for " + taxon.getTitleCache());
778
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
779
        description.setTitleCache(descriptionTitle, true);
780
        description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
781
        return description;
782
    }
783

    
784
    /**
785
     * @param superArea
786
     * @return
787
     */
788
    private Set<NamedArea> getSubAreasFor(NamedArea superArea) {
789

    
790
        if(!subAreaMap.containsKey(superArea)) {
791
            if(logger.isDebugEnabled()){
792
                logger.debug("loading included areas for " + superArea.getLabel());
793
            }
794
            subAreaMap.put(superArea, superArea.getIncludes());
795
        }
796
        return subAreaMap.get(superArea);
797
    }
798

    
799
    /**
800
     * @param taxon
801
     * @return
802
     */
803
    private List<Distribution> distributionsFor(Taxon taxon) {
804
        List<Distribution> distributions = new ArrayList<Distribution>();
805
        for(TaxonDescription description: taxon.getDescriptions()) {
806
            for(DescriptionElementBase deb : description.getElements()) {
807
                if(deb instanceof Distribution) {
808
                    distributions.add((Distribution)deb);
809
                }
810
            }
811
        }
812
        return distributions;
813
    }
814

    
815
    /**
816
     * @param taxon
817
     * @param logger2
818
     * @return
819
     */
820
    private String taxonToString(TaxonBase taxon) {
821
        if(logger.isTraceEnabled()) {
822
            return taxon.getTitleCache();
823
        } else {
824
            return taxon.toString();
825
        }
826
    }
827

    
828
    /**
829
     * @param taxon
830
     * @param logger2
831
     * @return
832
     */
833
    private String termToString(OrderedTermBase<?> term) {
834
        if(logger.isTraceEnabled()) {
835
            return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
836
        } else {
837
            return term.getIdInVocabulary();
838
        }
839
    }
840

    
841
    /**
842
     * Sets the priorities for presence and absence terms, the priorities are stored in extensions.
843
     * This method will start a new transaction and commits it after the work is done.
844
     */
845
    public void updatePriorities() {
846

    
847
        TransactionStatus txStatus = startTransaction(false);
848

    
849
        Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
850

    
851
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
852
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
853
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
854
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
855
        priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
856
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
857
        priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
858
        priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
859
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
860
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
861
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
862
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
863
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
864
        priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
865
        priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
866
        priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);
867

    
868
        for(PresenceAbsenceTerm term : priorityMap.keySet()) {
869
            // load the term
870
            term = (PresenceAbsenceTerm) termService.load(term.getUuid());
871
            // find the extension
872
            Extension priorityExtension = null;
873
            Set<Extension> extensions = term.getExtensions();
874
            for(Extension extension : extensions){
875
                if (!extension.getType().equals(ExtensionType.ORDER())) {
876
                    continue;
877
                }
878
                int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
879
                if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
880
                    priorityExtension = extension;
881
                    break;
882
                }
883
            }
884
            if(priorityExtension == null) {
885
                priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
886
            }
887
            priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));
888

    
889
            // save the term
890
            termService.saveOrUpdate(term);
891
            if (logger.isDebugEnabled()) {
892
                logger.debug("Priority updated for " + term.getLabel());
893
            }
894
        }
895

    
896
        commitTransaction(txStatus);
897
    }
898

    
899
    /**
     * Modes controlling which dimensions the aggregation runs over —
     * presumably by geographic areas, by taxonomic ranks, or both
     * (NOTE(review): semantics inferred from constant names; confirm against callers).
     */
    public enum AggregationMode {
        byAreas,
        byRanks,
        byAreasAndRanks

    }
905
}
    (1-1/1)