// $Id$
/**
 * Copyright (C) 2013 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
package eu.etaxonomy.cdm.api.service.description;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.apache.log4j.Logger;
import org.hibernate.FlushMode;
import org.hibernate.HibernateException;
import org.hibernate.Session;
import org.hibernate.engine.spi.SessionFactoryImplementor;
import org.hibernate.search.Search;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate5.HibernateTransactionManager;
import org.springframework.stereotype.Service;
import org.springframework.transaction.TransactionDefinition;
import org.springframework.transaction.TransactionStatus;
import org.springframework.transaction.support.DefaultTransactionDefinition;

import eu.etaxonomy.cdm.api.service.IClassificationService;
import eu.etaxonomy.cdm.api.service.IDescriptionService;
import eu.etaxonomy.cdm.api.service.INameService;
import eu.etaxonomy.cdm.api.service.ITaxonService;
import eu.etaxonomy.cdm.api.service.ITermService;
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
import eu.etaxonomy.cdm.model.common.Extension;
import eu.etaxonomy.cdm.model.common.ExtensionType;
import eu.etaxonomy.cdm.model.common.Marker;
import eu.etaxonomy.cdm.model.common.MarkerType;
import eu.etaxonomy.cdm.model.common.OrderedTermBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
import eu.etaxonomy.cdm.model.description.Distribution;
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.location.NamedArea;
import eu.etaxonomy.cdm.model.name.Rank;
import eu.etaxonomy.cdm.model.taxon.Classification;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
import eu.etaxonomy.cdm.persistence.query.OrderHint;

/**
 *
 * <h2>GENERAL NOTES</h2>
 * <em>TODO: These notes are directly taken from the original Transmission Engine Occurrence
 * version 14 written in Visual Basic and still need to be
 * adapted to the Java version of the transmission engine!</em>
 *
 * <h3>summaryStatus</h3>
 *
 *   Each piece of distribution information has a summaryStatus, which is a summary of the status codes
 *   as stored in the fields of emOccurrence (native, introduced, cultivated, ...).
 *   The summaryStatus seems to be equivalent to the CDM DistributionStatus.
 *
 * <h3>map generation</h3>
 *
 *   When generating maps from the accumulated distribution information some special cases have to be handled:
 * <ol>
 *   <li>If entered or imported status information exists for the same area for which calculated (accumulated)
 *       data is available, the calculated data has to be given preference over the other data.
 *   </li>
 *   <li>If there is an area with a sub area and both areas have the same calculated status, only the sub area
 *       status should be shown in the map, whereas the super area should be ignored.
 *   </li>
 * </ol>
 *
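 * <h3>usage</h3>
 *
 *   An illustrative invocation sketch (the engine is obtained as a Spring bean; superAreas,
 *   classification and monitor are assumed to be provided by the caller):
 * <pre>
 *   TransmissionEngineDistribution engine = ...; // obtain from the application context
 *   engine.accumulate(AggregationMode.byAreasAndRanks, superAreas,
 *           Rank.SPECIES(), Rank.GENUS(), classification, monitor);
 * </pre>
 *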
 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
 * @date Feb 22, 2013
 */
@Service
public class TransmissionEngineDistribution { //TODO extends IoBase?

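    /**
     * Prefix of the extension value under which the priority of a status term is stored.
     * The extension is of type {@link ExtensionType#ORDER()} and its value has the form
     * <code>"transmissionEngineDistribution.priority:130"</code>, see {@link #updatePriorities()}
     * and <code>getPriorityFor()</code>.
     */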
    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";

    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);

    /**
     * Only used for performance testing; if <code>true</code>, only the first batch is processed.
     */
    final boolean ONLY_FIRST_BATCH = false;


    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
            "description.markers.markerType",
            "description.elements.markers.markerType",
            "description.elements.area",
            "description.elements.status",
            "description.elements.sources.citation.authorship",
//            "description.elements.sources.nameUsedInSource",
//            "description.elements.multilanguageText",
//            "name.status.type",
    });

    /**
     * A map which contains the status terms as keys and their priorities as values.
     * The map contains both the presence and the absence terms.
     */
    private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;

    @Autowired
    private IDescriptionService descriptionService;

    @Autowired
    private ITermService termService;

    @Autowired
    private ITaxonService taxonService;

    @Autowired
    private IClassificationService classificationService;

    @Autowired
    private INameService nameService;

    @Autowired
    private HibernateTransactionManager transactionManager;

    private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;

    private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;

    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();

    private final List<OrderHint> emptyOrderHints = new ArrayList<OrderHint>(0);

    int byRankTicks = 300;
    int byAreasTicks = 100;

    /**
     * byAreaIgnoreStatusList contains by default:
     *  <ul>
     *    <li>PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
     *    <li>PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
     *    <li>PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
     *    <li>PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
     *    <li>PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
     *  </ul>
     *
     * @return the byAreaIgnoreStatusList
     */
    public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
        if(byAreaIgnoreStatusList == null ){
            byAreaIgnoreStatusList = Arrays.asList(
                    new PresenceAbsenceTerm[] {
                            PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
                            PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
                            PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
                            // TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
                    });
        }
        return byAreaIgnoreStatusList;
    }

    /**
     * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
     */
    public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
        this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
    }

    /**
     * byRankIgnoreStatusList contains by default:
     *  <ul>
     *    <li>PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
     *  </ul>
     *
     * @return the byRankIgnoreStatusList
     */
    public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {

        if (byRankIgnoreStatusList == null) {
            byRankIgnoreStatusList = Arrays.asList(
                    new PresenceAbsenceTerm[] {
                            PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
                    });
        }
        return byRankIgnoreStatusList;
    }

    /**
     * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
     */
    public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
        this.byRankIgnoreStatusList = byRankIgnoreStatusList;
    }

    /**
     * Default constructor; the collaborating services are injected by Spring.
     */
    public TransmissionEngineDistribution() {
    }

    /**
     * Initializes the map which contains the status terms as keys and their priorities as values.
     * The map contains both the presence and the absence terms.
     */
    private void initializeStatusPriorityMap() {

        statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
        Integer priority;

        // all presence and absence terms
        for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
            priority = getPriorityFor(term);
            if(priority != null){
                statusPriorityMap.put(term, priority);
            }
        }
    }

    /**
     * Compares the PresenceAbsenceTerm status contained in <code>a.status</code> and <code>b.status</code>
     * according to the priorities stored in the statusPriorityMap. The StatusAndSources object with
     * the higher priority is returned. If both have the same priority, the sources of b are added to
     * the sources of a and a is returned.
     *
     * If either a or b is null, or its status is null, the other one is returned.
     *
     * @see #initializeStatusPriorityMap()
     *
     * @param a
     * @param b
     * @param sourcesForWinnerB
     *  in the case that <code>b</code> is preferred over <code>a</code>, this set of sources will be added to the sources of <code>b</code>
     * @return the StatusAndSources with the higher priority
     */
    private StatusAndSources choosePreferred(StatusAndSources a, StatusAndSources b, Set<DescriptionElementSource> sourcesForWinnerB){

        if (statusPriorityMap == null) {
            initializeStatusPriorityMap();
        }

        if (b == null || b.status == null) {
            return a;
        }
        if (a == null || a.status == null) {
            return b;
        }

        if (statusPriorityMap.get(a.status) == null) {
            logger.warn("No priority found in map for " + a.status.getLabel());
            return b;
        }
        if (statusPriorityMap.get(b.status) == null) {
            logger.warn("No priority found in map for " + b.status.getLabel());
            return a;
        }
        if(statusPriorityMap.get(a.status) < statusPriorityMap.get(b.status)){
            if(sourcesForWinnerB != null) {
                b.addSources(sourcesForWinnerB);
            }
            return b;
        } else if (statusPriorityMap.get(a.status).equals(statusPriorityMap.get(b.status))){
            // compare with equals(): the priorities are boxed Integers, so == would compare references
            a.addSources(b.sources);
            return a;
        } else {
            return a;
        }
    }

    /**
     * Reads the priority for the given status term from its extensions.
     *
     * @param term
     * @return the priority value, or <code>null</code> if none is defined
     */
    private Integer getPriorityFor(DefinedTermBase<?> term) {
        Set<Extension> extensions = term.getExtensions();
        for(Extension extension : extensions){
            if(!extension.getType().equals(ExtensionType.ORDER())) {
                continue;
            }
            int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
            if(pos == 0){ // if the value starts with EXTENSION_VALUE_PREFIX
                try {
                    Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
                    return priority;
                } catch (NumberFormatException e) {
                    logger.warn("Invalid number format in Extension:" + extension.getValue());
                }
            }
        }
        logger.warn("no priority defined for '" + term.getLabel() + "'");
        return null;
    }

    /**
     * Runs both steps:
     * <ul>
     * <li>Step 1: Accumulate occurrence records by area</li>
     * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank;
     * the status of all children is accumulated on each rank, starting from
     * the lower rank and moving up to the upper rank.</li>
     * </ul>
     *
     * @param mode
     *            defines which of the two steps are run, see {@link AggregationMode}
     * @param superAreas
     *            the areas to which the subordinate areas should be projected.
     * @param lowerRank
     * @param upperRank
     * @param classification
     *            limit the accumulation process to a specific classification
     *            (not yet implemented)
     * @param monitor
     *            the progress monitor to use for reporting progress to the
     *            user. It is the caller's responsibility to call done() on the
     *            given monitor. Accepts null, indicating that no progress
     *            should be reported and that the operation cannot be cancelled.
     */
    public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
            Classification classification, IProgressMonitor monitor) {

        if (monitor == null) {
            monitor = new NullProgressMonitor();
        }

        // only for debugging:
        //logger.setLevel(Level.TRACE); // TRACE will slow down a lot since it forces loading all term representations
        //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);

        logger.info("Hibernate JDBC Batch size: "
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());

        Set<Classification> classifications = new HashSet<Classification>();
        if(classification == null) {
            classifications.addAll(classificationService.listClassifications(null, null, null, null));
        } else {
            classifications.add(classification);
        }

        int aggregationWorkTicks;
        switch(mode){
        case byAreasAndRanks:
            aggregationWorkTicks = byAreasTicks + byRankTicks;
            break;
        case byAreas:
            aggregationWorkTicks = byAreasTicks;
            break;
        case byRanks:
            aggregationWorkTicks = byRankTicks;
            break;
        default:
            aggregationWorkTicks = 0;
            break;
        }

        // take start time for performance testing
        // NOTE: use ONLY_FIRST_BATCH = true to measure only one batch
        double start = System.currentTimeMillis();

        monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );

        updatePriorities();

        List<Rank> ranks = rankInterval(lowerRank, upperRank);

        monitor.worked(1);


        for(Classification _classification : classifications) {

            ClassificationLookupDTO classificationLookupDao = classificationService.classificationLookup(_classification);
            classificationLookupDao.filterInclude(ranks);

            double end1 = System.currentTimeMillis();
            logger.info("Time elapsed for classificationLookup() : " + (end1 - start) / (1000) + "s");
            double start2 = System.currentTimeMillis();

            monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
            if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
                accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, byAreasTicks), true);
            }
            monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());

            double end2 = System.currentTimeMillis();
            logger.info("Time elapsed for accumulateByArea() : " + (end2 - start2) / (1000) + "s");

            double start3 = System.currentTimeMillis();
            if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
                accumulateByRank(ranks, classificationLookupDao, new SubProgressMonitor(monitor, byRankTicks), mode.equals(AggregationMode.byRanks));
            }

            double end3 = System.currentTimeMillis();
            logger.info("Time elapsed for accumulateByRank() : " + (end3 - start3) / (1000) + "s");
            logger.info("Time elapsed for accumulate(): " + (end3 - start) / (1000) + "s");

            if(ONLY_FIRST_BATCH) {
                monitor.done();
                break;
            }
        }
        monitor.done();
    }


    /**
     * Step 1: Accumulate occurrence records by area
     * <ul>
     * <li>areas are projected to super areas, e.g.:  HS <-- HS(A), HS(G), HS(S)</li>
     * <li>super areas initially do not have a status set ==> prerequisite to check in the CDM</li>
     * <li>distributions having a status contained in {@link #getByAreaIgnoreStatusList()} are ignored</li>
     * <li>status terms have a priority value; the status with the highest priority determines the status of the super area</li>
     * <li>the source references of the accumulated distributions are also accumulated into the new distribution;
     *     this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
     * </ul>
     *
     * @param superAreas
     *      the areas to which the subordinate areas should be projected
     * @param classificationLookupDao
     * @param subMonitor
     * @param doClearDescriptions
     *      if <code>true</code>, existing computed distributions are removed before accumulation
     *
     */
    protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 1000;

        TransactionStatus txStatus = startTransaction(false);

        // reload superAreas TODO is it faster to getSession().merge(object) ??
        Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
        for (NamedArea superArea : superAreas){
            superAreaUuids.add(superArea.getUuid());
        }

        // visit all accepted taxa
        subMonitor.beginTask("Accumulating by area ",  classificationLookupDao.getTaxonIds().size());
        Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();

        while (taxonIdIterator.hasNext()) {

            if(txStatus == null) {
                // transaction has been committed at the end of the previous batch, start a new one
                txStatus = startTransaction(false);
            }

            // the session is cleared after each batch, so load the superAreaList for each batch
            List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);

            // load taxa for this batch
            List<TaxonBase> taxa = null;
            List<Integer> taxonIds = new ArrayList<Integer>(batchSize);
            while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
                taxonIds.add(taxonIdIterator.next());
            }

//            logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");

            taxa = taxonService.loadByIds(taxonIds, TAXONDESCRIPTION_INIT_STRATEGY);

            // iterate over the taxa and accumulate areas
            for(TaxonBase taxonBase : taxa) {
                if(logger.isDebugEnabled()){
                    logger.debug("accumulateByArea() - taxon :" + taxonToString(taxonBase));
                }

                Taxon taxon = (Taxon)taxonBase;
                TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                List<Distribution> distributions = distributionsFor(taxon);

                // step through superAreas for accumulation of subAreas
                for (NamedArea superArea : superAreaList){

                    // accumulate all sub area status
                    StatusAndSources accumulatedStatusAndSources = null;
                    // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
                    Set<NamedArea> subAreas = getSubAreasFor(superArea);
                    for(NamedArea subArea : subAreas){
                        if(logger.isTraceEnabled()){
                            logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
                        }
                        // step through all distributions for the given subArea
                        for(Distribution distribution : distributions){
                            if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                if(logger.isTraceEnabled()){
                                    logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
                                }
                                // skip all distributions having a status contained in byAreaIgnoreStatusList
                                if (getByAreaIgnoreStatusList().contains(status)){
                                    continue;
                                }
                                StatusAndSources subStatusAndSources = new StatusAndSources(status, distribution.getSources());
                                accumulatedStatusAndSources = choosePreferred(accumulatedStatusAndSources, subStatusAndSources, null);
                            }
                        }
                    } // next sub area
                    if (accumulatedStatusAndSources != null) {
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatusAndSources.status));
                        }
                        // store new distribution element for superArea in taxon description
                        Distribution newDistributionElement = Distribution.NewInstance(superArea, accumulatedStatusAndSources.status);
                        newDistributionElement.getSources().addAll(accumulatedStatusAndSources.sources);
                        newDistributionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                        description.addElement(newDistributionElement);
                    }

                } // next super area ....

                descriptionService.saveOrUpdate(description);
                taxonService.saveOrUpdate(taxon);
                subMonitor.worked(1);

            } // next taxon

            flushAndClear();

            // commit for every batch, otherwise the persistence context
            // may grow too much and eat up all the heap
            commitTransaction(txStatus);
            txStatus = null;

            if(ONLY_FIRST_BATCH) {
                break;
            }

        } // next batch of taxa

        subMonitor.done();
    }

    /**
     * Step 2: Accumulate by ranks starting from lower rank to upper rank; the status of all children
     * is accumulated on each rank, starting from the lower rank and moving up to the upper rank.
     * <ul>
     * <li>aggregate the distributions of the included taxa of the next lower rank for any rank level, starting from the lower rank (e.g. subspecies)
     *    up to the upper rank (e.g. genus)</li>
     *  <li>the accumulation is done for each distribution area found in the included taxa</li>
     *  <li>areas of subtaxa with status endemic are ignored</li>
     *  <li>the status with the highest priority determines the value for the accumulated distribution</li>
     *  <li>the source references of the accumulated distributions are also accumulated into the new distribution;
     *    this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
     * </ul>
     */
    protected void accumulateByRank(List<Rank> rankInterval, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 500;
        int ticksPerRank = 100;

        TransactionStatus txStatus = startTransaction(false);

        // the loadRankSpecificRootNodes() method not only finds
        // taxa of the specified rank but also taxa of lower ranks
        // if no taxon of the specified rank exists, so we need to
        // remember which taxa have been processed already
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();
        List<TaxonBase> taxa = null;
        List<TaxonBase> childTaxa = null;

        List<Rank> ranks = rankInterval;

        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);

        for (Rank rank : ranks) {

            if(logger.isDebugEnabled()){
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
            }

            Set<Integer> taxonIdsPerRank = classificationLookupDao.getTaxonIdByRank().get(rank);

            int taxonCountPerRank = taxonIdsPerRank != null ? taxonIdsPerRank.size() : 0;

            SubProgressMonitor taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
            taxonSubMonitor.beginTask("Accumulating by rank " + termToString(rank), taxonCountPerRank);

            if(taxonCountPerRank == 0) {
                taxonSubMonitor.done();
                continue;
            }


            Iterator<Integer> taxonIdIterator = taxonIdsPerRank.iterator();
            while (taxonIdIterator.hasNext()) {

                if(txStatus == null) {
                    // transaction has been committed at the end of the previous batch, start a new one
                    txStatus = startTransaction(false);
                }

                // load taxa for this batch
                List<Integer> taxonIds = new ArrayList<Integer>(batchSize);
                while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
                    taxonIds.add(taxonIdIterator.next());
                }

                taxa = taxonService.loadByIds(taxonIds, null);

//                if(logger.isDebugEnabled()){
//                           logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
//                }

                for(TaxonBase taxonBase : taxa) {

                    Taxon taxon = (Taxon)taxonBase;
                    if (taxaProcessedIds.contains(taxon.getId())) {
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
                        }
                        continue;
                    }
                    taxaProcessedIds.add(taxon.getId());
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
                    }

                    // step through direct taxonomic children for accumulation
                    Map<NamedArea, StatusAndSources> accumulatedStatusMap = new HashMap<NamedArea, StatusAndSources>();

                    List<Integer> childTaxonIds = new ArrayList<>();
                    Set<Integer> childSet = classificationLookupDao.getChildTaxonMap().get(taxon.getId());
                    if(childSet != null) {
                        childTaxonIds.addAll(childSet);
                    }
                    if(!childTaxonIds.isEmpty()) {
                        childTaxa = taxonService.loadByIds(childTaxonIds, TAXONDESCRIPTION_INIT_STRATEGY);

                        for (TaxonBase childTaxonBase : childTaxa){

                            Taxon childTaxon = (Taxon) childTaxonBase;
                            getSession().setReadOnly(childTaxon, true);
                            if(logger.isTraceEnabled()){
                                logger.trace("                   subtaxon :" + taxonToString(childTaxon));
                            }

                            for(Distribution distribution : distributionsFor(childTaxon) ) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                NamedArea area = distribution.getArea();
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
                                    continue;
                                }

                                StatusAndSources subStatusAndSources = new StatusAndSources(status, distribution.getSources());
                                accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), subStatusAndSources, null));
                            }
                        }

                        if(accumulatedStatusMap.size() > 0) {
                            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                            for (NamedArea area : accumulatedStatusMap.keySet()) {
                                Distribution distribution = findDistribution(description, area, accumulatedStatusMap.get(area).status);
                                if(distribution == null) {
                                    // create a new distribution element
                                    distribution = Distribution.NewInstance(area, accumulatedStatusMap.get(area).status);
                                    distribution.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                                }
                                addSourcesDeduplicated(distribution.getSources(), accumulatedStatusMap.get(area).sources);

                                description.addElement(distribution);
                            }
                            taxonService.saveOrUpdate(taxon);
                            descriptionService.saveOrUpdate(description);
                        }

                    }
                    taxonSubMonitor.worked(1); // one taxon worked

                } // next taxon ....

                flushAndClear();

                // commit for every batch, otherwise the persistence context
                // may grow too much and eat up all the heap
                commitTransaction(txStatus);
                txStatus = null;

                if(ONLY_FIRST_BATCH) {
                    break;
                }
            } // next batch

            taxonSubMonitor.done();
            subMonitor.worked(1);

            if(ONLY_FIRST_BATCH) {
                break;
            }
        } // next Rank

        logger.info("accumulateByRank() - done");
        subMonitor.done();
    }

    /**
     * Searches the given description for an existing Distribution element matching
     * the given area and status.
     *
     * @param description
     * @param area
     * @param status
     * @return the matching Distribution, or <code>null</code> if none is found
     */
    private Distribution findDistribution(TaxonDescription description, NamedArea area, PresenceAbsenceTerm status) {
        for(DescriptionElementBase item : description.getElements()) {
            if(!(item instanceof Distribution)) {
                continue;
            }
            Distribution distribution = ((Distribution)item);
            if(distribution.getArea().equals(area) && distribution.getStatus().equals(status)) {
                return distribution;
            }
        }
        return null;
    }

    /**
     * Builds the list of ranks to be processed, starting at <code>lowerRank</code> and
     * walking up the rank hierarchy until <code>upperRank</code> is passed.
     *
     * @param lowerRank
     * @param upperRank
     * @return the list of ranks
     */
    private List<Rank> rankInterval(Rank lowerRank, Rank upperRank) {

        TransactionStatus txStatus = startTransaction(false);
        Rank currentRank = lowerRank;
        List<Rank> ranks = new ArrayList<Rank>();
        ranks.add(currentRank);
        while (!currentRank.isHigher(upperRank)) {
            currentRank = findNextHigherRank(currentRank);
            ranks.add(currentRank);
        }
        commitTransaction(txStatus);
        txStatus = null;
        return ranks;
    }

    /**
     * @return the current Hibernate session of the description service
     */
    private Session getSession() {
        return descriptionService.getSession();
    }

    /**
     * Flushes the session and the Hibernate Search indexes, then clears the session.
     */
    private void flushAndClear() {
        logger.debug("flushing and clearing session ...");
        getSession().flush();
        try {
            Search.getFullTextSession(getSession()).flushToIndexes();
        } catch (HibernateException e) {
            /* IGNORE - Hibernate Search Event listeners not configured ... */
            if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
                throw e;
            }
        }
        getSession().clear();
    }


    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
    public TransactionStatus startTransaction(Boolean readOnly) {

        DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
        defaultTxDef.setReadOnly(readOnly);
        TransactionDefinition txDef = defaultTxDef;

        // Log some transaction-related debug information.
        if (logger.isTraceEnabled()) {
            logger.trace("Transaction name = " + txDef.getName());
            logger.trace("Transaction facets:");
            logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
            logger.trace("Isolation level = " + txDef.getIsolationLevel());
            logger.trace("Timeout = " + txDef.getTimeout());
            logger.trace("Read Only = " + txDef.isReadOnly());
            // org.springframework.orm.hibernate5.HibernateTransactionManager
            // provides more transaction/session-related debug information.
        }

        TransactionStatus txStatus = transactionManager.getTransaction(txDef);

        getSession().setFlushMode(FlushMode.COMMIT);

        return txStatus;
    }

    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
    public void commitTransaction(TransactionStatus txStatus){
        logger.debug("committing transaction ...");
        transactionManager.commit(txStatus);
        return;
    }

    /**
     * Returns the next higher rank.
     *
     * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
     *
     * @param rank
     * @return the next higher rank
     */
    private Rank findNextHigherRank(Rank rank) {
        rank = (Rank) termService.load(rank.getUuid());
        return rank.getNextHigherTerm();
//        OrderedTermVocabulary<Rank> rankVocabulary = nameService.getRankVocabulary();
//        return rankVocabulary.getNextHigherTerm(rank);
    }

    /**
     * Either finds an existing computed taxon description of the given taxon or creates a new one.
     * If doClear is set, all existing Distribution elements of the computed description will be removed.
     *
     * @param taxon
     * @param doClear will remove all existing Distributions if the taxon already
     * has a MarkerType.COMPUTED() TaxonDescription
     * @return the computed TaxonDescription
     */
    private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {

        String descriptionTitle = this.getClass().getSimpleName();

        // find existing one
        for (TaxonDescription description : taxon.getDescriptions()) {
            if (description.hasMarker(MarkerType.COMPUTED(), true)) {
                logger.debug("reusing description for " + taxon.getTitleCache());
                if (doClear) {
                    int deleteCount = 0;
                    Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
                    for (DescriptionElementBase descriptionElement : description.getElements()) {
                        if(descriptionElement instanceof Distribution) {
                            deleteCandidates.add(descriptionElement);
                        }
                    }
                    if(deleteCandidates.size() > 0){
                        for(DescriptionElementBase descriptionElement : deleteCandidates) {
                            description.removeElement(descriptionElement);
                            descriptionService.deleteDescriptionElement(descriptionElement);
                            descriptionElement = null;
                            deleteCount++;
                        }
                        descriptionService.saveOrUpdate(description);
                        logger.debug("\t" + deleteCount +" distributions cleared");
                    }

                }
                return description;
            }
        }

        // create a new one
        logger.debug("creating new description for " + taxon.getTitleCache());
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
        description.setTitleCache(descriptionTitle, true);
        description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
        return description;
    }

    /**
     * Returns the sub areas (includes) of the given super area; the lookup is cached in subAreaMap.
     *
     * @param superArea
     * @return the set of sub areas
     */
    private Set<NamedArea> getSubAreasFor(NamedArea superArea) {

        if(!subAreaMap.containsKey(superArea)) {
            if(logger.isDebugEnabled()){
                logger.debug("loading included areas for " + superArea.getLabel());
            }
            subAreaMap.put(superArea, superArea.getIncludes());
        }
        return subAreaMap.get(superArea);
    }

    /**
     * Collects all Distribution elements from all descriptions of the given taxon.
     *
     * @param taxon
     * @return the list of distributions
     */
    private List<Distribution> distributionsFor(Taxon taxon) {
        List<Distribution> distributions = new ArrayList<Distribution>();
        for(TaxonDescription description: taxon.getDescriptions()) {
            for(DescriptionElementBase deb : description.getElements()) {
                if(deb instanceof Distribution) {
                    distributions.add((Distribution)deb);
                }
            }
        }
        return distributions;
    }

    /**
     * @param taxon
     * @return a string representation of the taxon for logging
     */
    private String taxonToString(TaxonBase taxon) {
        if(logger.isTraceEnabled()) {
            return taxon.getTitleCache();
        } else {
            return taxon.toString();
        }
    }

    /**
     * @param term
     * @return a string representation of the term for logging
     */
    private String termToString(OrderedTermBase<?> term) {
        if(logger.isTraceEnabled()) {
            return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
        } else {
            return term.getIdInVocabulary();
        }
    }

    /**
     * Sets the priorities for presence and absence terms; the priorities are stored in extensions.
     * This method will start a new transaction and commit it after the work is done.
     */
    public void updatePriorities() {

        TransactionStatus txStatus = startTransaction(false);

        Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();

        priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
        priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
        priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
        priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
        priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
        priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);

        for(PresenceAbsenceTerm term : priorityMap.keySet()) {
            // load the term
            term = (PresenceAbsenceTerm) termService.load(term.getUuid());
            // find the extension
            Extension priorityExtension = null;
            Set<Extension> extensions = term.getExtensions();
            for(Extension extension : extensions){
                if (!extension.getType().equals(ExtensionType.ORDER())) {
                    continue;
                }
                int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
                if(pos == 0){ // if the value starts with EXTENSION_VALUE_PREFIX
                    priorityExtension = extension;
                    break;
                }
            }
            if(priorityExtension == null) {
                priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
            }
            priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));

            // save the term
            termService.saveOrUpdate(term);
            if (logger.isDebugEnabled()) {
                logger.debug("Priority updated for " + term.getLabel());
            }
        }

        commitTransaction(txStatus);
    }
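    /**
     * Adds each of the given sources to the target set unless an equal source
     * (according to <code>equalsByShallowCompare()</code>) is already contained;
     * sources are cloned before being added.
     *
     * @param target the set to add the sources to
     * @param sources the sources to add
     */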
    public static void addSourcesDeduplicated(Set<DescriptionElementSource> target, Set<DescriptionElementSource> sources) {
        for(DescriptionElementSource source : sources) {
            boolean contained = false;
            for(DescriptionElementSource existingSource: target) {
                if(existingSource.equalsByShallowCompare(source)) {
                    contained = true;
                    break;
                }
            }
            if(!contained) {
                try {
                    target.add((DescriptionElementSource)source.clone());
                } catch (CloneNotSupportedException e) {
                    // should never happen
                    throw new RuntimeException(e);
                }
            }
        }
    }
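    /**
     * Modes to control which of the two accumulation steps are run by
     * {@link #accumulate(AggregationMode, List, Rank, Rank, Classification, IProgressMonitor)}:
     * only step 1 (byAreas), only step 2 (byRanks), or both (byAreasAndRanks).
     */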
    public enum AggregationMode {
        byAreas,
        byRanks,
        byAreasAndRanks
    }
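    /**
     * Simple value holder which pairs an accumulated {@link PresenceAbsenceTerm} status
     * with the set of sources supporting it; used while accumulating distributions.
     */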
    private class StatusAndSources {

        private final PresenceAbsenceTerm status;

        private final Set<DescriptionElementSource> sources = new HashSet<>();

        public StatusAndSources(PresenceAbsenceTerm status, Set<DescriptionElementSource> sources) {
            this.status = status;
            addSourcesDeduplicated(this.sources, sources);
        }

        /**
         * Adds the given sources, skipping duplicates.
         *
         * @param sources
         */
        public void addSources(Set<DescriptionElementSource> sources) {
            addSourcesDeduplicated(this.sources, sources);
        }

    }
}