Project

General

Profile

Download (33.4 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2013 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.api.service.description;
11

    
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Set;
19
import java.util.UUID;
20

    
21
import org.apache.log4j.Logger;
22
import org.hibernate.FlushMode;
23
import org.hibernate.HibernateException;
24
import org.hibernate.Session;
25
import org.hibernate.engine.spi.SessionFactoryImplementor;
26
import org.hibernate.search.Search;
27
import org.springframework.beans.factory.annotation.Autowired;
28
import org.springframework.orm.hibernate4.HibernateTransactionManager;
29
import org.springframework.stereotype.Service;
30
import org.springframework.transaction.TransactionDefinition;
31
import org.springframework.transaction.TransactionStatus;
32
import org.springframework.transaction.support.DefaultTransactionDefinition;
33

    
34
import eu.etaxonomy.cdm.api.service.IClassificationService;
35
import eu.etaxonomy.cdm.api.service.IDescriptionService;
36
import eu.etaxonomy.cdm.api.service.INameService;
37
import eu.etaxonomy.cdm.api.service.ITaxonService;
38
import eu.etaxonomy.cdm.api.service.ITermService;
39
import eu.etaxonomy.cdm.api.service.pager.Pager;
40
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
41
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
42
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
43
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
44
import eu.etaxonomy.cdm.model.common.Extension;
45
import eu.etaxonomy.cdm.model.common.ExtensionType;
46
import eu.etaxonomy.cdm.model.common.Marker;
47
import eu.etaxonomy.cdm.model.common.MarkerType;
48
import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
49
import eu.etaxonomy.cdm.model.description.AbsenceTerm;
50
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
51
import eu.etaxonomy.cdm.model.description.Distribution;
52
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
53
import eu.etaxonomy.cdm.model.description.PresenceTerm;
54
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55
import eu.etaxonomy.cdm.model.location.NamedArea;
56
import eu.etaxonomy.cdm.model.name.Rank;
57
import eu.etaxonomy.cdm.model.taxon.Classification;
58
import eu.etaxonomy.cdm.model.taxon.Taxon;
59
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
60

    
61
/**
62
 * The TransmissionEngineDistribution is meant to be used from within a service class.
63
 *
64
 * <h2>GENERAL NOTES </h2>
65
 * <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
66
 * version 14 written in Visual Basic and still need to be
67
 * adapted to the java version of the transmission engine!</em>
68
 *
69
 * <h3>summaryStatus</h3>
70
 *
71
 *   Each distribution information has a summaryStatus; this is a summary of the status codes
72
 *   as stored in the fields of emOccurrence native, introduced, cultivated, ...
73
 *   The summaryStatus seems to be equivalent to  the CDM DistributionStatus
74
 *
75
 * <h3>map generation</h3>
76
 *
77
 *   When generating maps from the accumulated distribution information some special cases have to be handled:
78
 * <ol>
79
 *   <li>if entered or imported status information exists for the same area for which calculated (accumulated)
80
 *       data is available, the calculated data has to be given preference over other data.
81
 *   </li>
82
 *   <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
83
 *       status should be shown in the map, whereas the super area should be ignored.
84
 *   </li>
85
 * </ol>
86
 *
87
 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
88
 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
89
 * @date Feb 22, 2013
90
 */
91
@Service
92

    
93
public class TransmissionEngineDistribution { //TODO extends IoBase?
94

    
95
    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";
96

    
97
    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);
98

    
99

    
100
    /**
101
     * A map which contains the status terms as key and the priority as value
102
     * The map will contain both, the PresenceTerms and the AbsenceTerms
103
     */
104
    private Map<PresenceAbsenceTermBase<?>, Integer> statusPriorityMap = null;
105

    
106
    @Autowired
107
    private IDescriptionService descriptionService;
108

    
109
    @Autowired
110
    private ITermService termService;
111

    
112
    @Autowired
113
    private ITaxonService taxonService;
114

    
115
    @Autowired
116
    private IClassificationService classificationService;
117

    
118
    @Autowired
119
    private INameService mameService;
120

    
121
    @Autowired
122
    private HibernateTransactionManager transactionManager;
123

    
124
    private List<PresenceAbsenceTermBase<?>> byAreaIgnoreStatusList = null;
125

    
126
    private List<PresenceAbsenceTermBase<?>> byRankIgnoreStatusList = null;
127

    
128
    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();
129

    
130

    
131
    /**
132
     * byAreaIgnoreStatusList contains by default:
133
     *  <ul>
134
     *    <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
135
     *    <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
136
     *    <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
137
     *    <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
138
     *    <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
139
     *  </ul>
140
     *
141
     * @return the byAreaIgnoreStatusList
142
     */
143
    public List<PresenceAbsenceTermBase<?>> getByAreaIgnoreStatusList() {
144
        if(byAreaIgnoreStatusList == null ){
145
            byAreaIgnoreStatusList = Arrays.asList(
146
                    new PresenceAbsenceTermBase<?>[] {
147
                            AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
148
                            AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
149
                            AbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
150
                            AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
151
                            AbsenceTerm.NATIVE_FORMERLY_NATIVE()
152
                            // TODO what about AbsenceTerm.ABSENT() also ignore?
153
                    });
154
        }
155
        return byAreaIgnoreStatusList;
156
    }
157

    
158
    /**
159
     * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
160
     */
161
    public void setByAreaIgnoreStatusList(List<PresenceAbsenceTermBase<?>> byAreaIgnoreStatusList) {
162
        this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
163
    }
164

    
165
    /**
166
     * byRankIgnoreStatusList contains by default
167
     *  <ul>
168
     *    <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
169
     *  </ul>
170
     *
171
     * @return the byRankIgnoreStatusList
172
     */
173
    public List<PresenceAbsenceTermBase<?>> getByRankIgnoreStatusList() {
174

    
175
        if (byRankIgnoreStatusList == null) {
176
            byRankIgnoreStatusList = Arrays.asList(
177
                    new PresenceAbsenceTermBase<?>[] {
178
                            PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
179
                    });
180
        }
181
        return byRankIgnoreStatusList;
182
    }
183

    
184
    /**
185
     * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
186
     */
187
    public void setByRankIgnoreStatusList(List<PresenceAbsenceTermBase<?>> byRankIgnoreStatusList) {
188
        this.byRankIgnoreStatusList = byRankIgnoreStatusList;
189
    }
190

    
191
    /**
192
     *
193
     * @param superAreas
194
     */
195
    public TransmissionEngineDistribution() {
196
    }
197

    
198
    /**
199
     * initializes the map which contains the status terms as key and the priority as value
200
     * The map will contain both, the PresenceTerms and the AbsenceTerms
201
     */
202
    @SuppressWarnings("rawtypes")
203
    private void initializeStatusPriorityMap() {
204

    
205
        statusPriorityMap = new HashMap<PresenceAbsenceTermBase<?>, Integer>();
206
        Integer priority;
207

    
208
        // PresenceTerms
209
        for(DefinedTermBase term : termService.list(PresenceTerm.class, null, null, null, null)){
210
            priority = getPriorityFor(term);
211
            if(priority != null){
212
                statusPriorityMap.put((PresenceAbsenceTermBase) term, priority);
213
            }
214
        }
215
        // AbsenceTerms
216
        for(DefinedTermBase term : termService.list(AbsenceTerm.class, null, null, null, null)){
217
            priority = getPriorityFor(term);
218
            if(priority != null){
219
                statusPriorityMap.put((PresenceAbsenceTermBase) term, priority);
220
            }
221
        }
222
    }
223

    
224
    /**
225
     * Compares the PresenceAbsenceTermBase terms <code>a</code> and <code>b</code>  and
226
     * returns the PresenceAbsenceTermBase with the higher priority as stored in the statusPriorityMap.
227
     * If either a or b are null b or a is returned.
228
     *
229
     * @see initializeStatusPriorityMap()
230
     *
231
     * @param a
232
     * @param b
233
     * @return
234
     */
235
    private PresenceAbsenceTermBase<?> choosePreferred(PresenceAbsenceTermBase<?> a, PresenceAbsenceTermBase<?> b){
236

    
237
        if (statusPriorityMap == null) {
238
            initializeStatusPriorityMap();
239
        }
240

    
241
        if (b == null) {
242
            return a;
243
        }
244
        if (a == null) {
245
            return b;
246
        }
247

    
248
        if (statusPriorityMap.get(a) == null) {
249
            logger.warn("No priority found in map for " + a.getLabel());
250
            return b;
251
        }
252
        if (statusPriorityMap.get(b) == null) {
253
            logger.warn("No priority found in map for " + b.getLabel());
254
            return a;
255
        }
256
        if(statusPriorityMap.get(a) > statusPriorityMap.get(b)){
257
            return a;
258
        } else {
259
            return b;
260
        }
261
    }
262

    
263
    /**
264
     * reads the priority for the given status term from the extensions.
265
     *
266
     * @param term
267
     * @return the priority value
268
     */
269
    private Integer getPriorityFor(DefinedTermBase<?> term) {
270
        Set<Extension> extensions = term.getExtensions();
271
        for(Extension extension : extensions){
272
            if(!extension.getType().equals(ExtensionType.ORDER())) {
273
                continue;
274
            }
275
            int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
276
            if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
277
                try {
278
                    Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
279
                    return priority;
280
                } catch (NumberFormatException e) {
281
                    logger.warn("Invalid number format in Extension:" + extension.getValue());
282
                }
283
            }
284
        }
285
        logger.warn("no priority defined for '" + term.getLabel() + "'");
286
        return null;
287
    }
288

    
289
    /**
290
     * runs both steps
291
     * <ul>
292
     * <li>Step 1: Accumulate occurrence records by area</li>
293
     * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank,
294
     * the status of all children are accumulated on each rank starting from
295
     * lower rank to upper rank.</li>
296
     * </ul>
297
     *
298
     * @param superAreas
299
     *            the areas to which the subordinate areas should be projected.
300
     * @param lowerRank
301
     * @param upperRank
302
     * @param classification
303
     * @param classification
304
     *            limit the accumulation process to a specific classification
305
     *            (not yet implemented)
306
     * @param monitor
307
     *            the progress monitor to use for reporting progress to the
308
     *            user. It is the caller's responsibility to call done() on the
309
     *            given monitor. Accepts null, indicating that no progress
310
     *            should be reported and that the operation cannot be cancelled.
311
     */
312
    public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
313
            Classification classification, IProgressMonitor monitor) {
314

    
315
        if (monitor == null) {
316
            monitor = new NullProgressMonitor();
317
        }
318

    
319
        logger.info("Hibernate JDBC Batch size: "
320
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
321

    
322
        int workTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;
323
        monitor.beginTask("Accumulating distributions", workTicks + 1 );
324

    
325

    
326
        monitor.subTask("updating Priorities");
327
        updatePriorities();
328
        monitor.worked(1);
329
        monitor.setTaskName("Accumulating distributions");
330

    
331
        if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
332
            accumulateByArea(superAreas, classification, new SubProgressMonitor(monitor, 200),
333
                    mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks));
334
        }
335
        if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
336
            accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200),
337
                    mode.equals(AggregationMode.byRanks));
338
        }
339
    }
340

    
341
    /**
342
     * @return
343
     */
344
    private Session getSession() {
345
        return descriptionService.getSession();
346
    }
347

    
348
    /**
349
     * Step 1: Accumulate occurrence records by area
350
     * <ul>
351
     * <li>areas are projected to super areas e.g.:  HS <-- HS(A), HS(G), HS(S)</li>
352
     * <li>super areas do initially not have a status set ==> Prerequisite to check in CDM</li>
353
     * <li>areas having a summary status of summary value different from {@link #getByAreaIgnoreStatusList()} are ignored</li>
354
     * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
355
     * <li>the source references of the accumulated distributions are also accumulated into the new distribution,,</li>
356
     * <li>this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
357
     * </ul>
358
     *
359
     * @param superAreas
360
     *      the areas to which the subordinate areas should be projected
361
     * @param classification
362
     *      limit the accumulation process to a specific classification (not yet implemented)
363
     */
364
    protected void accumulateByArea(List<NamedArea> superAreas, Classification classification,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
365

    
366
        int batchSize = 1000;
367

    
368
        TransactionStatus txStatus = startTransaction(false);
369

    
370
        // reload superAreas TODO is it faster to getSession().merge(object) ??
371
        Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
372
        for (NamedArea superArea : superAreas){
373
            superAreaUuids.add(superArea.getUuid());
374
        }
375
        List<DefinedTermBase> superAreaList = termService.find(superAreaUuids);
376

    
377
        // visit all accepted taxa
378
        Pager<Taxon> taxonPager = null;
379
        int pageIndex = 0;
380
        boolean isLastPage = false;
381
        while (!isLastPage) {
382

    
383
            if(txStatus == null) {
384
                // transaction has been comitted at the end of this batch, start a new one
385
                txStatus = startTransaction(false);
386
            }
387

    
388
            //TODO limit by classification if not null
389
            taxonPager = taxonService.page(Taxon.class, batchSize, pageIndex++, null, null);
390

    
391
            if(taxonPager.getCurrentIndex() == 0){
392
                subMonitor.beginTask("Accumulating by area ",  taxonPager.getCount());
393
            }
394

    
395
            logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
396

    
397
            if (taxonPager.getRecords().size() == 0){
398
                break;
399
            }
400
            isLastPage = taxonPager.getRecords().size() < batchSize;
401

    
402
            // iterate over the taxa and accumulate areas
403
            for(Taxon taxon : taxonPager.getRecords()) {
404
                if(logger.isDebugEnabled()){
405
                    logger.debug("accumulateByArea() - taxon :" + taxon.getTitleCache());
406
                }
407

    
408
                TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
409
                List<Distribution> distributions = distributionsFor(taxon);
410

    
411
                // Step through superAreas for accumulation of subAreas
412
                for (DefinedTermBase superAreaTermBase : superAreaList){
413
                    NamedArea superArea = (NamedArea)superAreaTermBase;
414

    
415
                    // accumulate all sub area status
416
                    PresenceAbsenceTermBase<?> accumulatedStatus = null;
417
                    Set<NamedArea> subAreas = getSubAreasFor(superArea);
418
                    for(NamedArea subArea : subAreas){
419
                        if(logger.isTraceEnabled()){
420
                            logger.trace("accumulateByArea() - \t\t" + subArea.getLabel());
421
                        }
422
                        // step through all distributions for the given subArea
423
                        for(Distribution distribution : distributions){
424
                            if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
425
                                PresenceAbsenceTermBase<?> status = distribution.getStatus();
426
                                if(logger.isTraceEnabled()){
427
                                    logger.trace("accumulateByArea() - \t\t" + subArea.getLabel() + ": " + status.getLabel());
428
                                }
429
                                // skip all having a status value different of those in byAreaIgnoreStatusList
430
                                if (getByAreaIgnoreStatusList().contains(status)){
431
                                    continue;
432
                                }
433
                                accumulatedStatus = choosePreferred(accumulatedStatus, status);
434
                            }
435
                        }
436
                    } // next sub area
437
                    if (accumulatedStatus != null) {
438
                        if(logger.isDebugEnabled()){
439
                            logger.debug("accumulateByArea() - \t >> " + superArea.getLabel() + ": " + accumulatedStatus.getLabel());
440
                        }
441
                        // store new distribution element for superArea in taxon description
442
                        Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
443
                        newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
444
                        description.addElement(newDistribitionElement);
445
                    }
446

    
447
                } // next super area ....
448

    
449
                descriptionService.saveOrUpdate(description);
450
                taxonService.saveOrUpdate(taxon);
451
                subMonitor.worked(1);
452

    
453
            } // next taxon
454

    
455
            taxonPager = null;
456
            flushAndClear();
457

    
458
            // commit for every batch, otherwise the persistent context
459
            // may grow too much and eats up all the heap
460
            commitTransaction(txStatus);
461
            txStatus = null;
462

    
463
        } // next batch of taxa
464

    
465
        subMonitor.done();
466
    }
467

    
468
   /**
469
    * Step 2: Accumulate by ranks staring from lower rank to upper rank, the status of all children
470
    * are accumulated on each rank starting from lower rank to upper rank.
471
    * <ul>
472
    * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
473
    *    up to upper rank (e.g. Genus)</li>
474
    *  <li>the accumulation id done for each distribution area found in the included taxa</li>
475
    *  <li>areas of subtaxa with status endemic are ignored</li>
476
    *  <li>the status with the highest priority determines the value for the accumulated distribution</li>
477
    *  <li>the source reference of the accumulated distributions are also accumulated into the new distribution,
478
    *    this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
479
    *</ul>
480
    */
481
    protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
482

    
483
        int batchSize = 500;
484

    
485
        TransactionStatus txStatus = startTransaction(false);
486

    
487
        // the loadRankSpecificRootNodes() method not only finds
488
        // taxa of the specified rank but also taxa of lower ranks
489
        // if no taxon of the specified rank exists, so we need to
490
        // remember which taxa have been processed already
491
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();
492

    
493
        Rank currentRank = lowerRank;
494
        List<Rank> ranks = new ArrayList<Rank>();
495
        ranks.add(currentRank);
496
        while (!currentRank.isHigher(upperRank)) {
497
            currentRank = findNextHigherRank(currentRank);
498
            ranks.add(currentRank);
499
        }
500

    
501
        int ticksPerRank = 100;
502
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);
503

    
504
        for (Rank rank : ranks) {
505

    
506
            if(logger.isDebugEnabled()){
507
                logger.debug("accumulateByRank() - at Rank '" + rank.getLabel() + "'");
508
            }
509

    
510
            Pager<TaxonNode> taxonPager = null;
511
            int pageIndex = 0;
512
            boolean isLastPage = false;
513
            SubProgressMonitor taxonSubMonitor = null;
514
            while (!isLastPage) {
515

    
516
                if(txStatus == null) {
517
                    // transaction has been comitted at the end of this batch, start a new one
518
                    txStatus = startTransaction(false);
519
                }
520

    
521
                taxonPager = classificationService
522
                        .pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null);
523

    
524
                if(taxonSubMonitor == null) {
525
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
526
                    taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount());
527

    
528
                }
529

    
530
                if(taxonPager != null){
531
                    if(logger.isDebugEnabled()){
532
                               logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
533
                    }
534
                } else {
535
                    logger.error("accumulateByRank() - taxonNode pager was NULL");
536
                }
537

    
538

    
539
                isLastPage = taxonPager.getRecords().size() < batchSize;
540
                if (taxonPager.getRecords().size() == 0){
541
                    break;
542
                }
543

    
544
                for(TaxonNode taxonNode : taxonPager.getRecords()) {
545

    
546
                    Taxon taxon = taxonNode.getTaxon();
547
                    if (taxaProcessedIds.contains(taxon.getId())) {
548
                        if(logger.isDebugEnabled()){
549
                            logger.debug("accumulateByRank() - skipping already processed taxon :" + taxon.getTitleCache());
550
                        }
551
                        continue;
552
                    }
553
                    taxaProcessedIds.add(taxon.getId());
554
                    if(logger.isDebugEnabled()){
555
                        logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxon.getTitleCache());
556
                    }
557

    
558
                    // Step through direct taxonomic children for accumulation
559
                    @SuppressWarnings("rawtypes")
560
                    Map<NamedArea, PresenceAbsenceTermBase> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTermBase>();
561

    
562
                    for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){
563

    
564
                        getSession().setReadOnly(taxonNode, true);
565
                        if(logger.isTraceEnabled()){
566
                            logger.trace("                   subtaxon :" + subTaxonNode.getTaxon().getTitleCache());
567
                        }
568

    
569
                        for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
570
                            PresenceAbsenceTermBase<?> status = distribution.getStatus();
571
                            NamedArea area = distribution.getArea();
572
                            if (status == null || getByRankIgnoreStatusList().contains(status)){
573
                              continue;
574
                            }
575
                            accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));
576
                         }
577
                    }
578

    
579
                    if(accumulatedStatusMap.size() > 0) {
580
                        TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
581
                        for (NamedArea area : accumulatedStatusMap.keySet()) {
582
                            // store new distribution element in new Description
583
                            Distribution newDistribitionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
584
                            newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
585
                            description.addElement(newDistribitionElement);
586
                        }
587
                        taxonService.saveOrUpdate(taxon);
588
                        descriptionService.saveOrUpdate(description);
589
                    }
590
                    taxonSubMonitor.worked(1); // one taxon worked
591

    
592
                } // next taxon node ....
593

    
594
                taxonPager = null;
595
                flushAndClear();
596

    
597
                // commit for every batch, otherwise the persistent context
598
                // may grow too much and eats up all the heap
599
                commitTransaction(txStatus);
600
                txStatus = null;
601

    
602
            } // next batch
603

    
604
            taxonSubMonitor.done();
605
            subMonitor.worked(1);
606

    
607
        } // next Rank
608

    
609
        subMonitor.done();
610
    }
611

    
612
    /**
613
     *
614
     */
615
    private void flushAndClear() {
616
        logger.debug("flushing and clearing session ...");
617
        getSession().flush();
618
        try {
619
            Search.getFullTextSession(getSession()).flushToIndexes();
620
        } catch (HibernateException e) {
621
            /* IGNORE - Hibernate Search Event listeners not configured ... */
622
            if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
623
                throw e;
624
            }
625
        }
626
        getSession().clear();
627
    }
628

    
629

    
630
    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
631
    public TransactionStatus startTransaction(Boolean readOnly) {
632

    
633
        DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
634
        defaultTxDef.setReadOnly(readOnly);
635
        TransactionDefinition txDef = defaultTxDef;
636

    
637
        // Log some transaction-related debug information.
638
        if (logger.isTraceEnabled()) {
639
            logger.trace("Transaction name = " + txDef.getName());
640
            logger.trace("Transaction facets:");
641
            logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
642
            logger.trace("Isolation level = " + txDef.getIsolationLevel());
643
            logger.trace("Timeout = " + txDef.getTimeout());
644
            logger.trace("Read Only = " + txDef.isReadOnly());
645
            // org.springframework.orm.hibernate4.HibernateTransactionManager
646
            // provides more transaction/session-related debug information.
647
        }
648

    
649
        TransactionStatus txStatus = transactionManager.getTransaction(txDef);
650

    
651
        getSession().setFlushMode(FlushMode.COMMIT);
652

    
653
        return txStatus;
654
    }
655

    
656
    // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
657
    public void commitTransaction(TransactionStatus txStatus){
658
        logger.debug("commiting transaction ...");
659
        transactionManager.commit(txStatus);
660
        return;
661
    }
662

    
663
    /**
664
     * returns the next higher rank
665
     *
666
     * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
667
     *
668
     * @param rank
669
     * @return
670
     */
671
    private Rank findNextHigherRank(Rank rank) {
672
        rank = (Rank) termService.load(rank.getUuid());
673
        OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();;
674
        return rankVocabulary.getNextHigherTerm(rank);
675
    }
676

    
677
    /**
678
     * Either finds an existing taxon description of the given taxon or creates a new one.
679
     * If the doClear is set all existing description elements will be cleared.
680
     *
681
     * @param taxon
682
     * @param doClear will remove all existing Distributions if the taxon already
683
     * has a MarkerType.COMPUTED() TaxonDescription
684
     * @return
685
     */
686
    private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {
687

    
688
        String descriptionTitle = this.getClass().getSimpleName();
689

    
690
        // find existing one
691
        for (TaxonDescription description : taxon.getDescriptions()) {
692
            if (description.hasMarker(MarkerType.COMPUTED(), true)) {
693
                logger.debug("reusing description for " + taxon.getTitleCache());
694
                if (doClear) {
695
                    int deleteCount = 0;
696
                    Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
697
                    for (DescriptionElementBase descriptionElement : description.getElements()) {
698
                        if(descriptionElement instanceof Distribution) {
699
                            deleteCandidates.add(descriptionElement);
700
                        }
701
                    }
702
                    if(deleteCandidates.size() > 0){
703
                        for(DescriptionElementBase descriptionElement : deleteCandidates) {
704
                            description.removeElement(descriptionElement);
705
                            descriptionService.deleteDescriptionElement(descriptionElement);
706
                            descriptionElement = null;
707
                            deleteCount++;
708
                        }
709
                        descriptionService.saveOrUpdate(description);
710
                        logger.debug("\t" + deleteCount +" distributions cleared");
711
                    }
712

    
713
                }
714
                return description;
715
            }
716
        }
717

    
718
        // create a new one
719
        logger.debug("creating new description for " + taxon.getTitleCache());
720
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
721
        description.setTitleCache(descriptionTitle, true);
722
        description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
723
        return description;
724
    }
725

    
726
    /**
727
     * @param superArea
728
     * @return
729
     */
730
    private Set<NamedArea> getSubAreasFor(NamedArea superArea) {
731

    
732
        if(!subAreaMap.containsKey(superArea)) {
733
            if(logger.isDebugEnabled()){
734
                logger.debug("loading included areas for " + superArea.getLabel());
735
            }
736
            subAreaMap.put(superArea, superArea.getIncludes());
737
        }
738
        return subAreaMap.get(superArea);
739
    }
740

    
741
    /**
742
     * @param taxon
743
     * @return
744
     */
745
    private List<Distribution> distributionsFor(Taxon taxon) {
746
        return descriptionService
747
                .getDescriptionElementsForTaxon(taxon, null, Distribution.class, null, null, null);
748
    }
749

    
750
    /**
751
     * Sets the priorities for presence and absence terms, the priorities are stored in extensions.
752
     * This method will start a new transaction and commits it after the work is done.
753
     */
754
    public void updatePriorities() {
755

    
756
        TransactionStatus txStatus = startTransaction(false);
757

    
758
        Map<PresenceAbsenceTermBase<?>, Integer> priorityMap = new HashMap<PresenceAbsenceTermBase<?>, Integer>();
759

    
760
        priorityMap.put(AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
761
        priorityMap.put(PresenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
762
        priorityMap.put(AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
763
        priorityMap.put(AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
764
        priorityMap.put(AbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
765
        priorityMap.put(PresenceTerm.CULTIVATED(), 45);
766
        priorityMap.put(AbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
767
        priorityMap.put(PresenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
768
        priorityMap.put(PresenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
769
        priorityMap.put(PresenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
770
        priorityMap.put(PresenceTerm.INTRODUCED(), 90);
771
        priorityMap.put(PresenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
772
        priorityMap.put(PresenceTerm.INTRODUCED_NATURALIZED(), 110);
773
        priorityMap.put(PresenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
774
        priorityMap.put(PresenceTerm.NATIVE(), 130); // null
775
        priorityMap.put(PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);
776

    
777
        for(PresenceAbsenceTermBase<?> term : priorityMap.keySet()) {
778
            // load the term
779
            term = (PresenceAbsenceTermBase<?>) termService.load(term.getUuid());
780
            // find the extension
781
            Extension priotityExtension = null;
782
            Set<Extension> extensions = term.getExtensions();
783
            for(Extension extension : extensions){
784
                if (!extension.getType().equals(ExtensionType.ORDER())) {
785
                    continue;
786
                }
787
                int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
788
                if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
789
                    priotityExtension = extension;
790
                    break;
791
                }
792
            }
793
            if(priotityExtension == null) {
794
                priotityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
795
            }
796
            priotityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));
797

    
798
            // save the term
799
            termService.saveOrUpdate(term);
800
            if (logger.isDebugEnabled()) {
801
                logger.debug("Priority updated for " + term.getLabel());
802
            }
803
        }
804

    
805
        commitTransaction(txStatus);
806
    }
807

    
808
    /**
     * The available aggregation modes.
     * NOTE(review): judging by the constant names the modes select aggregation
     * by areas, by ranks, or by both - confirm against the code using this enum.
     */
    public enum AggregationMode {
        /** presumably: aggregation by areas only */
        byAreas,
        /** presumably: aggregation by ranks only */
        byRanks,
        /** presumably: aggregation by areas and by ranks */
        byAreasAndRanks;
    }
814
}
    (1-1/1)