3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.api
.service
.description
;
12 import java
.util
.ArrayList
;
13 import java
.util
.Arrays
;
14 import java
.util
.HashMap
;
15 import java
.util
.HashSet
;
16 import java
.util
.Iterator
;
17 import java
.util
.List
;
20 import java
.util
.UUID
;
22 import org
.apache
.log4j
.Level
;
23 import org
.apache
.log4j
.Logger
;
24 import org
.hibernate
.FlushMode
;
25 import org
.hibernate
.HibernateException
;
26 import org
.hibernate
.Session
;
27 import org
.hibernate
.engine
.spi
.SessionFactoryImplementor
;
28 import org
.hibernate
.search
.Search
;
29 import org
.springframework
.beans
.factory
.annotation
.Autowired
;
30 import org
.springframework
.orm
.hibernate5
.HibernateTransactionManager
;
31 import org
.springframework
.stereotype
.Service
;
32 import org
.springframework
.transaction
.TransactionDefinition
;
33 import org
.springframework
.transaction
.TransactionStatus
;
34 import org
.springframework
.transaction
.support
.DefaultTransactionDefinition
;
36 import eu
.etaxonomy
.cdm
.api
.service
.IClassificationService
;
37 import eu
.etaxonomy
.cdm
.api
.service
.IDescriptionService
;
38 import eu
.etaxonomy
.cdm
.api
.service
.INameService
;
39 import eu
.etaxonomy
.cdm
.api
.service
.ITaxonService
;
40 import eu
.etaxonomy
.cdm
.api
.service
.ITermService
;
41 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
42 import eu
.etaxonomy
.cdm
.common
.monitor
.IProgressMonitor
;
43 import eu
.etaxonomy
.cdm
.common
.monitor
.NullProgressMonitor
;
44 import eu
.etaxonomy
.cdm
.common
.monitor
.SubProgressMonitor
;
45 import eu
.etaxonomy
.cdm
.model
.common
.DefinedTermBase
;
46 import eu
.etaxonomy
.cdm
.model
.common
.Extension
;
47 import eu
.etaxonomy
.cdm
.model
.common
.ExtensionType
;
48 import eu
.etaxonomy
.cdm
.model
.common
.Marker
;
49 import eu
.etaxonomy
.cdm
.model
.common
.MarkerType
;
50 import eu
.etaxonomy
.cdm
.model
.common
.OrderedTermBase
;
51 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
52 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
53 import eu
.etaxonomy
.cdm
.model
.description
.PresenceAbsenceTerm
;
54 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
55 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
56 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
57 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
58 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
59 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
60 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
61 import eu
.etaxonomy
.cdm
.persistence
.dao
.taxon
.IClassificationDao
;
62 import eu
.etaxonomy
.cdm
.persistence
.dto
.ClassificationLookupDTO
;
65 * The TransmissionEngineDistribution is meant to be used from within a service class.
67 * <h2>GENERAL NOTES </h2>
68 * <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
69 * version 14 written in Visual Basic and still need to be
70 * adapted to the java version of the transmission engine!</em>
72 * <h3>summaryStatus</h3>
74 * Each distribution information has a summaryStatus, this is an summary of the status codes
75 * as stored in the fields of emOccurrence native, introduced, cultivated, ...
76 * The summaryStatus seems to be equivalent to the CDM DistributionStatus
78 * <h3>map generation</h3>
80 * When generating maps from the accumulated distribution information some special cases have to be handled:
* <li>if an entered or imported status information exists for the same area for which calculated (accumulated)
83 * data is available, the calculated data has to be given preference over other data.
85 * <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
86 * status should be shown in the map, whereas the super area should be ignored.
90 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
91 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
96 public class TransmissionEngineDistribution
{ //TODO extends IoBase?
// Prefix of the Extension value that encodes a status term's priority,
// see getPriorityFor() and updatePriorities().
public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";

public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);

/**
 * only used for performance testing
 */
final boolean ONLY_FISRT_BATCH = false; // NOTE(review): name misspells "FIRST"

// Property-path init strategy applied when loading taxa in batches, so that
// descriptions, their elements, areas and sources are initialized up front.
// NOTE(review): the closing "});" of this initializer is not visible in this
// view of the file.
protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String[] {
        "description.markers.markerType",
        "description.elements.markers.markerType",
        "description.elements.area",
        "description.elements.sources.citation.authorship",
        "description.elements.sources.nameUsedInSource",
        "description.elements.multilanguageText",

/**
 * A map which contains the status terms as key and the priority as value
 * The map will contain both, the PresenceTerms and the AbsenceTerms
 */
private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;

// Collaborating services — presumably Spring-injected; the @Autowired
// annotations are not visible in this view of the file (TODO confirm).
private IDescriptionService descriptionService;

private ITermService termService;

private ITaxonService taxonService;

private IClassificationService classificationService;

private IClassificationDao classificationDao;

// NOTE(review): field name looks like a typo for "nameService"; it is only
// referenced from commented-out code below, so renaming would be safe once confirmed.
private INameService mameService;

private HibernateTransactionManager transactionManager;

// Status terms ignored during by-area accumulation; lazily initialized,
// see getByAreaIgnoreStatusList().
private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;

// Status terms ignored during by-rank accumulation; lazily initialized,
// see getByRankIgnoreStatusList().
private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;

// Cache of super area -> directly included sub areas, filled by getSubAreasFor().
private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();
154 * byAreaIgnoreStatusList contains by default:
156 * <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
157 * <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
158 * <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
159 * <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
160 * <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
163 * @return the byAreaIgnoreStatusList
165 public List
<PresenceAbsenceTerm
> getByAreaIgnoreStatusList() {
166 if(byAreaIgnoreStatusList
== null ){
167 byAreaIgnoreStatusList
= Arrays
.asList(
168 new PresenceAbsenceTerm
[] {
169 PresenceAbsenceTerm
.CULTIVATED_REPORTED_IN_ERROR(),
170 PresenceAbsenceTerm
.INTRODUCED_REPORTED_IN_ERROR(),
171 PresenceAbsenceTerm
.NATIVE_REPORTED_IN_ERROR(),
172 PresenceAbsenceTerm
.INTRODUCED_FORMERLY_INTRODUCED(),
173 PresenceAbsenceTerm
.NATIVE_FORMERLY_NATIVE()
174 // TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
177 return byAreaIgnoreStatusList
;
181 * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
183 public void setByAreaIgnoreStatusList(List
<PresenceAbsenceTerm
> byAreaIgnoreStatusList
) {
184 this.byAreaIgnoreStatusList
= byAreaIgnoreStatusList
;
188 * byRankIgnoreStatusList contains by default
190 * <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
193 * @return the byRankIgnoreStatusList
195 public List
<PresenceAbsenceTerm
> getByRankIgnoreStatusList() {
197 if (byRankIgnoreStatusList
== null) {
198 byRankIgnoreStatusList
= Arrays
.asList(
199 new PresenceAbsenceTerm
[] {
200 PresenceAbsenceTerm
.ENDEMIC_FOR_THE_RELEVANT_AREA()
203 return byRankIgnoreStatusList
;
207 * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
209 public void setByRankIgnoreStatusList(List
<PresenceAbsenceTerm
> byRankIgnoreStatusList
) {
210 this.byRankIgnoreStatusList
= byRankIgnoreStatusList
;
/**
 * Default no-argument constructor; the constructor body is not visible in this
 * view of the file.
 */
public TransmissionEngineDistribution() {
221 * initializes the map which contains the status terms as key and the priority as value
222 * The map will contain both, the PresenceTerms and the AbsenceTerms
224 private void initializeStatusPriorityMap() {
226 statusPriorityMap
= new HashMap
<PresenceAbsenceTerm
, Integer
>();
230 for(PresenceAbsenceTerm term
: termService
.list(PresenceAbsenceTerm
.class, null, null, null, null)){
231 priority
= getPriorityFor(term
);
232 if(priority
!= null){
233 statusPriorityMap
.put(term
, priority
);
/**
 * Compares the PresenceAbsenceTermBase terms <code>a</code> and <code>b</code> and
 * returns the PresenceAbsenceTermBase with the higher priority as stored in the statusPriorityMap.
 * If either a or b are null b or a is returned.
 *
 * @see initializeStatusPriorityMap()
 *
 * @param a first term to compare, may be null
 * @param b second term to compare, may be null
 * @return the term with the higher priority
 */
// NOTE(review): the null-handling branches described in the javadoc and the
// return statements of this method are not visible in this view of the file;
// the code below keeps the visible statements unchanged.
private PresenceAbsenceTerm choosePreferred(PresenceAbsenceTerm a, PresenceAbsenceTerm b){

    // the priority map is built lazily on first use
    if (statusPriorityMap == null) {
        initializeStatusPriorityMap();

    if (statusPriorityMap.get(a) == null) {
        logger.warn("No priority found in map for " + a.getLabel());

    if (statusPriorityMap.get(b) == null) {
        logger.warn("No priority found in map for " + b.getLabel());

    // both priorities known: higher priority wins
    if(statusPriorityMap.get(a) > statusPriorityMap.get(b)){
278 * reads the priority for the given status term from the extensions.
281 * @return the priority value
283 private Integer
getPriorityFor(DefinedTermBase
<?
> term
) {
284 Set
<Extension
> extensions
= term
.getExtensions();
285 for(Extension extension
: extensions
){
286 if(!extension
.getType().equals(ExtensionType
.ORDER())) {
289 int pos
= extension
.getValue().indexOf(EXTENSION_VALUE_PREFIX
);
290 if(pos
== 0){ // if starts with EXTENSION_VALUE_PREFIX
292 Integer priority
= Integer
.valueOf(extension
.getValue().substring(EXTENSION_VALUE_PREFIX
.length()));
294 } catch (NumberFormatException e
) {
295 logger
.warn("Invalid number format in Extension:" + extension
.getValue());
299 logger
.warn("no priority defined for '" + term
.getLabel() + "'");
/**
 * Runs the distribution aggregation:
 * <ul>
 * <li>Step 1: Accumulate occurrence records by area</li>
 * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank,
 * the status of all children are accumulated on each rank starting from
 * lower rank to upper rank.</li>
 * </ul>
 *
 * @param mode selects which of the two steps above are executed
 * @param superAreas the areas to which the subordinate areas should be projected
 * @param lowerRank rank at which the by-rank accumulation starts
 * @param upperRank rank at which the by-rank accumulation ends
 * @param classification limit the accumulation process to a specific classification
 *            (not yet implemented)
 * @param monitor the progress monitor to use for reporting progress to the
 *            user. It is the caller's responsibility to call done() on the
 *            given monitor. Accepts null, indicating that no progress
 *            should be reported and that the operation cannot be cancelled.
 */
// NOTE(review): several closing braces and short statements (else, break,
// monitor.done()) of this method are not visible in this view of the file;
// the code below keeps the visible statements unchanged.
public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
        Classification classification, IProgressMonitor monitor) {

    // fall back to a no-op monitor so the code below can report unconditionally
    if (monitor == null) {
        monitor = new NullProgressMonitor();

    logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations

    logger.info("Hibernate JDBC Batch size: "
            + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());

    // only for debugging:
    logger.setLevel(Level.INFO); // NOTE(review): duplicates the setLevel() call above
    //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);

    // when no classification is given, accumulate over all classifications
    Set<Classification> classifications = new HashSet<Classification>();
    if(classification == null) {
        classifications.addAll(classificationService.listClassifications(null, null, null, null));
        // NOTE(review): an 'else' is expected before the following statement but
        // is not visible in this view
        classifications.add(classification);

    // byAreasAndRanks runs both steps, hence twice the work ticks
    int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;

    // take start time for performance testing
    // NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
    double start = System.currentTimeMillis();

    monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );

    for(Classification _classification : classifications) {

        ClassificationLookupDTO classificationLookupDao = classificationDao.classificationLookup(_classification);

        monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
        if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
            // NOTE(review): the last argument repeats the enclosing condition and
            // is therefore always true here — confirm this is intended
            accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200),
                    mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks));
        monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());

        double end1 = System.currentTimeMillis();

        logger.info("Time elapsed for accumulateByArea() : " + (end1 - start) / (1000) + "s");

        double start2 = System.currentTimeMillis();
        if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
            // NOTE(review): passes 'classification' (the method parameter, may be
            // null) rather than '_classification' (the loop variable) — confirm
            // this is intended
            accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200),
                    mode.equals(AggregationMode.byRanks));

        double end2 = System.currentTimeMillis();
        logger.info("Time elapsed for accumulateByRank() : " + (end2 - start2) / (1000) + "s");
        logger.info("Time elapsed for accumulate(): " + (end2 - start) / (1000) + "s");

        // stop after the first classification when performance testing
        if(ONLY_FISRT_BATCH) {
393 private Session
getSession() {
394 return descriptionService
.getSession();
/**
 * Step 1: Accumulate occurrence records by area
 * <ul>
 * <li>areas are projected to super areas e.g.: HS <-- HS(A), HS(G), HS(S)</li>
 * <li>super areas do initially not have a status set ==> Prerequisite to check in CDM</li>
 * <li>areas having a summary status of summary value different from {@link #getByAreaIgnoreStatusList()} are ignored</li>
 * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
 * <li>the source references of the accumulated distributions are also accumulated into the new distribution,</li>
 * <li>this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
 * </ul>
 *
 * @param superAreas the areas to which the subordinate areas should be projected
 * @param classificationLookupDao lookup of the classification to process
 * @param subMonitor progress monitor for this step
 * @param doClearDescriptions when true, existing computed distributions are
 *            removed from the reused description (see findComputedDescription())
 */
// NOTE(review): several short statements (continue, break, a loop-variable
// declaration for 'isLastPage') and closing braces of this method are not
// visible in this view of the file; the code below keeps the visible
// statements unchanged.
protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao,
        IProgressMonitor subMonitor, boolean doClearDescriptions) {

    int batchSize = 1000;

    TransactionStatus txStatus = startTransaction(false);

    // reload superAreas TODO is it faster to getSession().merge(object) ??
    Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
    for (NamedArea superArea : superAreas){
        superAreaUuids.add(superArea.getUuid());

    List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);

    // visit all accepted taxa
    subMonitor.beginTask("Accumulating by area ", classificationLookupDao.getTaxonIds().size());
    Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();

    while (taxonIdIterator.hasNext()) {
        // NOTE(review): 'isLastPage' is not declared in the visible part of this method
        while (!isLastPage) {

        if(txStatus == null) {
            // transaction has been committed at the end of this batch, start a new one
            txStatus = startTransaction(false);

        // load taxa for this batch
        List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
        Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
        while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize) {
            taxonIds.add(taxonIdIterator.next());

        // logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");

        taxa = taxonService.listByIds(taxonIds, null, null, null, TAXONDESCRIPTION_INIT_STRATEGY);

        // iterate over the taxa and accumulate areas
        for(TaxonBase taxon : taxa) {
            if(logger.isDebugEnabled()){
                logger.debug("accumulateByArea() - taxon :" + taxonToString(taxon));

            TaxonDescription description = findComputedDescription((Taxon)taxon, doClearDescriptions);
            List<Distribution> distributions = distributionsFor((Taxon)taxon);

            // Step through superAreas for accumulation of subAreas
            for (NamedArea superArea : superAreaList){

                // accumulate all sub area status
                PresenceAbsenceTerm accumulatedStatus = null;
                // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
                Set<NamedArea> subAreas = getSubAreasFor(superArea);
                for(NamedArea subArea : subAreas){
                    if(logger.isTraceEnabled()){
                        logger.trace("accumulateByArea() - \t\t" + termToString(subArea));

                    // step through all distributions for the given subArea
                    for(Distribution distribution : distributions){
                        if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
                            PresenceAbsenceTerm status = distribution.getStatus();
                            if(logger.isTraceEnabled()){
                                logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));

                            // skip all having a status value different of those in byAreaIgnoreStatusList
                            if (getByAreaIgnoreStatusList().contains(status)){

                            // keep the sub-area status with the highest priority
                            accumulatedStatus = choosePreferred(accumulatedStatus, status);

                if (accumulatedStatus != null) {
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));

                    // store new distribution element for superArea in taxon description
                    Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
                    newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                    description.addElement(newDistribitionElement);

            } // next super area ....

            descriptionService.saveOrUpdate(description);
            taxonService.saveOrUpdate(taxon);
            subMonitor.worked(1);

        // commit for every batch, otherwise the persistent context
        // may grow too much and eats up all the heap
        commitTransaction(txStatus);

        // stop after the first batch when performance testing
        if(ONLY_FISRT_BATCH) {

    } // next batch of taxa
524 private String
taxonToString(TaxonBase taxon
) {
525 if(logger
.isTraceEnabled()) {
526 return taxon
.getTitleCache();
528 return taxon
.toString();
537 private String
termToString(OrderedTermBase
<?
> term
) {
538 if(logger
.isTraceEnabled()) {
539 return term
.getLabel() + " [" + term
.getIdInVocabulary() + "]";
541 return term
.getIdInVocabulary();
/**
 * Step 2: Accumulate by ranks starting from lower rank to upper rank, the status of all children
 * are accumulated on each rank starting from lower rank to upper rank.
 * <ul>
 * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
 * up to upper rank (e.g. Genus)</li>
 * <li>the accumulation is done for each distribution area found in the included taxa</li>
 * <li>areas of subtaxa with status endemic are ignored</li>
 * <li>the status with the highest priority determines the value for the accumulated distribution</li>
 * <li>the source reference of the accumulated distributions are also accumulated into the new distribution,
 * this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
 * </ul>
 */
// NOTE(review): several short statements (the 'batchSize' and 'pageIndex'
// declarations, continue, break, else) and closing braces of this method are
// not visible in this view of the file; the code below keeps the visible
// statements unchanged.
protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification,
        IProgressMonitor subMonitor, boolean doClearDescriptions) {

    TransactionStatus txStatus = startTransaction(false);

    // the loadRankSpecificRootNodes() method not only finds
    // taxa of the specified rank but also taxa of lower ranks
    // if no taxon of the specified rank exists, so we need to
    // remember which taxa have been processed already
    Set<Integer> taxaProcessedIds = new HashSet<Integer>();

    // build the list of ranks to visit, walking up from lowerRank to upperRank
    Rank currentRank = lowerRank;
    List<Rank> ranks = new ArrayList<Rank>();
    ranks.add(currentRank);
    while (!currentRank.isHigher(upperRank)) {
        currentRank = findNextHigherRank(currentRank);
        ranks.add(currentRank);

    int ticksPerRank = 100;
    subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);

    for (Rank rank : ranks) {

        if(logger.isDebugEnabled()){
            logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");

        // page through all rank-specific root nodes of the current rank
        Pager<TaxonNode> taxonPager = null;
        boolean isLastPage = false;
        SubProgressMonitor taxonSubMonitor = null;
        while (!isLastPage) {

            if(txStatus == null) {
                // transaction has been committed at the end of this batch, start a new one
                txStatus = startTransaction(false);

            // NOTE(review): 'batchSize' and 'pageIndex' are not declared in the
            // visible part of this method
            taxonPager = classificationService
                    .pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null);

            if(taxonSubMonitor == null) {
                taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
                taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount().intValue());

            if(taxonPager != null){
                if(logger.isDebugEnabled()){
                    logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");

                logger.error("accumulateByRank() - taxonNode pager was NULL");

            if(taxonPager != null){
                isLastPage = taxonPager.getRecords().size() < batchSize;
                if (taxonPager.getRecords().size() == 0){

                for(TaxonNode taxonNode : taxonPager.getRecords()) {

                    Taxon taxon = taxonNode.getTaxon();
                    // skip taxa already handled when processing a lower rank
                    if (taxaProcessedIds.contains(taxon.getId())) {
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));

                    taxaProcessedIds.add(taxon.getId());
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));

                    // Step through direct taxonomic children for accumulation
                    Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();

                    for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){

                        getSession().setReadOnly(taxonNode, true);
                        if(logger.isTraceEnabled()){
                            logger.trace(" subtaxon :" + taxonToString(subTaxonNode.getTaxon()));

                        for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
                            PresenceAbsenceTerm status = distribution.getStatus();
                            NamedArea area = distribution.getArea();
                            // skip statuses in the by-rank ignore list (e.g. endemic)
                            if (status == null || getByRankIgnoreStatusList().contains(status)){

                            // keep the status with the highest priority per area
                            accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));

                    if(accumulatedStatusMap.size() > 0) {
                        TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                        for (NamedArea area : accumulatedStatusMap.keySet()) {
                            // store new distribution element in new Description
                            Distribution newDistribitionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
                            newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                            description.addElement(newDistribitionElement);

                        taxonService.saveOrUpdate(taxon);
                        descriptionService.saveOrUpdate(description);

                    taxonSubMonitor.worked(1); // one taxon worked

                } // next taxon node ....

            // commit for every batch, otherwise the persistent context
            // may grow too much and eats up all the heap
            commitTransaction(txStatus);

            // stop after the first batch when performance testing
            if(ONLY_FISRT_BATCH) {

        taxonSubMonitor.done();
        subMonitor.worked(1);

        if(ONLY_FISRT_BATCH) {
697 private void flushAndClear() {
698 logger
.debug("flushing and clearing session ...");
699 getSession().flush();
701 Search
.getFullTextSession(getSession()).flushToIndexes();
702 } catch (HibernateException e
) {
703 /* IGNORE - Hibernate Search Event listeners not configured ... */
704 if(!e
.getMessage().startsWith("Hibernate Search Event listeners not configured")){
708 getSession().clear();
712 // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
713 public TransactionStatus
startTransaction(Boolean readOnly
) {
715 DefaultTransactionDefinition defaultTxDef
= new DefaultTransactionDefinition();
716 defaultTxDef
.setReadOnly(readOnly
);
717 TransactionDefinition txDef
= defaultTxDef
;
719 // Log some transaction-related debug information.
720 if (logger
.isTraceEnabled()) {
721 logger
.trace("Transaction name = " + txDef
.getName());
722 logger
.trace("Transaction facets:");
723 logger
.trace("Propagation behavior = " + txDef
.getPropagationBehavior());
724 logger
.trace("Isolation level = " + txDef
.getIsolationLevel());
725 logger
.trace("Timeout = " + txDef
.getTimeout());
726 logger
.trace("Read Only = " + txDef
.isReadOnly());
727 // org.springframework.orm.hibernate5.HibernateTransactionManager
728 // provides more transaction/session-related debug information.
731 TransactionStatus txStatus
= transactionManager
.getTransaction(txDef
);
733 getSession().setFlushMode(FlushMode
.COMMIT
);
738 // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
739 public void commitTransaction(TransactionStatus txStatus
){
740 logger
.debug("commiting transaction ...");
741 transactionManager
.commit(txStatus
);
746 * returns the next higher rank
748 * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
753 private Rank
findNextHigherRank(Rank rank
) {
754 rank
= (Rank
) termService
.load(rank
.getUuid());
755 return rank
.getNextHigherTerm();
756 // OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();;
757 // return rankVocabulary.getNextHigherTerm(rank);
/**
 * Either finds an existing taxon description of the given taxon or creates a new one.
 * If the doClear is set all existing description elements will be cleared.
 *
 * @param taxon the taxon to find or create a computed description for
 * @param doClear will remove all existing Distributions if the taxon already
 *            has a MarkerType.COMPUTED() TaxonDescription
 * @return the computed description (reused or newly created)
 */
// NOTE(review): several statements (the doClear guard, the 'deleteCount'
// declaration, return statements) and closing braces of this method are not
// visible in this view of the file; the code below keeps the visible
// statements unchanged.
private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {

    String descriptionTitle = this.getClass().getSimpleName();

    // look for an existing description flagged as COMPUTED and reuse it
    for (TaxonDescription description : taxon.getDescriptions()) {
        if (description.hasMarker(MarkerType.COMPUTED(), true)) {
            logger.debug("reusing description for " + taxon.getTitleCache());

            // collect all Distribution elements for removal — they will be recomputed
            Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
            for (DescriptionElementBase descriptionElement : description.getElements()) {
                if(descriptionElement instanceof Distribution) {
                    deleteCandidates.add(descriptionElement);

            if(deleteCandidates.size() > 0){
                for(DescriptionElementBase descriptionElement : deleteCandidates) {
                    description.removeElement(descriptionElement);
                    descriptionService.deleteDescriptionElement(descriptionElement);
                    descriptionElement = null;

                descriptionService.saveOrUpdate(description);
                // NOTE(review): 'deleteCount' is not declared in the visible part
                // of this method
                logger.debug("\t" + deleteCount +" distributions cleared");

    // no reusable computed description found: create a new one
    logger.debug("creating new description for " + taxon.getTitleCache());
    TaxonDescription description = TaxonDescription.NewInstance(taxon);
    description.setTitleCache(descriptionTitle, true);
    description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
813 private Set
<NamedArea
> getSubAreasFor(NamedArea superArea
) {
815 if(!subAreaMap
.containsKey(superArea
)) {
816 if(logger
.isDebugEnabled()){
817 logger
.debug("loading included areas for " + superArea
.getLabel());
819 subAreaMap
.put(superArea
, superArea
.getIncludes());
821 return subAreaMap
.get(superArea
);
828 private List
<Distribution
> distributionsFor(Taxon taxon
) {
829 List
<Distribution
> distributions
= new ArrayList
<Distribution
>();
830 for(TaxonDescription description
: taxon
.getDescriptions()) {
831 for(DescriptionElementBase deb
: description
.getElements()) {
832 if(deb
instanceof Distribution
) {
833 distributions
.add((Distribution
)deb
);
837 return distributions
;
/**
 * Sets the priorities for presence and absence terms, the priorities are stored in extensions.
 * This method will start a new transaction and commits it after the work is done.
 */
// NOTE(review): a few short statements (continue/break inside the extension
// loop) and closing braces of this method are not visible in this view of the
// file; the code below keeps the visible statements unchanged.
public void updatePriorities() {

    TransactionStatus txStatus = startTransaction(false);

    // hard-coded priority values; the higher value wins in choosePreferred()
    Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();

    priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
    priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
    priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
    priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
    priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
    priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
    priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
    priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
    priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
    priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
    priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
    priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
    priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
    priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
    priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
    priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);

    for(PresenceAbsenceTerm term : priorityMap.keySet()) {

        // reload the term into the current session before modifying it
        term = (PresenceAbsenceTerm) termService.load(term.getUuid());

        // find the extension
        Extension priorityExtension = null;
        Set<Extension> extensions = term.getExtensions();
        for(Extension extension : extensions){
            // only ORDER extensions can carry a priority
            if (!extension.getType().equals(ExtensionType.ORDER())) {

            int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
            if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
                priorityExtension = extension;

        // no matching extension yet: create a fresh ORDER extension on the term
        if(priorityExtension == null) {
            priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());

        priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));

        termService.saveOrUpdate(term);
        if (logger.isDebugEnabled()) {
            logger.debug("Priority updated for " + term.getLabel());

    commitTransaction(txStatus);
898 public enum AggregationMode
{