attempt to reduce the overhead imposed by database access
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / description / TransmissionEngineDistribution.java
1 // $Id$
2 /**
3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.api.service.description;
11
12 import java.util.ArrayList;
13 import java.util.Arrays;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Iterator;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.Set;
20 import java.util.UUID;
21
22 import org.apache.log4j.Level;
23 import org.apache.log4j.Logger;
24 import org.hibernate.FlushMode;
25 import org.hibernate.HibernateException;
26 import org.hibernate.Session;
27 import org.hibernate.engine.spi.SessionFactoryImplementor;
28 import org.hibernate.search.Search;
29 import org.springframework.beans.factory.annotation.Autowired;
30 import org.springframework.orm.hibernate5.HibernateTransactionManager;
31 import org.springframework.stereotype.Service;
32 import org.springframework.transaction.TransactionDefinition;
33 import org.springframework.transaction.TransactionStatus;
34 import org.springframework.transaction.support.DefaultTransactionDefinition;
35
36 import eu.etaxonomy.cdm.api.service.IClassificationService;
37 import eu.etaxonomy.cdm.api.service.IDescriptionService;
38 import eu.etaxonomy.cdm.api.service.INameService;
39 import eu.etaxonomy.cdm.api.service.ITaxonService;
40 import eu.etaxonomy.cdm.api.service.ITermService;
41 import eu.etaxonomy.cdm.api.service.pager.Pager;
42 import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
43 import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
44 import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
45 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
46 import eu.etaxonomy.cdm.model.common.Extension;
47 import eu.etaxonomy.cdm.model.common.ExtensionType;
48 import eu.etaxonomy.cdm.model.common.Marker;
49 import eu.etaxonomy.cdm.model.common.MarkerType;
50 import eu.etaxonomy.cdm.model.common.OrderedTermBase;
51 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
52 import eu.etaxonomy.cdm.model.description.Distribution;
53 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
54 import eu.etaxonomy.cdm.model.description.TaxonDescription;
55 import eu.etaxonomy.cdm.model.location.NamedArea;
56 import eu.etaxonomy.cdm.model.name.Rank;
57 import eu.etaxonomy.cdm.model.taxon.Classification;
58 import eu.etaxonomy.cdm.model.taxon.Taxon;
59 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
60 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
61 import eu.etaxonomy.cdm.persistence.dao.taxon.IClassificationDao;
62 import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
63
64 /**
65 * The TransmissionEngineDistribution is meant to be used from within a service class.
66 *
67 * <h2>GENERAL NOTES </h2>
68 * <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
69 * version 14 written in Visual Basic and still need to be
70 * adapted to the java version of the transmission engine!</em>
71 *
72 * <h3>summaryStatus</h3>
73 *
74 * Each distribution information has a summaryStatus, this is an summary of the status codes
75 * as stored in the fields of emOccurrence native, introduced, cultivated, ...
76 * The summaryStatus seems to be equivalent to the CDM DistributionStatus
77 *
78 * <h3>map generation</h3>
79 *
80 * When generating maps from the accumulated distribution information some special cases have to be handled:
81 * <ol>
82 * <li>if a entered or imported status information exist for the same area for which calculated (accumulated)
83 * data is available, the calculated data has to be given preference over other data.
84 * </li>
85 * <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
86 * status should be shown in the map, whereas the super area should be ignored.
87 * </li>
88 * </ol>
89 *
90 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
91 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
92 * @date Feb 22, 2013
93 */
@Service

public class TransmissionEngineDistribution { //TODO extends IoBase?

    /**
     * Prefix of the {@link Extension} value under which a status term's
     * priority is stored, e.g. "transmissionEngineDistribution.priority:30".
     */
    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";

    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);

    /**
     * only used for performance testing
     */
    // NOTE(review): name misspells "FIRST"; kept as-is since renaming would change the field's accessible name
    final boolean ONLY_FISRT_BATCH = false;


    /**
     * Bean-property paths to eagerly initialize when taxa are loaded in
     * batches, so that the accumulation steps do not trigger lazy loading.
     */
    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
            "description.markers.markerType",
            "description.elements.markers.markerType",
            "description.elements.area",
            "description.elements.sources.citation.authorship",
            "description.elements.sources.nameUsedInSource",
            "description.elements.multilanguageText",
            "name.status.type",
    });


    /**
     * A map which contains the status terms as key and the priority as value
     * The map will contain both, the PresenceTerms and the AbsenceTerms
     */
    private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;

    @Autowired
    private IDescriptionService descriptionService;

    @Autowired
    private ITermService termService;

    @Autowired
    private ITaxonService taxonService;

    @Autowired
    private IClassificationService classificationService;

    @Autowired
    private IClassificationDao classificationDao;

    // NOTE(review): field name misspells "nameService"; only referenced from commented-out code
    @Autowired
    private INameService mameService;

    @Autowired
    private HibernateTransactionManager transactionManager;

    // lazily initialized via getByAreaIgnoreStatusList()
    private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;

    // lazily initialized via getByRankIgnoreStatusList()
    private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;

    // cache of superArea -> included sub areas, filled on demand by getSubAreasFor()
    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();
151
152
153 /**
154 * byAreaIgnoreStatusList contains by default:
155 * <ul>
156 * <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
157 * <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
158 * <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
159 * <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
160 * <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
161 * </ul>
162 *
163 * @return the byAreaIgnoreStatusList
164 */
165 public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
166 if(byAreaIgnoreStatusList == null ){
167 byAreaIgnoreStatusList = Arrays.asList(
168 new PresenceAbsenceTerm[] {
169 PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
170 PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
171 PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
172 PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
173 PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
174 // TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
175 });
176 }
177 return byAreaIgnoreStatusList;
178 }
179
180 /**
181 * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
182 */
183 public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
184 this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
185 }
186
187 /**
188 * byRankIgnoreStatusList contains by default
189 * <ul>
190 * <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
191 * </ul>
192 *
193 * @return the byRankIgnoreStatusList
194 */
195 public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {
196
197 if (byRankIgnoreStatusList == null) {
198 byRankIgnoreStatusList = Arrays.asList(
199 new PresenceAbsenceTerm[] {
200 PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
201 });
202 }
203 return byRankIgnoreStatusList;
204 }
205
206 /**
207 * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
208 */
209 public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
210 this.byRankIgnoreStatusList = byRankIgnoreStatusList;
211 }
212
    /**
     * Default constructor. All collaborators are injected by Spring via the
     * {@code @Autowired} fields, so no arguments are needed here.
     */
    public TransmissionEngineDistribution() {
    }
219
220 /**
221 * initializes the map which contains the status terms as key and the priority as value
222 * The map will contain both, the PresenceTerms and the AbsenceTerms
223 */
224 private void initializeStatusPriorityMap() {
225
226 statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
227 Integer priority;
228
229 // PresenceTerms
230 for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
231 priority = getPriorityFor(term);
232 if(priority != null){
233 statusPriorityMap.put(term, priority);
234 }
235 }
236 }
237
238 /**
239 * Compares the PresenceAbsenceTermBase terms <code>a</code> and <code>b</code> and
240 * returns the PresenceAbsenceTermBase with the higher priority as stored in the statusPriorityMap.
241 * If either a or b are null b or a is returned.
242 *
243 * @see initializeStatusPriorityMap()
244 *
245 * @param a
246 * @param b
247 * @return
248 */
249 private PresenceAbsenceTerm choosePreferred(PresenceAbsenceTerm a, PresenceAbsenceTerm b){
250
251 if (statusPriorityMap == null) {
252 initializeStatusPriorityMap();
253 }
254
255 if (b == null) {
256 return a;
257 }
258 if (a == null) {
259 return b;
260 }
261
262 if (statusPriorityMap.get(a) == null) {
263 logger.warn("No priority found in map for " + a.getLabel());
264 return b;
265 }
266 if (statusPriorityMap.get(b) == null) {
267 logger.warn("No priority found in map for " + b.getLabel());
268 return a;
269 }
270 if(statusPriorityMap.get(a) > statusPriorityMap.get(b)){
271 return a;
272 } else {
273 return b;
274 }
275 }
276
277 /**
278 * reads the priority for the given status term from the extensions.
279 *
280 * @param term
281 * @return the priority value
282 */
283 private Integer getPriorityFor(DefinedTermBase<?> term) {
284 Set<Extension> extensions = term.getExtensions();
285 for(Extension extension : extensions){
286 if(!extension.getType().equals(ExtensionType.ORDER())) {
287 continue;
288 }
289 int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
290 if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
291 try {
292 Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
293 return priority;
294 } catch (NumberFormatException e) {
295 logger.warn("Invalid number format in Extension:" + extension.getValue());
296 }
297 }
298 }
299 logger.warn("no priority defined for '" + term.getLabel() + "'");
300 return null;
301 }
302
303 /**
304 * runs both steps
305 * <ul>
306 * <li>Step 1: Accumulate occurrence records by area</li>
307 * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank,
308 * the status of all children are accumulated on each rank starting from
309 * lower rank to upper rank.</li>
310 * </ul>
311 *
312 * @param superAreas
313 * the areas to which the subordinate areas should be projected.
314 * @param lowerRank
315 * @param upperRank
316 * @param classification
317 * @param classification
318 * limit the accumulation process to a specific classification
319 * (not yet implemented)
320 * @param monitor
321 * the progress monitor to use for reporting progress to the
322 * user. It is the caller's responsibility to call done() on the
323 * given monitor. Accepts null, indicating that no progress
324 * should be reported and that the operation cannot be cancelled.
325 */
326 public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
327 Classification classification, IProgressMonitor monitor) {
328
329 if (monitor == null) {
330 monitor = new NullProgressMonitor();
331 }
332
333 logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations
334
335 logger.info("Hibernate JDBC Batch size: "
336 + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
337
338 // only for debugging:
339 logger.setLevel(Level.INFO);
340 //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);
341
342 Set<Classification> classifications = new HashSet<Classification>();
343 if(classification == null) {
344 classifications.addAll(classificationService.listClassifications(null, null, null, null));
345 } else {
346 classifications.add(classification);
347 }
348
349 int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;
350
351 // take start time for performance testing
352 // NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
353 double start = System.currentTimeMillis();
354
355 monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
356 updatePriorities();
357 monitor.worked(1);
358
359 for(Classification _classification : classifications) {
360
361 ClassificationLookupDTO classificationLookupDao = classificationDao.classificationLookup(_classification);
362
363 monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
364 if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
365 accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200),
366 mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks));
367 }
368 monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());
369
370 double end1 = System.currentTimeMillis();
371
372 logger.info("Time elapsed for accumulateByArea() : " + (end1 - start) / (1000) + "s");
373
374 double start2 = System.currentTimeMillis();
375 if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
376 accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200),
377 mode.equals(AggregationMode.byRanks));
378 }
379
380 double end2 = System.currentTimeMillis();
381 logger.info("Time elapsed for accumulateByRank() : " + (end2 - start2) / (1000) + "s");
382 logger.info("Time elapsed for accumulate(): " + (end2 - start) / (1000) + "s");
383
384 if(ONLY_FISRT_BATCH) {
385 break;
386 }
387 }
388 }
389
390 /**
391 * @return
392 */
393 private Session getSession() {
394 return descriptionService.getSession();
395 }
396
397 /**
398 * Step 1: Accumulate occurrence records by area
399 * <ul>
400 * <li>areas are projected to super areas e.g.: HS <-- HS(A), HS(G), HS(S)</li>
401 * <li>super areas do initially not have a status set ==> Prerequisite to check in CDM</li>
402 * <li>areas having a summary status of summary value different from {@link #getByAreaIgnoreStatusList()} are ignored</li>
403 * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
404 * <li>the source references of the accumulated distributions are also accumulated into the new distribution,,</li>
405 * <li>this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
406 * </ul>
407 *
408 * @param superAreas
409 * the areas to which the subordinate areas should be projected
410 * @param classificationLookupDao
411 *
412 */
413 protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao, IProgressMonitor subMonitor, boolean doClearDescriptions) {
414
415 int batchSize = 1000;
416
417 TransactionStatus txStatus = startTransaction(false);
418
419 // reload superAreas TODO is it faster to getSession().merge(object) ??
420 Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
421 for (NamedArea superArea : superAreas){
422 superAreaUuids.add(superArea.getUuid());
423 }
424 List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);
425
426 // visit all accepted taxa
427 subMonitor.beginTask("Accumulating by area ", classificationLookupDao.getTaxonIds().size());
428 Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();
429
430 int pageIndex = 0;
431 while (taxonIdIterator.hasNext()) {
432 while (!isLastPage) {
433
434 if(txStatus == null) {
435 // transaction has been comitted at the end of this batch, start a new one
436 txStatus = startTransaction(false);
437 }
438
439 // load taxa for this batch
440 List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
441 Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
442 while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
443 taxonIds.add(taxonIdIterator.next());
444 }
445
446 // logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
447
448 taxa = taxonService.listByIds(taxonIds, null, null, null, TAXONDESCRIPTION_INIT_STRATEGY);
449
450 // iterate over the taxa and accumulate areas
451 for(TaxonBase taxon : taxa) {
452 if(logger.isDebugEnabled()){
453 logger.debug("accumulateByArea() - taxon :" + taxonToString(taxon));
454 }
455
456 TaxonDescription description = findComputedDescription((Taxon)taxon, doClearDescriptions);
457 List<Distribution> distributions = distributionsFor((Taxon)taxon);
458
459 // Step through superAreas for accumulation of subAreas
460 for (NamedArea superArea : superAreaList){
461
462 // accumulate all sub area status
463 PresenceAbsenceTerm accumulatedStatus = null;
464 // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
465 Set<NamedArea> subAreas = getSubAreasFor(superArea);
466 for(NamedArea subArea : subAreas){
467 if(logger.isTraceEnabled()){
468 logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
469 }
470 // step through all distributions for the given subArea
471 for(Distribution distribution : distributions){
472 if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
473 PresenceAbsenceTerm status = distribution.getStatus();
474 if(logger.isTraceEnabled()){
475 logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
476 }
477 // skip all having a status value different of those in byAreaIgnoreStatusList
478 if (getByAreaIgnoreStatusList().contains(status)){
479 continue;
480 }
481 accumulatedStatus = choosePreferred(accumulatedStatus, status);
482 }
483 }
484 } // next sub area
485 if (accumulatedStatus != null) {
486 if(logger.isDebugEnabled()){
487 logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));
488 }
489 // store new distribution element for superArea in taxon description
490 Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
491 newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
492 description.addElement(newDistribitionElement);
493 }
494
495 } // next super area ....
496
497 descriptionService.saveOrUpdate(description);
498 taxonService.saveOrUpdate(taxon);
499 subMonitor.worked(1);
500
501 } // next taxon
502
503 flushAndClear();
504
505 // commit for every batch, otherwise the persistent context
506 // may grow too much and eats up all the heap
507 commitTransaction(txStatus);
508 txStatus = null;
509
510 if(ONLY_FISRT_BATCH) {
511 break;
512 }
513
514 } // next batch of taxa
515
516 subMonitor.done();
517 }
518
519 /**
520 * @param taxon
521 * @param logger2
522 * @return
523 */
524 private String taxonToString(TaxonBase taxon) {
525 if(logger.isTraceEnabled()) {
526 return taxon.getTitleCache();
527 } else {
528 return taxon.toString();
529 }
530 }
531
532 /**
533 * @param taxon
534 * @param logger2
535 * @return
536 */
537 private String termToString(OrderedTermBase<?> term) {
538 if(logger.isTraceEnabled()) {
539 return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
540 } else {
541 return term.getIdInVocabulary();
542 }
543 }
544
    /**
     * Step 2: Accumulate by ranks staring from lower rank to upper rank, the status of all children
     * are accumulated on each rank starting from lower rank to upper rank.
     * <ul>
     * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
     * up to upper rank (e.g. Genus)</li>
     * <li>the accumulation id done for each distribution area found in the included taxa</li>
     * <li>areas of subtaxa with status endemic are ignored</li>
     * <li>the status with the highest priority determines the value for the accumulated distribution</li>
     * <li>the source reference of the accumulated distributions are also accumulated into the new distribution,
     * this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
     *</ul>
     *
     * @param lowerRank rank at which the accumulation starts
     * @param upperRank rank at which the accumulation stops (inclusive)
     * @param classification limit to this classification; NOTE(review): a null
     *            value reaches pageRankSpecificRootNodes() unchecked — confirm
     *            that the service treats null as "all classifications"
     * @param subMonitor progress monitor; ticksPerRank ticks are reported per rank
     * @param doClearDescriptions whether existing computed Distributions are
     *            removed first, see findComputedDescription()
     */
    protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification, IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 500;

        TransactionStatus txStatus = startTransaction(false);

        // the loadRankSpecificRootNodes() method not only finds
        // taxa of the specified rank but also taxa of lower ranks
        // if no taxon of the specified rank exists, so we need to
        // remember which taxa have been processed already
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();

        // build the list of ranks to visit, from lowerRank up to and including upperRank
        Rank currentRank = lowerRank;
        List<Rank> ranks = new ArrayList<Rank>();
        ranks.add(currentRank);
        while (!currentRank.isHigher(upperRank)) {
            currentRank = findNextHigherRank(currentRank);
            ranks.add(currentRank);
        }

        int ticksPerRank = 100;
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);

        for (Rank rank : ranks) {

            if(logger.isDebugEnabled()){
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
            }

            // page through the rank-specific root nodes; isLastPage is detected
            // by a page smaller than batchSize
            Pager<TaxonNode> taxonPager = null;
            int pageIndex = 0;
            boolean isLastPage = false;
            SubProgressMonitor taxonSubMonitor = null;
            while (!isLastPage) {

                if(txStatus == null) {
                    // transaction has been comitted at the end of this batch, start a new one
                    txStatus = startTransaction(false);
                }

                taxonPager = classificationService
                        .pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null);

                // the sub monitor is sized from the total count of the first page
                if(taxonSubMonitor == null) {
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
                    taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount().intValue());

                }

                if(taxonPager != null){
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
                    }
                } else {
                    logger.error("accumulateByRank() - taxonNode pager was NULL");
                }

                if(taxonPager != null){
                    isLastPage = taxonPager.getRecords().size() < batchSize;
                    if (taxonPager.getRecords().size() == 0){
                        break;
                    }

                    for(TaxonNode taxonNode : taxonPager.getRecords()) {

                        Taxon taxon = taxonNode.getTaxon();
                        // skip taxa already handled at a lower rank level
                        if (taxaProcessedIds.contains(taxon.getId())) {
                            if(logger.isDebugEnabled()){
                                logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
                            }
                            continue;
                        }
                        taxaProcessedIds.add(taxon.getId());
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
                        }

                        // Step through direct taxonomic children for accumulation
                        Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();

                        for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){

                            // NOTE(review): marks the parent node (not subTaxonNode)
                            // read-only on every child iteration — confirm intent
                            getSession().setReadOnly(taxonNode, true);
                            if(logger.isTraceEnabled()){
                                logger.trace("    subtaxon :" + taxonToString(subTaxonNode.getTaxon()));
                            }

                            for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                NamedArea area = distribution.getArea();
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
                                    continue;
                                }
                                // per area, keep the status with the highest priority
                                accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));
                            }
                        }

                        if(accumulatedStatusMap.size() > 0) {
                            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                            for (NamedArea area : accumulatedStatusMap.keySet()) {
                                // store new distribution element in new Description
                                Distribution newDistribitionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
                                newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                                description.addElement(newDistribitionElement);
                            }
                            taxonService.saveOrUpdate(taxon);
                            descriptionService.saveOrUpdate(description);
                        }
                        taxonSubMonitor.worked(1); // one taxon worked

                    } // next taxon node ....
                }
                taxonPager = null;
                flushAndClear();

                // commit for every batch, otherwise the persistent context
                // may grow too much and eats up all the heap
                commitTransaction(txStatus);
                txStatus = null;

                if(ONLY_FISRT_BATCH) {
                    break;
                }
            } // next batch

            taxonSubMonitor.done();
            subMonitor.worked(1);

            if(ONLY_FISRT_BATCH) {
                break;
            }
        } // next Rank

        subMonitor.done();
    }
693
694 /**
695 *
696 */
697 private void flushAndClear() {
698 logger.debug("flushing and clearing session ...");
699 getSession().flush();
700 try {
701 Search.getFullTextSession(getSession()).flushToIndexes();
702 } catch (HibernateException e) {
703 /* IGNORE - Hibernate Search Event listeners not configured ... */
704 if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
705 throw e;
706 }
707 }
708 getSession().clear();
709 }
710
711
712 // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
713 public TransactionStatus startTransaction(Boolean readOnly) {
714
715 DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
716 defaultTxDef.setReadOnly(readOnly);
717 TransactionDefinition txDef = defaultTxDef;
718
719 // Log some transaction-related debug information.
720 if (logger.isTraceEnabled()) {
721 logger.trace("Transaction name = " + txDef.getName());
722 logger.trace("Transaction facets:");
723 logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
724 logger.trace("Isolation level = " + txDef.getIsolationLevel());
725 logger.trace("Timeout = " + txDef.getTimeout());
726 logger.trace("Read Only = " + txDef.isReadOnly());
727 // org.springframework.orm.hibernate5.HibernateTransactionManager
728 // provides more transaction/session-related debug information.
729 }
730
731 TransactionStatus txStatus = transactionManager.getTransaction(txDef);
732
733 getSession().setFlushMode(FlushMode.COMMIT);
734
735 return txStatus;
736 }
737
738 // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
739 public void commitTransaction(TransactionStatus txStatus){
740 logger.debug("commiting transaction ...");
741 transactionManager.commit(txStatus);
742 return;
743 }
744
745 /**
746 * returns the next higher rank
747 *
748 * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
749 *
750 * @param rank
751 * @return
752 */
753 private Rank findNextHigherRank(Rank rank) {
754 rank = (Rank) termService.load(rank.getUuid());
755 return rank.getNextHigherTerm();
756 // OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();;
757 // return rankVocabulary.getNextHigherTerm(rank);
758 }
759
760 /**
761 * Either finds an existing taxon description of the given taxon or creates a new one.
762 * If the doClear is set all existing description elements will be cleared.
763 *
764 * @param taxon
765 * @param doClear will remove all existing Distributions if the taxon already
766 * has a MarkerType.COMPUTED() TaxonDescription
767 * @return
768 */
769 private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {
770
771 String descriptionTitle = this.getClass().getSimpleName();
772
773 // find existing one
774 for (TaxonDescription description : taxon.getDescriptions()) {
775 if (description.hasMarker(MarkerType.COMPUTED(), true)) {
776 logger.debug("reusing description for " + taxon.getTitleCache());
777 if (doClear) {
778 int deleteCount = 0;
779 Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
780 for (DescriptionElementBase descriptionElement : description.getElements()) {
781 if(descriptionElement instanceof Distribution) {
782 deleteCandidates.add(descriptionElement);
783 }
784 }
785 if(deleteCandidates.size() > 0){
786 for(DescriptionElementBase descriptionElement : deleteCandidates) {
787 description.removeElement(descriptionElement);
788 descriptionService.deleteDescriptionElement(descriptionElement);
789 descriptionElement = null;
790 deleteCount++;
791 }
792 descriptionService.saveOrUpdate(description);
793 logger.debug("\t" + deleteCount +" distributions cleared");
794 }
795
796 }
797 return description;
798 }
799 }
800
801 // create a new one
802 logger.debug("creating new description for " + taxon.getTitleCache());
803 TaxonDescription description = TaxonDescription.NewInstance(taxon);
804 description.setTitleCache(descriptionTitle, true);
805 description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
806 return description;
807 }
808
809 /**
810 * @param superArea
811 * @return
812 */
813 private Set<NamedArea> getSubAreasFor(NamedArea superArea) {
814
815 if(!subAreaMap.containsKey(superArea)) {
816 if(logger.isDebugEnabled()){
817 logger.debug("loading included areas for " + superArea.getLabel());
818 }
819 subAreaMap.put(superArea, superArea.getIncludes());
820 }
821 return subAreaMap.get(superArea);
822 }
823
824 /**
825 * @param taxon
826 * @return
827 */
828 private List<Distribution> distributionsFor(Taxon taxon) {
829 List<Distribution> distributions = new ArrayList<Distribution>();
830 for(TaxonDescription description: taxon.getDescriptions()) {
831 for(DescriptionElementBase deb : description.getElements()) {
832 if(deb instanceof Distribution) {
833 distributions.add((Distribution)deb);
834 }
835 }
836 }
837 return distributions;
838 }
839
840 /**
841 * Sets the priorities for presence and absence terms, the priorities are stored in extensions.
842 * This method will start a new transaction and commits it after the work is done.
843 */
844 public void updatePriorities() {
845
846 TransactionStatus txStatus = startTransaction(false);
847
848 Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
849
850 priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
851 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
852 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
853 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
854 priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
855 priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
856 priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
857 priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
858 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
859 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
860 priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
861 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
862 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
863 priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
864 priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
865 priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);
866
867 for(PresenceAbsenceTerm term : priorityMap.keySet()) {
868 // load the term
869 term = (PresenceAbsenceTerm) termService.load(term.getUuid());
870 // find the extension
871 Extension priorityExtension = null;
872 Set<Extension> extensions = term.getExtensions();
873 for(Extension extension : extensions){
874 if (!extension.getType().equals(ExtensionType.ORDER())) {
875 continue;
876 }
877 int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
878 if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
879 priorityExtension = extension;
880 break;
881 }
882 }
883 if(priorityExtension == null) {
884 priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
885 }
886 priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));
887
888 // save the term
889 termService.saveOrUpdate(term);
890 if (logger.isDebugEnabled()) {
891 logger.debug("Priority updated for " + term.getLabel());
892 }
893 }
894
895 commitTransaction(txStatus);
896 }
897
    /**
     * Selects which accumulation steps {@link #accumulate} executes:
     * only the area projection, only the rank roll-up, or both.
     */
    public enum AggregationMode {
        byAreas,
        byRanks,
        byAreasAndRanks

    }
904 }