attempt to reduce the overhead imposed by database access
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / description / TransmissionEngineDistribution.java
1 // $Id$
2 /**
3 * Copyright (C) 2013 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.api.service.description;
11
12 import java.util.ArrayList;
13 import java.util.Arrays;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Iterator;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.Set;
20 import java.util.UUID;
21
22 import org.apache.log4j.Level;
23 import org.apache.log4j.Logger;
24 import org.hibernate.FlushMode;
25 import org.hibernate.HibernateException;
26 import org.hibernate.Session;
27 import org.hibernate.engine.spi.SessionFactoryImplementor;
28 import org.hibernate.search.Search;
29 import org.springframework.beans.factory.annotation.Autowired;
30 import org.springframework.orm.hibernate5.HibernateTransactionManager;
31 import org.springframework.stereotype.Service;
32 import org.springframework.transaction.TransactionDefinition;
33 import org.springframework.transaction.TransactionStatus;
34 import org.springframework.transaction.support.DefaultTransactionDefinition;
35
36 import eu.etaxonomy.cdm.api.service.IClassificationService;
37 import eu.etaxonomy.cdm.api.service.IDescriptionService;
38 import eu.etaxonomy.cdm.api.service.INameService;
39 import eu.etaxonomy.cdm.api.service.ITaxonService;
40 import eu.etaxonomy.cdm.api.service.ITermService;
41 import eu.etaxonomy.cdm.api.service.pager.Pager;
42 import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
43 import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
44 import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
45 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
46 import eu.etaxonomy.cdm.model.common.Extension;
47 import eu.etaxonomy.cdm.model.common.ExtensionType;
48 import eu.etaxonomy.cdm.model.common.Marker;
49 import eu.etaxonomy.cdm.model.common.MarkerType;
50 import eu.etaxonomy.cdm.model.common.OrderedTermBase;
51 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
52 import eu.etaxonomy.cdm.model.description.Distribution;
53 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
54 import eu.etaxonomy.cdm.model.description.TaxonDescription;
55 import eu.etaxonomy.cdm.model.location.NamedArea;
56 import eu.etaxonomy.cdm.model.name.Rank;
57 import eu.etaxonomy.cdm.model.taxon.Classification;
58 import eu.etaxonomy.cdm.model.taxon.Taxon;
59 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
60 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
61 import eu.etaxonomy.cdm.persistence.dao.taxon.IClassificationDao;
62 import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
63
64 /**
65 * The TransmissionEngineDistribution is meant to be used from within a service class.
66 *
67 * <h2>GENERAL NOTES </h2>
68 * <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
69 * version 14 written in Visual Basic and still need to be
70 * adapted to the java version of the transmission engine!</em>
71 *
72 * <h3>summaryStatus</h3>
73 *
74 * Each distribution information has a summaryStatus, this is an summary of the status codes
75 * as stored in the fields of emOccurrence native, introduced, cultivated, ...
76 * The summaryStatus seems to be equivalent to the CDM DistributionStatus
77 *
78 * <h3>map generation</h3>
79 *
80 * When generating maps from the accumulated distribution information some special cases have to be handled:
81 * <ol>
82 * <li>if a entered or imported status information exist for the same area for which calculated (accumulated)
83 * data is available, the calculated data has to be given preference over other data.
84 * </li>
85 * <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
86 * status should be shown in the map, whereas the super area should be ignored.
87 * </li>
88 * </ol>
89 *
90 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
91 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
92 * @date Feb 22, 2013
93 */
@Service

public class TransmissionEngineDistribution { //TODO extends IoBase?

    /**
     * Prefix of the {@link Extension} value under which a status term's
     * priority is stored, e.g. "transmissionEngineDistribution.priority:30".
     */
    public static final String EXTENSION_VALUE_PREFIX = "transmissionEngineDistribution.priority:";

    public static final Logger logger = Logger.getLogger(TransmissionEngineDistribution.class);

    /**
     * only used for performance testing
     */
    // NOTE(review): name misspells "FIRST"; kept as-is since renaming would change the field's accessible name
    final boolean ONLY_FISRT_BATCH = false;


    /**
     * Bean-property paths to eagerly initialize when taxa are loaded in
     * batches, so that the accumulation steps do not trigger lazy loading.
     */
    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
            "description.markers.markerType",
            "description.elements.markers.markerType",
            "description.elements.area",
            "description.elements.sources.citation.authorship",
            "description.elements.sources.nameUsedInSource",
            "description.elements.multilanguageText",
            "name.status.type",
    });


    /**
     * A map which contains the status terms as key and the priority as value
     * The map will contain both, the PresenceTerms and the AbsenceTerms
     */
    private Map<PresenceAbsenceTerm, Integer> statusPriorityMap = null;

    @Autowired
    private IDescriptionService descriptionService;

    @Autowired
    private ITermService termService;

    @Autowired
    private ITaxonService taxonService;

    @Autowired
    private IClassificationService classificationService;

    @Autowired
    private IClassificationDao classificationDao;

    // NOTE(review): field name misspells "nameService"; only referenced from commented-out code
    @Autowired
    private INameService mameService;

    @Autowired
    private HibernateTransactionManager transactionManager;

    // lazily initialized via getByAreaIgnoreStatusList()
    private List<PresenceAbsenceTerm> byAreaIgnoreStatusList = null;

    // lazily initialized via getByRankIgnoreStatusList()
    private List<PresenceAbsenceTerm> byRankIgnoreStatusList = null;

    // cache of superArea -> included sub areas, filled on demand by getSubAreasFor()
    private final Map<NamedArea, Set<NamedArea>> subAreaMap = new HashMap<NamedArea, Set<NamedArea>>();
151
152
153 /**
154 * byAreaIgnoreStatusList contains by default:
155 * <ul>
156 * <li>AbsenceTerm.CULTIVATED_REPORTED_IN_ERROR()</li>
157 * <li>AbsenceTerm.INTRODUCED_REPORTED_IN_ERROR()</li>
158 * <li>AbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED()</li>
159 * <li>AbsenceTerm.NATIVE_REPORTED_IN_ERROR()</li>
160 * <li>AbsenceTerm.NATIVE_FORMERLY_NATIVE()</li>
161 * </ul>
162 *
163 * @return the byAreaIgnoreStatusList
164 */
165 public List<PresenceAbsenceTerm> getByAreaIgnoreStatusList() {
166 if(byAreaIgnoreStatusList == null ){
167 byAreaIgnoreStatusList = Arrays.asList(
168 new PresenceAbsenceTerm[] {
169 PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(),
170 PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(),
171 PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(),
172 PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(),
173 PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE()
174 // TODO what about PresenceAbsenceTerm.ABSENT() also ignore?
175 });
176 }
177 return byAreaIgnoreStatusList;
178 }
179
180 /**
181 * @param byAreaIgnoreStatusList the byAreaIgnoreStatusList to set
182 */
183 public void setByAreaIgnoreStatusList(List<PresenceAbsenceTerm> byAreaIgnoreStatusList) {
184 this.byAreaIgnoreStatusList = byAreaIgnoreStatusList;
185 }
186
187 /**
188 * byRankIgnoreStatusList contains by default
189 * <ul>
190 * <li>PresenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()</li>
191 * </ul>
192 *
193 * @return the byRankIgnoreStatusList
194 */
195 public List<PresenceAbsenceTerm> getByRankIgnoreStatusList() {
196
197 if (byRankIgnoreStatusList == null) {
198 byRankIgnoreStatusList = Arrays.asList(
199 new PresenceAbsenceTerm[] {
200 PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA()
201 });
202 }
203 return byRankIgnoreStatusList;
204 }
205
206 /**
207 * @param byRankIgnoreStatusList the byRankIgnoreStatusList to set
208 */
209 public void setByRankIgnoreStatusList(List<PresenceAbsenceTerm> byRankIgnoreStatusList) {
210 this.byRankIgnoreStatusList = byRankIgnoreStatusList;
211 }
212
    /**
     * Default constructor. All collaborators are injected by Spring via the
     * {@code @Autowired} fields, so no arguments are needed here.
     */
    public TransmissionEngineDistribution() {
    }
219
220 /**
221 * initializes the map which contains the status terms as key and the priority as value
222 * The map will contain both, the PresenceTerms and the AbsenceTerms
223 */
224 private void initializeStatusPriorityMap() {
225
226 statusPriorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
227 Integer priority;
228
229 // PresenceTerms
230 for(PresenceAbsenceTerm term : termService.list(PresenceAbsenceTerm.class, null, null, null, null)){
231 priority = getPriorityFor(term);
232 if(priority != null){
233 statusPriorityMap.put(term, priority);
234 }
235 }
236 }
237
238 /**
239 * Compares the PresenceAbsenceTermBase terms <code>a</code> and <code>b</code> and
240 * returns the PresenceAbsenceTermBase with the higher priority as stored in the statusPriorityMap.
241 * If either a or b are null b or a is returned.
242 *
243 * @see initializeStatusPriorityMap()
244 *
245 * @param a
246 * @param b
247 * @return
248 */
249 private PresenceAbsenceTerm choosePreferred(PresenceAbsenceTerm a, PresenceAbsenceTerm b){
250
251 if (statusPriorityMap == null) {
252 initializeStatusPriorityMap();
253 }
254
255 if (b == null) {
256 return a;
257 }
258 if (a == null) {
259 return b;
260 }
261
262 if (statusPriorityMap.get(a) == null) {
263 logger.warn("No priority found in map for " + a.getLabel());
264 return b;
265 }
266 if (statusPriorityMap.get(b) == null) {
267 logger.warn("No priority found in map for " + b.getLabel());
268 return a;
269 }
270 if(statusPriorityMap.get(a) > statusPriorityMap.get(b)){
271 return a;
272 } else {
273 return b;
274 }
275 }
276
277 /**
278 * reads the priority for the given status term from the extensions.
279 *
280 * @param term
281 * @return the priority value
282 */
283 private Integer getPriorityFor(DefinedTermBase<?> term) {
284 Set<Extension> extensions = term.getExtensions();
285 for(Extension extension : extensions){
286 if(!extension.getType().equals(ExtensionType.ORDER())) {
287 continue;
288 }
289 int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
290 if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
291 try {
292 Integer priority = Integer.valueOf(extension.getValue().substring(EXTENSION_VALUE_PREFIX.length()));
293 return priority;
294 } catch (NumberFormatException e) {
295 logger.warn("Invalid number format in Extension:" + extension.getValue());
296 }
297 }
298 }
299 logger.warn("no priority defined for '" + term.getLabel() + "'");
300 return null;
301 }
302
303 /**
304 * runs both steps
305 * <ul>
306 * <li>Step 1: Accumulate occurrence records by area</li>
307 * <li>Step 2: Accumulate by ranks starting from lower rank to upper rank,
308 * the status of all children are accumulated on each rank starting from
309 * lower rank to upper rank.</li>
310 * </ul>
311 *
312 * @param superAreas
313 * the areas to which the subordinate areas should be projected.
314 * @param lowerRank
315 * @param upperRank
316 * @param classification
317 * @param classification
318 * limit the accumulation process to a specific classification
319 * (not yet implemented)
320 * @param monitor
321 * the progress monitor to use for reporting progress to the
322 * user. It is the caller's responsibility to call done() on the
323 * given monitor. Accepts null, indicating that no progress
324 * should be reported and that the operation cannot be cancelled.
325 */
326 public void accumulate(AggregationMode mode, List<NamedArea> superAreas, Rank lowerRank, Rank upperRank,
327 Classification classification, IProgressMonitor monitor) {
328
329 if (monitor == null) {
330 monitor = new NullProgressMonitor();
331 }
332
333 logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations
334
335 logger.info("Hibernate JDBC Batch size: "
336 + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
337
338 // only for debugging:
339 logger.setLevel(Level.INFO);
340 //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);
341
342 Set<Classification> classifications = new HashSet<Classification>();
343 if(classification == null) {
344 classifications.addAll(classificationService.listClassifications(null, null, null, null));
345 } else {
346 classifications.add(classification);
347 }
348
349 int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;
350
351 // take start time for performance testing
352 // NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
353 double start = System.currentTimeMillis();
354
355 monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
356 updatePriorities();
357 monitor.worked(1);
358
359 for(Classification _classification : classifications) {
360
361 ClassificationLookupDTO classificationLookupDao = classificationDao.classificationLookup(_classification);
362
363 monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
364 if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
365 accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200),
366 mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks));
367 }
368 monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());
369
370 double end1 = System.currentTimeMillis();
371
372 logger.info("Time elapsed for accumulateByArea() : " + (end1 - start) / (1000) + "s");
373
374 double start2 = System.currentTimeMillis();
375 if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
376 accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200),
377 mode.equals(AggregationMode.byRanks));
378 }
379
380 double end2 = System.currentTimeMillis();
381 logger.info("Time elapsed for accumulateByRank() : " + (end2 - start2) / (1000) + "s");
382 logger.info("Time elapsed for accumulate(): " + (end2 - start) / (1000) + "s");
383
384 if(ONLY_FISRT_BATCH) {
385 break;
386 }
387 }
388 }
389
390 /**
391 * @return
392 */
393 private Session getSession() {
394 return descriptionService.getSession();
395 }
396
397 /**
398 * Step 1: Accumulate occurrence records by area
399 * <ul>
400 * <li>areas are projected to super areas e.g.: HS <-- HS(A), HS(G), HS(S)</li>
401 * <li>super areas do initially not have a status set ==> Prerequisite to check in CDM</li>
402 * <li>areas having a summary status of summary value different from {@link #getByAreaIgnoreStatusList()} are ignored</li>
403 * <li>areas have a priority value, the status of the area with highest priority determines the status of the super area</li>
404 * <li>the source references of the accumulated distributions are also accumulated into the new distribution,,</li>
405 * <li>this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
406 * </ul>
407 *
408 * @param superAreas
409 * the areas to which the subordinate areas should be projected
410 * @param classificationLookupDao
411 *
412 */
413 protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao, IProgressMonitor subMonitor, boolean doClearDescriptions) {
414
415 int batchSize = 1000;
416
417 TransactionStatus txStatus = startTransaction(false);
418
419 // reload superAreas TODO is it faster to getSession().merge(object) ??
420 Set<UUID> superAreaUuids = new HashSet<UUID>(superAreas.size());
421 for (NamedArea superArea : superAreas){
422 superAreaUuids.add(superArea.getUuid());
423 }
424 List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);
425
426 // visit all accepted taxa
427 subMonitor.beginTask("Accumulating by area ", classificationLookupDao.getTaxonIds().size());
428 Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();
429
430 int pageIndex = 0;
431 while (taxonIdIterator.hasNext()) {
432 while (!isLastPage) {
433
434 if(txStatus == null) {
435 // transaction has been comitted at the end of this batch, start a new one
436 txStatus = startTransaction(false);
437 }
438
439 // load taxa for this batch
440 List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
441 Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
442 while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
443 taxonIds.add(taxonIdIterator.next());
444 }
445
446 // logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
447
448 taxa = taxonService.listByIds(taxonIds, null, null, null, TAXONDESCRIPTION_INIT_STRATEGY);
449
450 // iterate over the taxa and accumulate areas
451 for(TaxonBase taxon : taxa) {
452 if(logger.isDebugEnabled()){
453 logger.debug("accumulateByArea() - taxon :" + taxonToString(taxon));
454 }
455
456 TaxonDescription description = findComputedDescription((Taxon)taxon, doClearDescriptions);
457 List<Distribution> distributions = distributionsFor((Taxon)taxon);
458
459 // Step through superAreas for accumulation of subAreas
460 for (NamedArea superArea : superAreaList){
461
462 // accumulate all sub area status
463 PresenceAbsenceTerm accumulatedStatus = null;
464 // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
465 Set<NamedArea> subAreas = getSubAreasFor(superArea);
466 for(NamedArea subArea : subAreas){
467 if(logger.isTraceEnabled()){
468 logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
469 }
470 // step through all distributions for the given subArea
471 for(Distribution distribution : distributions){
472 if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
473 PresenceAbsenceTerm status = distribution.getStatus();
474 if(logger.isTraceEnabled()){
475 logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
476 }
477 // skip all having a status value different of those in byAreaIgnoreStatusList
478 if (getByAreaIgnoreStatusList().contains(status)){
479 continue;
480 }
481 accumulatedStatus = choosePreferred(accumulatedStatus, status);
482 }
483 }
484 } // next sub area
485 if (accumulatedStatus != null) {
486 if(logger.isDebugEnabled()){
487 logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));
488 }
489 // store new distribution element for superArea in taxon description
490 Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
491 newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
492 description.addElement(newDistribitionElement);
493 }
494
495 } // next super area ....
496
497 descriptionService.saveOrUpdate(description);
498 taxonService.saveOrUpdate(taxon);
499 subMonitor.worked(1);
500
501 } // next taxon
502
503 flushAndClear();
504
505 // commit for every batch, otherwise the persistent context
506 // may grow too much and eats up all the heap
507 commitTransaction(txStatus);
508 txStatus = null;
509
510 if(ONLY_FISRT_BATCH) {
511 break;
512 }
513
514 } // next batch of taxa
515
516 subMonitor.done();
517 }
518
519 /**
520 * @param taxon
521 * @param logger2
522 * @return
523 */
524 private String taxonToString(TaxonBase taxon) {
525 if(logger.isTraceEnabled()) {
526 return taxon.getTitleCache();
527 } else {
528 return taxon.toString();
529 }
530 }
531
532 /**
533 * @param taxon
534 * @param logger2
535 * @return
536 */
537 private String termToString(OrderedTermBase<?> term) {
538 if(logger.isTraceEnabled()) {
539 return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
540 } else {
541 return term.getIdInVocabulary();
542 }
543 }
544
    /**
     * Step 2: Accumulate by ranks staring from lower rank to upper rank, the status of all children
     * are accumulated on each rank starting from lower rank to upper rank.
     * <ul>
     * <li>aggregate distribution of included taxa of the next lower rank for any rank level starting from the lower rank (e.g. sub species)
     * up to upper rank (e.g. Genus)</li>
     * <li>the accumulation id done for each distribution area found in the included taxa</li>
     * <li>areas of subtaxa with status endemic are ignored</li>
     * <li>the status with the highest priority determines the value for the accumulated distribution</li>
     * <li>the source reference of the accumulated distributions are also accumulated into the new distribution,
     * this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
     *</ul>
     *
     * @param lowerRank rank at which the accumulation starts
     * @param upperRank rank at which the accumulation stops (inclusive)
     * @param classification limit to this classification; NOTE(review): a null
     *            value reaches pageRankSpecificRootNodes() unchecked — confirm
     *            that the service treats null as "all classifications"
     * @param subMonitor progress monitor; ticksPerRank ticks are reported per rank
     * @param doClearDescriptions whether existing computed Distributions are
     *            removed first, see findComputedDescription()
     */
    protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification, IProgressMonitor subMonitor, boolean doClearDescriptions) {

        int batchSize = 500;

        TransactionStatus txStatus = startTransaction(false);

        // the loadRankSpecificRootNodes() method not only finds
        // taxa of the specified rank but also taxa of lower ranks
        // if no taxon of the specified rank exists, so we need to
        // remember which taxa have been processed already
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();

        // build the list of ranks to visit, from lowerRank up to and including upperRank
        Rank currentRank = lowerRank;
        List<Rank> ranks = new ArrayList<Rank>();
        ranks.add(currentRank);
        while (!currentRank.isHigher(upperRank)) {
            currentRank = findNextHigherRank(currentRank);
            ranks.add(currentRank);
        }

        int ticksPerRank = 100;
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);

        for (Rank rank : ranks) {

            if(logger.isDebugEnabled()){
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
            }

            // page through the rank-specific root nodes; isLastPage is detected
            // by a page smaller than batchSize
            Pager<TaxonNode> taxonPager = null;
            int pageIndex = 0;
            boolean isLastPage = false;
            SubProgressMonitor taxonSubMonitor = null;
            while (!isLastPage) {

                if(txStatus == null) {
                    // transaction has been comitted at the end of this batch, start a new one
                    txStatus = startTransaction(false);
                }

                taxonPager = classificationService
                        .pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null);

                // the sub monitor is sized from the total count of the first page
                if(taxonSubMonitor == null) {
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
                    taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount().intValue());

                }

                if(taxonPager != null){
                    if(logger.isDebugEnabled()){
                        logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
                    }
                } else {
                    logger.error("accumulateByRank() - taxonNode pager was NULL");
                }

                if(taxonPager != null){
                    isLastPage = taxonPager.getRecords().size() < batchSize;
                    if (taxonPager.getRecords().size() == 0){
                        break;
                    }

                    for(TaxonNode taxonNode : taxonPager.getRecords()) {

                        Taxon taxon = taxonNode.getTaxon();
                        // skip taxa already handled at a lower rank level
                        if (taxaProcessedIds.contains(taxon.getId())) {
                            if(logger.isDebugEnabled()){
                                logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
                            }
                            continue;
                        }
                        taxaProcessedIds.add(taxon.getId());
                        if(logger.isDebugEnabled()){
                            logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
                        }

                        // Step through direct taxonomic children for accumulation
                        Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();

                        for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){

                            // NOTE(review): marks the parent node (not subTaxonNode)
                            // read-only on every child iteration — confirm intent
                            getSession().setReadOnly(taxonNode, true);
                            if(logger.isTraceEnabled()){
                                logger.trace("    subtaxon :" + taxonToString(subTaxonNode.getTaxon()));
                            }

                            for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
                                PresenceAbsenceTerm status = distribution.getStatus();
                                NamedArea area = distribution.getArea();
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
                                    continue;
                                }
                                // per area, keep the status with the highest priority
                                accumulatedStatusMap.put(area, choosePreferred(accumulatedStatusMap.get(area), status));
                            }
                        }

                        if(accumulatedStatusMap.size() > 0) {
                            TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
                            for (NamedArea area : accumulatedStatusMap.keySet()) {
                                // store new distribution element in new Description
                                Distribution newDistribitionElement = Distribution.NewInstance(area, accumulatedStatusMap.get(area));
                                newDistribitionElement.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
                                description.addElement(newDistribitionElement);
                            }
                            taxonService.saveOrUpdate(taxon);
                            descriptionService.saveOrUpdate(description);
                        }
                        taxonSubMonitor.worked(1); // one taxon worked

                    } // next taxon node ....
                }
                taxonPager = null;
                flushAndClear();

                // commit for every batch, otherwise the persistent context
                // may grow too much and eats up all the heap
                commitTransaction(txStatus);
                txStatus = null;

                if(ONLY_FISRT_BATCH) {
                    break;
                }
            } // next batch

            taxonSubMonitor.done();
            subMonitor.worked(1);

            if(ONLY_FISRT_BATCH) {
                break;
            }
        } // next Rank

        subMonitor.done();
    }
693
694 /**
695 *
696 */
697 private void flushAndClear() {
698 logger.debug("flushing and clearing session ...");
699 getSession().flush();
700 try {
701 Search.getFullTextSession(getSession()).flushToIndexes();
702 } catch (HibernateException e) {
703 /* IGNORE - Hibernate Search Event listeners not configured ... */
704 if(!e.getMessage().startsWith("Hibernate Search Event listeners not configured")){
705 throw e;
706 }
707 }
708 getSession().clear();
709 }
710
711
712 // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
713 public TransactionStatus startTransaction(Boolean readOnly) {
714
715 DefaultTransactionDefinition defaultTxDef = new DefaultTransactionDefinition();
716 defaultTxDef.setReadOnly(readOnly);
717 TransactionDefinition txDef = defaultTxDef;
718
719 // Log some transaction-related debug information.
720 if (logger.isTraceEnabled()) {
721 logger.trace("Transaction name = " + txDef.getName());
722 logger.trace("Transaction facets:");
723 logger.trace("Propagation behavior = " + txDef.getPropagationBehavior());
724 logger.trace("Isolation level = " + txDef.getIsolationLevel());
725 logger.trace("Timeout = " + txDef.getTimeout());
726 logger.trace("Read Only = " + txDef.isReadOnly());
727 // org.springframework.orm.hibernate5.HibernateTransactionManager
728 // provides more transaction/session-related debug information.
729 }
730
731 TransactionStatus txStatus = transactionManager.getTransaction(txDef);
732
733 getSession().setFlushMode(FlushMode.COMMIT);
734
735 return txStatus;
736 }
737
738 // TODO merge with CdmApplicationDefaultConfiguration#startTransaction() into common base class
739 public void commitTransaction(TransactionStatus txStatus){
740 logger.debug("commiting transaction ...");
741 transactionManager.commit(txStatus);
742 return;
743 }
744
745 /**
746 * returns the next higher rank
747 *
748 * TODO better implement OrderedTermBase.getNextHigherTerm() and OrderedTermBase.getNextLowerTerm()?
749 *
750 * @param rank
751 * @return
752 */
753 private Rank findNextHigherRank(Rank rank) {
754 rank = (Rank) termService.load(rank.getUuid());
755 return rank.getNextHigherTerm();
756 // OrderedTermVocabulary<Rank> rankVocabulary = mameService.getRankVocabulary();;
757 // return rankVocabulary.getNextHigherTerm(rank);
758 }
759
760 /**
761 * Either finds an existing taxon description of the given taxon or creates a new one.
762 * If the doClear is set all existing description elements will be cleared.
763 *
764 * @param taxon
765 * @param doClear will remove all existing Distributions if the taxon already
766 * has a MarkerType.COMPUTED() TaxonDescription
767 * @return
768 */
769 private TaxonDescription findComputedDescription(Taxon taxon, boolean doClear) {
770
771 String descriptionTitle = this.getClass().getSimpleName();
772
773 // find existing one
774 for (TaxonDescription description : taxon.getDescriptions()) {
775 if (description.hasMarker(MarkerType.COMPUTED(), true)) {
776 logger.debug("reusing description for " + taxon.getTitleCache());
777 if (doClear) {
778 int deleteCount = 0;
779 Set<DescriptionElementBase> deleteCandidates = new HashSet<DescriptionElementBase>();
780 for (DescriptionElementBase descriptionElement : description.getElements()) {
781 if(descriptionElement instanceof Distribution) {
782 deleteCandidates.add(descriptionElement);
783 }
784 }
785 if(deleteCandidates.size() > 0){
786 for(DescriptionElementBase descriptionElement : deleteCandidates) {
787 description.removeElement(descriptionElement);
788 descriptionService.deleteDescriptionElement(descriptionElement);
789 descriptionElement = null;
790 deleteCount++;
791 }
792 descriptionService.saveOrUpdate(description);
793 logger.debug("\t" + deleteCount +" distributions cleared");
794 }
795
796 }
797 return description;
798 }
799 }
800
801 // create a new one
802 logger.debug("creating new description for " + taxon.getTitleCache());
803 TaxonDescription description = TaxonDescription.NewInstance(taxon);
804 description.setTitleCache(descriptionTitle, true);
805 description.addMarker(Marker.NewInstance(MarkerType.COMPUTED(), true));
806 return description;
807 }
808
809 /**
810 * @param superArea
811 * @return
812 */
813 private Set<NamedArea> getSubAreasFor(NamedArea superArea) {
814
815 if(!subAreaMap.containsKey(superArea)) {
816 if(logger.isDebugEnabled()){
817 logger.debug("loading included areas for " + superArea.getLabel());
818 }
819 subAreaMap.put(superArea, superArea.getIncludes());
820 }
821 return subAreaMap.get(superArea);
822 }
823
824 /**
825 * @param taxon
826 * @return
827 */
828 private List<Distribution> distributionsFor(Taxon taxon) {
829 List<Distribution> distributions = new ArrayList<Distribution>();
830 for(TaxonDescription description: taxon.getDescriptions()) {
831 for(DescriptionElementBase deb : description.getElements()) {
832 if(deb instanceof Distribution) {
833 distributions.add((Distribution)deb);
834 }
835 }
836 }
837 return distributions;
838 }
839
840 /**
841 * Sets the priorities for presence and absence terms, the priorities are stored in extensions.
842 * This method will start a new transaction and commits it after the work is done.
843 */
844 public void updatePriorities() {
845
846 TransactionStatus txStatus = startTransaction(false);
847
848 Map<PresenceAbsenceTerm, Integer> priorityMap = new HashMap<PresenceAbsenceTerm, Integer>();
849
850 priorityMap.put(PresenceAbsenceTerm.CULTIVATED_REPORTED_IN_ERROR(), 1);
851 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_UNCERTAIN_DEGREE_OF_NATURALISATION(), 2);
852 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_FORMERLY_INTRODUCED(), 3);
853 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_REPORTED_IN_ERROR(), 20);
854 priorityMap.put(PresenceAbsenceTerm.NATIVE_REPORTED_IN_ERROR(), 30);
855 priorityMap.put(PresenceAbsenceTerm.CULTIVATED(), 45);
856 priorityMap.put(PresenceAbsenceTerm.NATIVE_FORMERLY_NATIVE(), 40);
857 priorityMap.put(PresenceAbsenceTerm.NATIVE_PRESENCE_QUESTIONABLE(), 60);
858 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_PRESENCE_QUESTIONABLE(), 50);
859 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_DOUBTFULLY_INTRODUCED(), 80);
860 priorityMap.put(PresenceAbsenceTerm.INTRODUCED(), 90);
861 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_ADVENTITIOUS(), 100);
862 priorityMap.put(PresenceAbsenceTerm.INTRODUCED_NATURALIZED(), 110);
863 priorityMap.put(PresenceAbsenceTerm.NATIVE_DOUBTFULLY_NATIVE(), 120); // null
864 priorityMap.put(PresenceAbsenceTerm.NATIVE(), 130); // null
865 priorityMap.put(PresenceAbsenceTerm.ENDEMIC_FOR_THE_RELEVANT_AREA(), 999);
866
867 for(PresenceAbsenceTerm term : priorityMap.keySet()) {
868 // load the term
869 term = (PresenceAbsenceTerm) termService.load(term.getUuid());
870 // find the extension
871 Extension priorityExtension = null;
872 Set<Extension> extensions = term.getExtensions();
873 for(Extension extension : extensions){
874 if (!extension.getType().equals(ExtensionType.ORDER())) {
875 continue;
876 }
877 int pos = extension.getValue().indexOf(EXTENSION_VALUE_PREFIX);
878 if(pos == 0){ // if starts with EXTENSION_VALUE_PREFIX
879 priorityExtension = extension;
880 break;
881 }
882 }
883 if(priorityExtension == null) {
884 priorityExtension = Extension.NewInstance(term, null, ExtensionType.ORDER());
885 }
886 priorityExtension.setValue(EXTENSION_VALUE_PREFIX + priorityMap.get(term));
887
888 // save the term
889 termService.saveOrUpdate(term);
890 if (logger.isDebugEnabled()) {
891 logger.debug("Priority updated for " + term.getLabel());
892 }
893 }
894
895 commitTransaction(txStatus);
896 }
897
    /**
     * Selects which accumulation steps {@link #accumulate} executes:
     * only the area projection, only the rank roll-up, or both.
     */
    public enum AggregationMode {
        byAreas,
        byRanks,
        byAreasAndRanks

    }
904 }