Revision 869c3f85
Added by Andreas Kohlbecker almost 8 years ago
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/description/TransmissionEngineDistribution.java | ||
---|---|---|
38 | 38 |
import eu.etaxonomy.cdm.api.service.INameService; |
39 | 39 |
import eu.etaxonomy.cdm.api.service.ITaxonService; |
40 | 40 |
import eu.etaxonomy.cdm.api.service.ITermService; |
41 |
import eu.etaxonomy.cdm.api.service.pager.Pager; |
|
42 | 41 |
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor; |
43 | 42 |
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor; |
44 | 43 |
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor; |
... | ... | |
57 | 56 |
import eu.etaxonomy.cdm.model.taxon.Classification; |
58 | 57 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
59 | 58 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
60 |
import eu.etaxonomy.cdm.model.taxon.TaxonNode; |
|
61 | 59 |
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO; |
62 | 60 |
import eu.etaxonomy.cdm.persistence.query.OrderHint; |
63 | 61 |
|
... | ... | |
100 | 98 |
/** |
101 | 99 |
* only used for performance testing |
102 | 100 |
*/ |
103 |
final boolean ONLY_FISRT_BATCH = true;
|
|
101 |
final boolean ONLY_FISRT_BATCH = false;
|
|
104 | 102 |
|
105 | 103 |
|
106 | 104 |
protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] { |
... | ... | |
328 | 326 |
monitor = new NullProgressMonitor(); |
329 | 327 |
} |
330 | 328 |
|
331 |
logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations |
|
332 |
|
|
333 |
logger.info("Hibernate JDBC Batch size: " |
|
334 |
+ ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize()); |
|
335 | 329 |
|
336 | 330 |
// only for debugging: |
337 |
logger.setLevel(Level.INFO);
|
|
331 |
logger.setLevel(Level.DEBUG); // TRACE will slow down a lot since it forces loading all term representations
|
|
338 | 332 |
//Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG); |
339 | 333 |
|
334 |
logger.info("Hibernate JDBC Batch size: " |
|
335 |
+ ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize()); |
|
336 |
|
|
340 | 337 |
Set<Classification> classifications = new HashSet<Classification>(); |
341 | 338 |
if(classification == null) { |
342 | 339 |
classifications.addAll(classificationService.listClassifications(null, null, null, null)); |
... | ... | |
354 | 351 |
updatePriorities(); |
355 | 352 |
monitor.worked(1); |
356 | 353 |
|
354 |
List<Rank> ranks = rankInterval(lowerRank, upperRank); |
|
355 |
|
|
357 | 356 |
for(Classification _classification : classifications) { |
358 | 357 |
|
359 | 358 |
ClassificationLookupDTO classificationLookupDao = classificationService.classificationLookup(_classification); |
359 |
classificationLookupDao.filter(ranks); |
|
360 | 360 |
|
361 | 361 |
double end1 = System.currentTimeMillis(); |
362 | 362 |
logger.info("Time elapsed for classificationLookup() : " + (end1 - start) / (1000) + "s"); |
... | ... | |
373 | 373 |
|
374 | 374 |
double start3 = System.currentTimeMillis(); |
375 | 375 |
if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) { |
376 |
accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
|
|
376 |
accumulateByRank(ranks, classificationLookupDao, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
|
|
377 | 377 |
} |
378 | 378 |
|
379 | 379 |
double end3 = System.currentTimeMillis(); |
... | ... | |
387 | 387 |
} |
388 | 388 |
} |
389 | 389 |
|
390 |
/** Convenience accessor for the session exposed by descriptionService. |
|
391 |
* @return the object returned by descriptionService.getSession() |
|
392 |
*/ |
|
393 |
private Session getSession() { |
|
394 |
return descriptionService.getSession(); |
|
395 |
} |
|
396 | 390 |
|
397 | 391 |
/** |
398 | 392 |
* Step 1: Accumulate occurrence records by area |
... | ... | |
421 | 415 |
for (NamedArea superArea : superAreas){ |
422 | 416 |
superAreaUuids.add(superArea.getUuid()); |
423 | 417 |
} |
424 |
List<NamedArea> superAreaList = (List)termService.find(superAreaUuids); |
|
425 | 418 |
|
426 | 419 |
// visit all accepted taxa |
427 | 420 |
subMonitor.beginTask("Accumulating by area ", classificationLookupDao.getTaxonIds().size()); |
... | ... | |
434 | 427 |
txStatus = startTransaction(false); |
435 | 428 |
} |
436 | 429 |
|
430 |
// the session is cleared after each batch, so load the superAreaList for each batch |
|
431 |
List<NamedArea> superAreaList = (List)termService.find(superAreaUuids); |
|
432 |
|
|
437 | 433 |
// load taxa for this batch |
438 |
List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
|
|
434 |
List<TaxonBase> taxa = null;
|
|
439 | 435 |
Set<Integer> taxonIds = new HashSet<Integer>(batchSize); |
440 | 436 |
while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) { |
441 | 437 |
taxonIds.add(taxonIdIterator.next()); |
... | ... | |
529 | 525 |
* this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li> |
530 | 526 |
*</ul> |
531 | 527 |
*/ |
532 |
protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification, IProgressMonitor subMonitor, boolean doClearDescriptions) {
|
|
528 |
protected void accumulateByRank(List<Rank> rankInterval, ClassificationLookupDTO classificationLookupDao, IProgressMonitor subMonitor, boolean doClearDescriptions) {
|
|
533 | 529 |
|
534 | 530 |
int batchSize = 500; |
535 | 531 |
|
... | ... | |
540 | 536 |
// if no taxon of the specified rank exists, so we need to |
541 | 537 |
// remember which taxa have been processed already |
542 | 538 |
Set<Integer> taxaProcessedIds = new HashSet<Integer>(); |
539 |
List<TaxonBase> taxa = null; |
|
540 |
List<TaxonBase> childTaxa = null; |
|
543 | 541 |
|
544 |
Rank currentRank = lowerRank; |
|
545 |
List<Rank> ranks = new ArrayList<Rank>(); |
|
546 |
ranks.add(currentRank); |
|
547 |
while (!currentRank.isHigher(upperRank)) { |
|
548 |
currentRank = findNextHigherRank(currentRank); |
|
549 |
ranks.add(currentRank); |
|
550 |
} |
|
542 |
List<Rank> ranks = rankInterval; |
|
551 | 543 |
|
552 | 544 |
int ticksPerRank = 100; |
553 | 545 |
subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank); |
... | ... | |
558 | 550 |
logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'"); |
559 | 551 |
} |
560 | 552 |
|
561 |
Pager<TaxonNode> taxonPager = null; |
|
562 |
int pageIndex = 0; |
|
563 |
boolean isLastPage = false; |
|
564 | 553 |
SubProgressMonitor taxonSubMonitor = null; |
565 |
while (!isLastPage) { |
|
554 |
Set<Integer> taxonIdsPerRank = classificationLookupDao.getTaxonIdByRank().get(rank); |
|
555 |
if(taxonIdsPerRank == null || taxonIdsPerRank.isEmpty()) { |
|
556 |
continue; |
|
557 |
} |
|
558 |
Iterator<Integer> taxonIdIterator = taxonIdsPerRank.iterator(); |
|
559 |
while (taxonIdIterator.hasNext()) { |
|
566 | 560 |
|
567 | 561 |
if(txStatus == null) { |
568 | 562 |
// transaction has been committed at the end of this batch, start a new one |
569 | 563 |
txStatus = startTransaction(false); |
570 | 564 |
} |
571 | 565 |
|
572 |
taxonPager = classificationService |
|
573 |
.pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null); |
|
566 |
// load taxa for this batch |
|
567 |
Set<Integer> taxonIds = new HashSet<Integer>(batchSize); |
|
568 |
while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) { |
|
569 |
taxonIds.add(taxonIdIterator.next()); |
|
570 |
} |
|
571 |
|
|
572 |
taxa = taxonService.listByIds(taxonIds, null, null, emptyOrderHints, null); |
|
574 | 573 |
|
575 | 574 |
if(taxonSubMonitor == null) { |
576 | 575 |
taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank); |
577 |
taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount().intValue()); |
|
578 |
} |
|
579 |
|
|
580 |
if(taxonPager != null){ |
|
581 |
if(logger.isDebugEnabled()){ |
|
582 |
logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]"); |
|
583 |
} |
|
584 |
} else { |
|
585 |
logger.error("accumulateByRank() - taxonNode pager was NULL"); |
|
576 |
taxonSubMonitor.beginTask("Accumulating by rank " + termToString(rank), taxa.size()); |
|
586 | 577 |
} |
587 | 578 |
|
588 |
if(taxonPager != null){ |
|
589 |
isLastPage = taxonPager.getRecords().size() < batchSize; |
|
590 |
if (taxonPager.getRecords().size() == 0){ |
|
591 |
break; |
|
592 |
} |
|
579 |
// if(logger.isDebugEnabled()){ |
|
580 |
// logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]"); |
|
581 |
// } |
|
593 | 582 |
|
594 |
for(TaxonNode taxonNode : taxonPager.getRecords()) {
|
|
583 |
for(TaxonBase taxonBase : taxa) {
|
|
595 | 584 |
|
596 |
Taxon taxon = taxonNode.getTaxon(); |
|
597 |
if (taxaProcessedIds.contains(taxon.getId())) { |
|
598 |
if(logger.isDebugEnabled()){ |
|
599 |
logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon)); |
|
600 |
} |
|
601 |
continue; |
|
602 |
} |
|
603 |
taxaProcessedIds.add(taxon.getId()); |
|
585 |
Taxon taxon = (Taxon)taxonBase; |
|
586 |
if (taxaProcessedIds.contains(taxon.getId())) { |
|
604 | 587 |
if(logger.isDebugEnabled()){ |
605 |
logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
|
|
588 |
logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
|
|
606 | 589 |
} |
590 |
continue; |
|
591 |
} |
|
592 |
taxaProcessedIds.add(taxon.getId()); |
|
593 |
if(logger.isDebugEnabled()){ |
|
594 |
logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon)); |
|
595 |
} |
|
607 | 596 |
|
608 |
// Step through direct taxonomic children for accumulation
|
|
609 |
Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();
|
|
597 |
// Step through direct taxonomic children for accumulation |
|
598 |
Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>(); |
|
610 | 599 |
|
611 |
for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){ |
|
600 |
Set<Integer> childTaxonIds = classificationLookupDao.getChildTaxonMap().get(taxon.getId()); |
|
601 |
if(childTaxonIds != null && !childTaxonIds.isEmpty()) { |
|
602 |
childTaxa = taxonService.listByIds(childTaxonIds, null, null, emptyOrderHints, TAXONDESCRIPTION_INIT_STRATEGY); |
|
612 | 603 |
|
613 |
getSession().setReadOnly(taxonNode, true); |
|
604 |
for (TaxonBase childTaxonBase : childTaxa){ |
|
605 |
|
|
606 |
Taxon childTaxon = (Taxon) childTaxonBase; |
|
607 |
getSession().setReadOnly(childTaxon, true); |
|
614 | 608 |
if(logger.isTraceEnabled()){ |
615 |
logger.trace(" subtaxon :" + taxonToString(subTaxonNode.getTaxon()));
|
|
609 |
logger.trace(" subtaxon :" + taxonToString(childTaxon));
|
|
616 | 610 |
} |
617 | 611 |
|
618 |
for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
|
|
612 |
for(Distribution distribution : distributionsFor(childTaxon) ) {
|
|
619 | 613 |
PresenceAbsenceTerm status = distribution.getStatus(); |
620 | 614 |
NamedArea area = distribution.getArea(); |
621 | 615 |
if (status == null || getByRankIgnoreStatusList().contains(status)){ |
... | ... | |
636 | 630 |
taxonService.saveOrUpdate(taxon); |
637 | 631 |
descriptionService.saveOrUpdate(description); |
638 | 632 |
} |
639 |
taxonSubMonitor.worked(1); // one taxon worked |
|
640 | 633 |
|
641 |
} // next taxon node .... |
|
642 |
} |
|
643 |
taxonPager = null; |
|
634 |
} |
|
635 |
taxonSubMonitor.worked(1); // one taxon worked |
|
636 |
|
|
637 |
} // next taxon .... |
|
638 |
|
|
644 | 639 |
flushAndClear(); |
645 | 640 |
|
646 | 641 |
// commit for every batch, otherwise the persistent context |
... | ... | |
666 | 661 |
subMonitor.done(); |
667 | 662 |
} |
668 | 663 |
|
664 |
/** Builds the list of ranks to process, starting at lowerRank and stepping upwards via findNextHigherRank(). |
|
665 |
* @param lowerRank the rank the list starts with; it is always the first element |
|
666 |
* @param upperRank the rank bounding the list; stepping stops once the current rank isHigher(upperRank) |
|
667 |
* @return the ranks in the order they were visited, from lowerRank upwards. NOTE(review): the loop only stops after adding a rank for which isHigher(upperRank) is true, so the last element may lie above upperRank - confirm this is intended |
|
668 |
*/ |
|
669 |
private List<Rank> rankInterval(Rank lowerRank, Rank upperRank) { |
|
670 |
Rank currentRank = lowerRank; |
|
671 |
List<Rank> ranks = new ArrayList<Rank>(); |
|
672 |
ranks.add(currentRank); |
|
673 |
while (!currentRank.isHigher(upperRank)) { |
|
674 |
currentRank = findNextHigherRank(currentRank); |
|
675 |
ranks.add(currentRank); |
|
676 |
} |
|
677 |
return ranks; |
|
678 |
} |
|
679 |
|
|
680 |
/** Convenience accessor for the session exposed by descriptionService. |
|
681 |
* @return the object returned by descriptionService.getSession() |
|
682 |
*/ |
|
683 |
private Session getSession() { |
|
684 |
return descriptionService.getSession(); |
|
685 |
} |
|
686 |
|
|
669 | 687 |
/** |
670 | 688 |
* |
671 | 689 |
*/ |
Also available in: Unified diff
solving multiple problems in TransmissionEngine