Project

General

Profile

« Previous | Next » 

Revision 869c3f85

Added by Andreas Kohlbecker about 8 years ago

solving multiple problems in TransmissionEngine

  • avoiding LazyInitializationException (LIE) by fetching the parentAreas for each batch
  • skipping taxa not in specified rank interval

View differences:

cdmlib-persistence/src/main/java/eu/etaxonomy/cdm/persistence/dao/hibernate/taxon/ClassificationDaoHibernateImpl.java
273 273
        List<Object[]> result = query.list();
274 274
        for(Object[] row : result) {
275 275
            Integer parentId = null;
276
//            if(row.length == 3) { // TODO check necessary?
277
//                parentId = (Integer) row[2];
278
//            }
276
            parentId = (Integer) row[2];
279 277
            classificationLookupDTO.add((Integer)row[0], (Rank)row[1], parentId);
280 278
        }
281 279

  
cdmlib-persistence/src/main/java/eu/etaxonomy/cdm/persistence/dto/ClassificationLookupDTO.java
12 12
import java.util.Collection;
13 13
import java.util.HashMap;
14 14
import java.util.HashSet;
15
import java.util.List;
15 16
import java.util.Map;
16 17
import java.util.Set;
17 18

  
......
26 27
public class ClassificationLookupDTO {
27 28

  
28 29
    private final Map<Integer, Integer> taxonIdToParentId = new HashMap<Integer, Integer>();
29
    private final Map<Rank,Collection<Integer>> taxonIdByRank = new HashMap<Rank, Collection<Integer>>();
30
    private final Map<Integer,Collection<Integer>> childTaxonMap = new HashMap<Integer,Collection<Integer>>();
30
    private final Map<Rank,Set<Integer>> taxonIdByRank = new HashMap<Rank, Set<Integer>>();
31
    private final Map<Integer,Set<Integer>> childTaxonMap = new HashMap<Integer,Set<Integer>>();
31 32
    private Classification classification = null;
32 33

  
33 34
    /**
......
40 41
    /**
41 42
     * @return the taxonIdByRank
42 43
     */
43
    public Map<Rank, Collection<Integer>> getTaxonIdByRank() {
44
    public Map<Rank, Set<Integer>> getTaxonIdByRank() {
44 45
        return taxonIdByRank;
45 46
    }
46 47

  
47 48
    /**
48 49
     * @return the childTaxonMap
49 50
     */
50
    public Map<Integer, Collection<Integer>> getChildTaxonMap() {
51
    public Map<Integer, Set<Integer>> getChildTaxonMap() {
51 52
        return childTaxonMap;
52 53
    }
53 54

  
......
94 95
        }
95 96
    }
96 97

  
98
    /**
99
     * @param ranks
100
     */
101
    public void filter(List<Rank> ranks) {
102
       for(Rank rank : ranks) {
103
           dropRank(rank);
104
       }
105
    }
106

  
97 107
}
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/description/TransmissionEngineDistribution.java
38 38
import eu.etaxonomy.cdm.api.service.INameService;
39 39
import eu.etaxonomy.cdm.api.service.ITaxonService;
40 40
import eu.etaxonomy.cdm.api.service.ITermService;
41
import eu.etaxonomy.cdm.api.service.pager.Pager;
42 41
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
43 42
import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
44 43
import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
......
57 56
import eu.etaxonomy.cdm.model.taxon.Classification;
58 57
import eu.etaxonomy.cdm.model.taxon.Taxon;
59 58
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
60
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
61 59
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
62 60
import eu.etaxonomy.cdm.persistence.query.OrderHint;
63 61

  
......
100 98
    /**
101 99
     * only used for performance testing
102 100
     */
103
    final boolean ONLY_FISRT_BATCH = true;
101
    final boolean ONLY_FISRT_BATCH = false;
104 102

  
105 103

  
106 104
    protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
......
328 326
            monitor = new NullProgressMonitor();
329 327
        }
330 328

  
331
        logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations
332

  
333
        logger.info("Hibernate JDBC Batch size: "
334
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
335 329

  
336 330
        // only for debugging:
337
        logger.setLevel(Level.INFO);
331
        logger.setLevel(Level.DEBUG); // TRACE will slow down a lot since it forces loading all term representations
338 332
        //Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);
339 333

  
334
        logger.info("Hibernate JDBC Batch size: "
335
                + ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
336

  
340 337
        Set<Classification> classifications = new HashSet<Classification>();
341 338
        if(classification == null) {
342 339
            classifications.addAll(classificationService.listClassifications(null, null, null, null));
......
354 351
        updatePriorities();
355 352
        monitor.worked(1);
356 353

  
354
        List<Rank> ranks = rankInterval(lowerRank, upperRank);
355

  
357 356
        for(Classification _classification : classifications) {
358 357

  
359 358
            ClassificationLookupDTO classificationLookupDao = classificationService.classificationLookup(_classification);
359
            classificationLookupDao.filter(ranks);
360 360

  
361 361
            double end1 = System.currentTimeMillis();
362 362
            logger.info("Time elapsed for classificationLookup() : " + (end1 - start) / (1000) + "s");
......
373 373

  
374 374
            double start3 = System.currentTimeMillis();
375 375
            if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
376
                accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
376
                accumulateByRank(ranks, classificationLookupDao, new SubProgressMonitor(monitor, 200), mode.equals(AggregationMode.byRanks));
377 377
            }
378 378

  
379 379
            double end3 = System.currentTimeMillis();
......
387 387
        }
388 388
    }
389 389

  
390
    /**
391
     * @return
392
     */
393
    private Session getSession() {
394
        return descriptionService.getSession();
395
    }
396 390

  
397 391
    /**
398 392
     * Step 1: Accumulate occurrence records by area
......
421 415
        for (NamedArea superArea : superAreas){
422 416
            superAreaUuids.add(superArea.getUuid());
423 417
        }
424
        List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);
425 418

  
426 419
        // visit all accepted taxa
427 420
        subMonitor.beginTask("Accumulating by area ",  classificationLookupDao.getTaxonIds().size());
......
434 427
                txStatus = startTransaction(false);
435 428
            }
436 429

  
430
            // the session is cleared after each batch, so load the superAreaList for each batch
431
            List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);
432

  
437 433
            // load taxa for this batch
438
            List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
434
            List<TaxonBase> taxa = null;
439 435
            Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
440 436
            while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
441 437
                taxonIds.add(taxonIdIterator.next());
......
529 525
    *    this has been especially implemented for the EuroMed Checklist Vol2 and might not be a general requirement</li>
530 526
    *</ul>
531 527
    */
532
    protected void accumulateByRank(Rank lowerRank, Rank upperRank, Classification classification,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
528
    protected void accumulateByRank(List<Rank> rankInterval, ClassificationLookupDTO classificationLookupDao,  IProgressMonitor subMonitor, boolean doClearDescriptions) {
533 529

  
534 530
        int batchSize = 500;
535 531

  
......
540 536
        // if no taxon of the specified rank exists, so we need to
541 537
        // remember which taxa have been processed already
542 538
        Set<Integer> taxaProcessedIds = new HashSet<Integer>();
539
        List<TaxonBase> taxa = null;
540
        List<TaxonBase> childTaxa = null;
543 541

  
544
        Rank currentRank = lowerRank;
545
        List<Rank> ranks = new ArrayList<Rank>();
546
        ranks.add(currentRank);
547
        while (!currentRank.isHigher(upperRank)) {
548
            currentRank = findNextHigherRank(currentRank);
549
            ranks.add(currentRank);
550
        }
542
        List<Rank> ranks = rankInterval;
551 543

  
552 544
        int ticksPerRank = 100;
553 545
        subMonitor.beginTask("Accumulating by rank", ranks.size() * ticksPerRank);
......
558 550
                logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
559 551
            }
560 552

  
561
            Pager<TaxonNode> taxonPager = null;
562
            int pageIndex = 0;
563
            boolean isLastPage = false;
564 553
            SubProgressMonitor taxonSubMonitor = null;
565
            while (!isLastPage) {
554
            Set<Integer> taxonIdsPerRank = classificationLookupDao.getTaxonIdByRank().get(rank);
555
            if(taxonIdsPerRank == null || taxonIdsPerRank.isEmpty()) {
556
                continue;
557
            }
558
            Iterator<Integer> taxonIdIterator = taxonIdsPerRank.iterator();
559
            while (taxonIdIterator.hasNext()) {
566 560

  
567 561
                if(txStatus == null) {
568 562
                    // transaction has been committed at the end of this batch, start a new one
569 563
                    txStatus = startTransaction(false);
570 564
                }
571 565

  
572
                taxonPager = classificationService
573
                        .pageRankSpecificRootNodes(classification, rank, batchSize, pageIndex++, null);
566
                // load taxa for this batch
567
                Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
568
                while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
569
                    taxonIds.add(taxonIdIterator.next());
570
                }
571

  
572
                taxa = taxonService.listByIds(taxonIds, null, null, emptyOrderHints, null);
574 573

  
575 574
                if(taxonSubMonitor == null) {
576 575
                    taxonSubMonitor = new SubProgressMonitor(subMonitor, ticksPerRank);
577
                    taxonSubMonitor.beginTask("Accumulating by rank " + rank.getLabel(), taxonPager.getCount().intValue());
578
                }
579

  
580
                if(taxonPager != null){
581
                    if(logger.isDebugEnabled()){
582
                               logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
583
                    }
584
                } else {
585
                    logger.error("accumulateByRank() - taxonNode pager was NULL");
576
                    taxonSubMonitor.beginTask("Accumulating by rank " + termToString(rank), taxa.size());
586 577
                }
587 578

  
588
                if(taxonPager != null){
589
                    isLastPage = taxonPager.getRecords().size() < batchSize;
590
                    if (taxonPager.getRecords().size() == 0){
591
                        break;
592
                    }
579
//                if(logger.isDebugEnabled()){
580
//                           logger.debug("accumulateByRank() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
581
//                }
593 582

  
594
                    for(TaxonNode taxonNode : taxonPager.getRecords()) {
583
                for(TaxonBase taxonBase : taxa) {
595 584

  
596
                        Taxon taxon = taxonNode.getTaxon();
597
                        if (taxaProcessedIds.contains(taxon.getId())) {
598
                            if(logger.isDebugEnabled()){
599
                                logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
600
                            }
601
                            continue;
602
                        }
603
                        taxaProcessedIds.add(taxon.getId());
585
                    Taxon taxon = (Taxon)taxonBase;
586
                    if (taxaProcessedIds.contains(taxon.getId())) {
604 587
                        if(logger.isDebugEnabled()){
605
                            logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
588
                            logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
606 589
                        }
590
                        continue;
591
                    }
592
                    taxaProcessedIds.add(taxon.getId());
593
                    if(logger.isDebugEnabled()){
594
                        logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
595
                    }
607 596

  
608
                        // Step through direct taxonomic children for accumulation
609
                        Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();
597
                    // Step through direct taxonomic children for accumulation
598
                    Map<NamedArea, PresenceAbsenceTerm> accumulatedStatusMap = new HashMap<NamedArea, PresenceAbsenceTerm>();
610 599

  
611
                        for (TaxonNode subTaxonNode : taxonNode.getChildNodes()){
600
                    Set<Integer> childTaxonIds = classificationLookupDao.getChildTaxonMap().get(taxon.getId());
601
                    if(childTaxonIds != null && !childTaxonIds.isEmpty()) {
602
                        childTaxa = taxonService.listByIds(childTaxonIds, null, null, emptyOrderHints, TAXONDESCRIPTION_INIT_STRATEGY);
612 603

  
613
                            getSession().setReadOnly(taxonNode, true);
604
                        for (TaxonBase childTaxonBase : childTaxa){
605

  
606
                            Taxon childTaxon = (Taxon) childTaxonBase;
607
                            getSession().setReadOnly(childTaxon, true);
614 608
                            if(logger.isTraceEnabled()){
615
                                logger.trace("                   subtaxon :" + taxonToString(subTaxonNode.getTaxon()));
609
                                logger.trace("                   subtaxon :" + taxonToString(childTaxon));
616 610
                            }
617 611

  
618
                            for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
612
                            for(Distribution distribution : distributionsFor(childTaxon) ) {
619 613
                                PresenceAbsenceTerm status = distribution.getStatus();
620 614
                                NamedArea area = distribution.getArea();
621 615
                                if (status == null || getByRankIgnoreStatusList().contains(status)){
......
636 630
                            taxonService.saveOrUpdate(taxon);
637 631
                            descriptionService.saveOrUpdate(description);
638 632
                        }
639
                        taxonSubMonitor.worked(1); // one taxon worked
640 633

  
641
                    } // next taxon node ....
642
                }
643
                taxonPager = null;
634
                    }
635
                    taxonSubMonitor.worked(1); // one taxon worked
636

  
637
                } // next taxon ....
638

  
644 639
                flushAndClear();
645 640

  
646 641
                // commit for every batch, otherwise the persistent context
......
666 661
        subMonitor.done();
667 662
    }
668 663

  
664
/**
665
 * @param lowerRank
666
 * @param upperRank
667
 * @return
668
 */
669
private List<Rank> rankInterval(Rank lowerRank, Rank upperRank) {
670
    Rank currentRank = lowerRank;
671
    List<Rank> ranks = new ArrayList<Rank>();
672
    ranks.add(currentRank);
673
    while (!currentRank.isHigher(upperRank)) {
674
        currentRank = findNextHigherRank(currentRank);
675
        ranks.add(currentRank);
676
    }
677
    return ranks;
678
}
679

  
680
    /**
681
     * @return
682
     */
683
    private Session getSession() {
684
        return descriptionService.getSession();
685
    }
686

  
669 687
    /**
670 688
     *
671 689
     */

Also available in: Unified diff