13 |
13 |
import java.util.Arrays;
|
14 |
14 |
import java.util.HashMap;
|
15 |
15 |
import java.util.HashSet;
|
|
16 |
import java.util.Iterator;
|
16 |
17 |
import java.util.List;
|
17 |
18 |
import java.util.Map;
|
18 |
19 |
import java.util.Set;
|
... | ... | |
46 |
47 |
import eu.etaxonomy.cdm.model.common.ExtensionType;
|
47 |
48 |
import eu.etaxonomy.cdm.model.common.Marker;
|
48 |
49 |
import eu.etaxonomy.cdm.model.common.MarkerType;
|
|
50 |
import eu.etaxonomy.cdm.model.common.OrderedTermBase;
|
49 |
51 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
50 |
52 |
import eu.etaxonomy.cdm.model.description.Distribution;
|
51 |
53 |
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
|
... | ... | |
54 |
56 |
import eu.etaxonomy.cdm.model.name.Rank;
|
55 |
57 |
import eu.etaxonomy.cdm.model.taxon.Classification;
|
56 |
58 |
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
|
59 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
57 |
60 |
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
|
61 |
import eu.etaxonomy.cdm.persistence.dao.taxon.IClassificationDao;
|
|
62 |
import eu.etaxonomy.cdm.persistence.dto.ClassificationLookupDTO;
|
58 |
63 |
|
59 |
64 |
/**
|
60 |
65 |
* The TransmissionEngineDistribution is meant to be used from within a service class.
|
... | ... | |
100 |
105 |
// Debug/measurement aid: when true, processing stops after the first batch.
// NOTE(review): name typo — "FISRT" should be "FIRST"; not renamed here because
// the constant is referenced elsewhere in this file.
final boolean ONLY_FISRT_BATCH = false;
|
101 |
106 |
|
102 |
107 |
|
|
108 |
// Bean-initialization strategy: property paths to eagerly initialize when
// loading taxa, so descriptions, their elements, markers, areas and sources
// can be traversed without lazy-loading exceptions.
// NOTE(review): presumably consumed by taxonService.listByIds(...) — confirm.
protected static final List<String> TAXONDESCRIPTION_INIT_STRATEGY = Arrays.asList(new String [] {
        "description.markers.markerType",
        "description.elements.markers.markerType",
        "description.elements.area",
        "description.elements.sources.citation.authorship",
        "description.elements.sources.nameUsedInSource",
        "description.elements.multilanguageText",
        "name.status.type",
});
|
|
117 |
|
|
118 |
|
103 |
119 |
/**
|
104 |
120 |
* A map which contains the status terms as key and the priority as value
|
105 |
121 |
* The map will contain both, the PresenceTerms and the AbsenceTerms
|
... | ... | |
118 |
134 |
// Service used to enumerate all classifications when none is given explicitly.
@Autowired
private IClassificationService classificationService;

// DAO providing the ClassificationLookupDTO used during aggregation.
@Autowired
private IClassificationDao classificationDao;

// NOTE(review): field name typo — "mameService" should be "nameService";
// not renamed here because other code in this file may reference it.
@Autowired
private INameService mameService;
|
123 |
142 |
|
... | ... | |
311 |
330 |
monitor = new NullProgressMonitor();
|
312 |
331 |
}
|
313 |
332 |
|
314 |
|
// take start time for performance testing
|
315 |
|
// NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
|
316 |
|
double start = System.currentTimeMillis();
|
|
333 |
logger.setLevel(Level.INFO); // TRACE will slow down a lot since it forces loading all term representations
|
|
334 |
|
|
335 |
logger.info("Hibernate JDBC Batch size: "
|
|
336 |
+ ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
|
317 |
337 |
|
318 |
338 |
// only for debugging:
|
319 |
339 |
logger.setLevel(Level.INFO);
|
320 |
340 |
//Logger.getLogger("org.hibernate.SQL").setLevel(Level.DEBUG);
|
321 |
341 |
|
322 |
|
logger.info("Hibernate JDBC Batch size: "
|
323 |
|
+ ((SessionFactoryImplementor) getSession().getSessionFactory()).getSettings().getJdbcBatchSize());
|
|
342 |
Set<Classification> classifications = new HashSet<Classification>();
|
|
343 |
if(classification == null) {
|
|
344 |
classifications.addAll(classificationService.listClassifications(null, null, null, null));
|
|
345 |
} else {
|
|
346 |
classifications.add(classification);
|
|
347 |
}
|
324 |
348 |
|
325 |
|
int workTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;
|
326 |
|
monitor.beginTask("Accumulating distributions", workTicks + 1 );
|
|
349 |
int aggregationWorkTicks = mode.equals(AggregationMode.byAreasAndRanks) ? 400 : 200;
|
327 |
350 |
|
|
351 |
// take start time for performance testing
|
|
352 |
// NOTE: use ONLY_FISRT_BATCH = true to measure only one batch
|
|
353 |
double start = System.currentTimeMillis();
|
328 |
354 |
|
329 |
|
monitor.subTask("updating Priorities");
|
|
355 |
monitor.beginTask("Accumulating distributions", (classifications.size() * aggregationWorkTicks) + 1 );
|
330 |
356 |
updatePriorities();
|
331 |
357 |
monitor.worked(1);
|
332 |
|
monitor.setTaskName("Accumulating distributions");
|
333 |
358 |
|
334 |
|
monitor.subTask("Accumulating distributions to super areas");
|
335 |
|
if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
|
336 |
|
accumulateByArea(superAreas, classification, new SubProgressMonitor(monitor, 200),
|
337 |
|
mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks));
|
338 |
|
}
|
339 |
|
double end1 = System.currentTimeMillis();
|
340 |
|
logger.info("Time elapsed for accumulateByArea() : " + (end1 - start) / (1000) + "s");
|
341 |
|
|
342 |
|
double start2 = System.currentTimeMillis();
|
343 |
|
monitor.subTask("Accumulating distributions to higher ranks");
|
344 |
|
if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
|
345 |
|
accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200),
|
346 |
|
mode.equals(AggregationMode.byRanks));
|
347 |
|
}
|
|
359 |
for(Classification _classification : classifications) {
|
|
360 |
|
|
361 |
ClassificationLookupDTO classificationLookupDao = classificationDao.classificationLookup(_classification);
|
|
362 |
|
|
363 |
monitor.subTask("Accumulating distributions to super areas for " + _classification.getTitleCache());
|
|
364 |
if (mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks)) {
|
|
365 |
accumulateByArea(superAreas, classificationLookupDao, new SubProgressMonitor(monitor, 200),
|
|
366 |
mode.equals(AggregationMode.byAreas) || mode.equals(AggregationMode.byAreasAndRanks));
|
|
367 |
}
|
|
368 |
monitor.subTask("Accumulating distributions to higher ranks for " + _classification.getTitleCache());
|
|
369 |
|
|
370 |
double end1 = System.currentTimeMillis();
|
348 |
371 |
|
349 |
|
double end2 = System.currentTimeMillis();
|
350 |
|
logger.info("Time elapsed for accumulateByRank() : " + (end2 - start2) / (1000) + "s");
|
351 |
|
logger.info("Time elapsed for accumulate(): " + (end2 - start) / (1000) + "s");
|
|
372 |
logger.info("Time elapsed for accumulateByArea() : " + (end1 - start) / (1000) + "s");
|
|
373 |
|
|
374 |
double start2 = System.currentTimeMillis();
|
|
375 |
if (mode.equals(AggregationMode.byRanks) || mode.equals(AggregationMode.byAreasAndRanks)) {
|
|
376 |
accumulateByRank(lowerRank, upperRank, classification, new SubProgressMonitor(monitor, 200),
|
|
377 |
mode.equals(AggregationMode.byRanks));
|
|
378 |
}
|
|
379 |
|
|
380 |
double end2 = System.currentTimeMillis();
|
|
381 |
logger.info("Time elapsed for accumulateByRank() : " + (end2 - start2) / (1000) + "s");
|
|
382 |
logger.info("Time elapsed for accumulate(): " + (end2 - start) / (1000) + "s");
|
|
383 |
|
|
384 |
if(ONLY_FISRT_BATCH) {
|
|
385 |
break;
|
|
386 |
}
|
|
387 |
}
|
352 |
388 |
}
|
353 |
389 |
|
354 |
390 |
/**
|
... | ... | |
371 |
407 |
*
|
372 |
408 |
* @param superAreas
|
373 |
409 |
* the areas to which the subordinate areas should be projected
|
374 |
|
* @param classification
|
375 |
|
* limit the accumulation process to a specific classification (not yet implemented)
|
|
410 |
* @param classificationLookupDao
|
|
411 |
*
|
376 |
412 |
*/
|
377 |
|
protected void accumulateByArea(List<NamedArea> superAreas, Classification classification, IProgressMonitor subMonitor, boolean doClearDescriptions) {
|
|
413 |
protected void accumulateByArea(List<NamedArea> superAreas, ClassificationLookupDTO classificationLookupDao, IProgressMonitor subMonitor, boolean doClearDescriptions) {
|
378 |
414 |
|
379 |
415 |
int batchSize = 1000;
|
380 |
416 |
|
... | ... | |
388 |
424 |
List<NamedArea> superAreaList = (List)termService.find(superAreaUuids);
|
389 |
425 |
|
390 |
426 |
// visit all accepted taxa
|
391 |
|
Pager<Taxon> taxonPager = null;
|
|
427 |
subMonitor.beginTask("Accumulating by area ", classificationLookupDao.getTaxonIds().size());
|
|
428 |
Iterator<Integer> taxonIdIterator = classificationLookupDao.getTaxonIds().iterator();
|
|
429 |
|
392 |
430 |
int pageIndex = 0;
|
393 |
|
boolean isLastPage = false;
|
|
431 |
while (taxonIdIterator.hasNext()) {
|
394 |
432 |
while (!isLastPage) {
|
395 |
433 |
|
396 |
434 |
if(txStatus == null) {
|
... | ... | |
398 |
436 |
txStatus = startTransaction(false);
|
399 |
437 |
}
|
400 |
438 |
|
401 |
|
//TODO limit by classification if not null
|
402 |
|
taxonPager = taxonService.page(Taxon.class, batchSize, pageIndex++, null, null);
|
403 |
|
|
404 |
|
if(taxonPager.getCurrentIndex() == 0){
|
405 |
|
subMonitor.beginTask("Accumulating by area ", taxonPager.getCount().intValue());
|
|
439 |
// load taxa for this batch
|
|
440 |
List<TaxonBase> taxa = new ArrayList<TaxonBase>(batchSize);
|
|
441 |
Set<Integer> taxonIds = new HashSet<Integer>(batchSize);
|
|
442 |
while(taxonIdIterator.hasNext() && taxonIds.size() < batchSize ) {
|
|
443 |
taxonIds.add(taxonIdIterator.next());
|
406 |
444 |
}
|
407 |
445 |
|
408 |
|
logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
|
|
446 |
// logger.debug("accumulateByArea() - taxon " + taxonPager.getFirstRecord() + " to " + taxonPager.getLastRecord() + " of " + taxonPager.getCount() + "]");
|
409 |
447 |
|
410 |
|
if (taxonPager.getRecords().size() == 0){
|
411 |
|
break;
|
412 |
|
}
|
413 |
|
isLastPage = taxonPager.getRecords().size() < batchSize;
|
|
448 |
taxa = taxonService.listByIds(taxonIds, null, null, null, TAXONDESCRIPTION_INIT_STRATEGY);
|
414 |
449 |
|
415 |
450 |
// iterate over the taxa and accumulate areas
|
416 |
|
for(Taxon taxon : taxonPager.getRecords()) {
|
|
451 |
for(TaxonBase taxon : taxa) {
|
417 |
452 |
if(logger.isDebugEnabled()){
|
418 |
|
logger.debug("accumulateByArea() - taxon :" + taxon.getTitleCache());
|
|
453 |
logger.debug("accumulateByArea() - taxon :" + taxonToString(taxon));
|
419 |
454 |
}
|
420 |
455 |
|
421 |
|
TaxonDescription description = findComputedDescription(taxon, doClearDescriptions);
|
422 |
|
List<Distribution> distributions = distributionsFor(taxon);
|
|
456 |
TaxonDescription description = findComputedDescription((Taxon)taxon, doClearDescriptions);
|
|
457 |
List<Distribution> distributions = distributionsFor((Taxon)taxon);
|
423 |
458 |
|
424 |
459 |
// Step through superAreas for accumulation of subAreas
|
425 |
460 |
for (NamedArea superArea : superAreaList){
|
426 |
461 |
|
427 |
462 |
// accumulate all sub area status
|
428 |
463 |
PresenceAbsenceTerm accumulatedStatus = null;
|
|
464 |
// TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
|
429 |
465 |
Set<NamedArea> subAreas = getSubAreasFor(superArea);
|
430 |
466 |
for(NamedArea subArea : subAreas){
|
431 |
467 |
if(logger.isTraceEnabled()){
|
432 |
|
logger.trace("accumulateByArea() - \t\t" + subArea.getLabel());
|
|
468 |
logger.trace("accumulateByArea() - \t\t" + termToString(subArea));
|
433 |
469 |
}
|
434 |
470 |
// step through all distributions for the given subArea
|
435 |
471 |
for(Distribution distribution : distributions){
|
436 |
472 |
if(distribution.getArea() != null && distribution.getArea().equals(subArea) && distribution.getStatus() != null) {
|
437 |
473 |
PresenceAbsenceTerm status = distribution.getStatus();
|
438 |
474 |
if(logger.isTraceEnabled()){
|
439 |
|
logger.trace("accumulateByArea() - \t\t" + subArea.getLabel() + ": " + status.getLabel());
|
|
475 |
logger.trace("accumulateByArea() - \t\t" + termToString(subArea) + ": " + termToString(status));
|
440 |
476 |
}
|
441 |
477 |
// skip all having a status value different of those in byAreaIgnoreStatusList
|
442 |
478 |
if (getByAreaIgnoreStatusList().contains(status)){
|
... | ... | |
448 |
484 |
} // next sub area
|
449 |
485 |
if (accumulatedStatus != null) {
|
450 |
486 |
if(logger.isDebugEnabled()){
|
451 |
|
logger.debug("accumulateByArea() - \t >> " + superArea.getLabel() + ": " + accumulatedStatus.getLabel());
|
|
487 |
logger.debug("accumulateByArea() - \t >> " + termToString(superArea) + ": " + termToString(accumulatedStatus));
|
452 |
488 |
}
|
453 |
489 |
// store new distribution element for superArea in taxon description
|
454 |
490 |
Distribution newDistribitionElement = Distribution.NewInstance(superArea, accumulatedStatus);
|
... | ... | |
464 |
500 |
|
465 |
501 |
} // next taxon
|
466 |
502 |
|
467 |
|
taxonPager = null;
|
468 |
503 |
flushAndClear();
|
469 |
504 |
|
470 |
505 |
// commit for every batch, otherwise the persistent context
|
... | ... | |
481 |
516 |
subMonitor.done();
|
482 |
517 |
}
|
483 |
518 |
|
|
519 |
/**
|
|
520 |
* @param taxon
|
|
521 |
* @param logger2
|
|
522 |
* @return
|
|
523 |
*/
|
|
524 |
private String taxonToString(TaxonBase taxon) {
|
|
525 |
if(logger.isTraceEnabled()) {
|
|
526 |
return taxon.getTitleCache();
|
|
527 |
} else {
|
|
528 |
return taxon.toString();
|
|
529 |
}
|
|
530 |
}
|
|
531 |
|
|
532 |
/**
|
|
533 |
* @param taxon
|
|
534 |
* @param logger2
|
|
535 |
* @return
|
|
536 |
*/
|
|
537 |
private String termToString(OrderedTermBase<?> term) {
|
|
538 |
if(logger.isTraceEnabled()) {
|
|
539 |
return term.getLabel() + " [" + term.getIdInVocabulary() + "]";
|
|
540 |
} else {
|
|
541 |
return term.getIdInVocabulary();
|
|
542 |
}
|
|
543 |
}
|
|
544 |
|
484 |
545 |
/**
|
485 |
546 |
* Step 2: Accumulate by ranks starting from lower rank to upper rank, the status of all children
|
486 |
547 |
* are accumulated on each rank starting from lower rank to upper rank.
|
... | ... | |
520 |
581 |
for (Rank rank : ranks) {
|
521 |
582 |
|
522 |
583 |
if(logger.isDebugEnabled()){
|
523 |
|
logger.debug("accumulateByRank() - at Rank '" + rank.getLabel() + "'");
|
|
584 |
logger.debug("accumulateByRank() - at Rank '" + termToString(rank) + "'");
|
524 |
585 |
}
|
525 |
586 |
|
526 |
587 |
Pager<TaxonNode> taxonPager = null;
|
... | ... | |
562 |
623 |
Taxon taxon = taxonNode.getTaxon();
|
563 |
624 |
if (taxaProcessedIds.contains(taxon.getId())) {
|
564 |
625 |
if(logger.isDebugEnabled()){
|
565 |
|
logger.debug("accumulateByRank() - skipping already processed taxon :" + taxon.getTitleCache());
|
|
626 |
logger.debug("accumulateByRank() - skipping already processed taxon :" + taxonToString(taxon));
|
566 |
627 |
}
|
567 |
628 |
continue;
|
568 |
629 |
}
|
569 |
630 |
taxaProcessedIds.add(taxon.getId());
|
570 |
631 |
if(logger.isDebugEnabled()){
|
571 |
|
logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxon.getTitleCache());
|
|
632 |
logger.debug("accumulateByRank() [" + rank.getLabel() + "] - taxon :" + taxonToString(taxon));
|
572 |
633 |
}
|
573 |
634 |
|
574 |
635 |
// Step through direct taxonomic children for accumulation
|
... | ... | |
578 |
639 |
|
579 |
640 |
getSession().setReadOnly(taxonNode, true);
|
580 |
641 |
if(logger.isTraceEnabled()){
|
581 |
|
logger.trace(" subtaxon :" + subTaxonNode.getTaxon().getTitleCache());
|
|
642 |
logger.trace(" subtaxon :" + taxonToString(subTaxonNode.getTaxon()));
|
582 |
643 |
}
|
583 |
644 |
|
584 |
645 |
for(Distribution distribution : distributionsFor(subTaxonNode.getTaxon()) ) {
|
... | ... | |
765 |
826 |
* @return
|
766 |
827 |
*/
|
767 |
828 |
private List<Distribution> distributionsFor(Taxon taxon) {
|
768 |
|
return descriptionService
|
769 |
|
.listDescriptionElementsForTaxon(taxon, null, Distribution.class, null, null, null);
|
|
829 |
List<Distribution> distributions = new ArrayList<Distribution>();
|
|
830 |
for(TaxonDescription description: taxon.getDescriptions()) {
|
|
831 |
for(DescriptionElementBase deb : description.getElements()) {
|
|
832 |
if(deb instanceof Distribution) {
|
|
833 |
distributions.add((Distribution)deb);
|
|
834 |
}
|
|
835 |
}
|
|
836 |
}
|
|
837 |
return distributions;
|
770 |
838 |
}
|
771 |
839 |
|
772 |
840 |
/**
|
attempt to reduce the overhead imposed by database access