2 * Copyright (C) 2013 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.api
.service
.description
;
11 import java
.util
.ArrayList
;
12 import java
.util
.Arrays
;
13 import java
.util
.HashMap
;
14 import java
.util
.HashSet
;
15 import java
.util
.LinkedList
;
16 import java
.util
.List
;
19 import java
.util
.UUID
;
21 import org
.apache
.log4j
.Logger
;
22 import org
.hibernate
.HibernateException
;
23 import org
.hibernate
.search
.Search
;
24 import org
.springframework
.transaction
.TransactionStatus
;
26 import eu
.etaxonomy
.cdm
.common
.monitor
.IProgressMonitor
;
27 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
28 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
29 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementSource
;
30 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionType
;
31 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
32 import eu
.etaxonomy
.cdm
.model
.description
.PresenceAbsenceTerm
;
33 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
34 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
35 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
36 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
37 import eu
.etaxonomy
.cdm
.model
.term
.OrderedTermBase
;
38 import eu
.etaxonomy
.cdm
.model
.term
.OrderedTermVocabulary
;
39 import eu
.etaxonomy
.cdm
.model
.term
.TermCollection
;
40 import eu
.etaxonomy
.cdm
.model
.term
.TermNode
;
41 import eu
.etaxonomy
.cdm
.model
.term
.TermTree
;
42 import eu
.etaxonomy
.cdm
.model
.term
.VocabularyEnum
;
46 * <h2>GENERAL NOTES </h2>
47 * <em>TODO: These notes are directly taken from original Transmission Engine Occurrence
48 * version 14 written in Visual Basic and still need to be
49 * adapted to the java version of the transmission engine!</em>
51 * <h3>summaryStatus</h3>
 * Each distribution information has a summaryStatus; this is a summary of the status codes
 * as stored in the fields of emOccurrence (native, introduced, cultivated, ...).
 * The summaryStatus seems to be equivalent to the CDM DistributionStatus.
57 * <h3>map generation</h3>
59 * When generating maps from the accumulated distribution information some special cases have to be handled:
61 * <li>if an entered or imported status information exists for the same area for which calculated (accumulated)
62 * data is available, the calculated data has to be given preference over other data.
64 * <li>If there is an area with a sub area and both areas have the same calculated status only the subarea
65 * status should be shown in the map, whereas the super area should be ignored.
69 * @author Anton Güntsch (author of original Transmission Engine Occurrence version 14 written in Visual Basic)
70 * @author Andreas Kohlbecker (2013, porting Transmission Engine Occurrence to Java)
71 * @author a.mueller (refactoring and merge with Structured Description Aggregation)
74 public class DistributionAggregation
75 extends DescriptionAggregationBase
<DistributionAggregation
,DistributionAggregationConfiguration
>{
77 public static final Logger logger
= Logger
.getLogger(DistributionAggregation
.class);
79 protected static final List
<String
> TAXONDESCRIPTION_INIT_STRATEGY
= Arrays
.asList(new String
[] {
80 "description.elements.area",
81 "description.elements.status",
82 "description.elements.sources.citation.authorship",
83 // "description.elements.sources.nameUsedInSource",
84 // "description.elements.multilanguageText",
85 // "name.status.type",
89 * A map which contains the status terms as key and the priority as value
90 * The map will contain both, the PresenceTerms and the AbsenceTerms
92 private List
<PresenceAbsenceTerm
> statusOrder
= null;
94 private final Map
<NamedArea
, Set
<NamedArea
>> subAreaMap
= new HashMap
<>();
96 // ******************* CONSTRUCTOR *********************************/
/** Creates a new aggregation instance; the constructor itself performs no initialization. */
public DistributionAggregation() {}
100 protected String
pluralDataType(){
101 return "distributions";
104 // ********************* METHODS *********************************/
107 protected void preAggregate(IProgressMonitor monitor
) {
108 monitor
.subTask("make status order");
110 // take start time for performance testing
111 double start
= System
.currentTimeMillis();
115 double end1
= System
.currentTimeMillis();
116 logger
.info("Time elapsed for making status order : " + (end1
- start
) / (1000) + "s");
119 double end2
= System
.currentTimeMillis();
120 logger
.info("Time elapsed for making super areas : " + (end2
- end1
) / (1000) + "s");
124 protected void initTransaction() {
/**
 * The super areas resolved by {@code makeSuperAreas()} from the UUIDs in the
 * configuration. It is only assigned when the configuration provides super
 * areas, so it can still be null afterwards.
 */
List<NamedArea> superAreaList;
129 private void makeSuperAreas() {
130 TransactionStatus tx
= startTransaction(true);
131 if (getConfig().getSuperAreas()!= null){
132 Set
<UUID
> superAreaUuids
= new HashSet
<>(getConfig().getSuperAreas());
133 superAreaList
= getTermService().find(NamedArea
.class, superAreaUuids
);
134 for (NamedArea superArea
: superAreaList
){
135 Set
<NamedArea
> subAreas
= getSubAreasFor(superArea
);
136 for(NamedArea subArea
: subAreas
){
137 if (logger
.isTraceEnabled()) {
138 logger
.trace("Initialize " + subArea
.getTitleCache());
143 commitTransaction(tx
);
148 protected List
<String
> descriptionInitStrategy() {
149 return TAXONDESCRIPTION_INIT_STRATEGY
;
152 // ********************* METHODS *****************************************/
154 private List
<PresenceAbsenceTerm
> getByAreaIgnoreStatusList() {
155 return getConfig().getByAreaIgnoreStatusList();
158 private List
<PresenceAbsenceTerm
> getByRankIgnoreStatusList() {
159 return getConfig().getByRankIgnoreStatusList();
163 * Compares the PresenceAbsenceTermBase terms contained in <code>a.status</code> and <code>b.status</code> after
164 * the priority as stored in the statusPriorityMap. The StatusAndSources object with
165 * the higher priority is returned. In the case of <code>a == b</code> the sources of b will be added to the sources
168 * If either a or b or the status are null b or a is returned.
170 * @see initializeStatusPriorityMap()
172 * @param accumulatedStatus
174 * @param additionalSourcesForWinningNewStatus Not in Use!
175 * In the case when <code>newStatus</code> is preferred over <code>accumulatedStatus</code> these Set of sources will be added to the sources of <code>b</code>
176 * @param aggregationSourceMode
179 private StatusAndSources
choosePreferredOrMerge(StatusAndSources accumulatedStatus
, StatusAndSources newStatus
,
180 Set
<DescriptionElementSource
> additionalSourcesForWinningNewStatus
, AggregationSourceMode aggregationSourceMode
){
182 if (newStatus
== null || newStatus
.status
== null) {
183 return accumulatedStatus
;
185 if (accumulatedStatus
== null || accumulatedStatus
.status
== null) {
189 Integer indexAcc
= statusOrder
.indexOf(accumulatedStatus
.status
);
190 Integer indexNew
= statusOrder
.indexOf(newStatus
.status
);
192 if (indexNew
== -1) {
193 logger
.warn("No priority found in map for " + newStatus
.status
.getLabel());
194 return accumulatedStatus
;
196 if (indexAcc
== -1) {
197 logger
.warn("No priority found in map for " + accumulatedStatus
.status
.getLabel());
200 if(indexAcc
< indexNew
){
201 if(additionalSourcesForWinningNewStatus
!= null) {
202 newStatus
.addSources(additionalSourcesForWinningNewStatus
);
204 if (aggregationSourceMode
== AggregationSourceMode
.ALL
){
205 newStatus
.addSources(accumulatedStatus
.sources
);
209 if (indexAcc
== indexNew
|| aggregationSourceMode
== AggregationSourceMode
.ALL
){
210 accumulatedStatus
.addSources(newStatus
.sources
);
212 return accumulatedStatus
;
217 protected void addAggregationResultToDescription(TaxonDescription targetDescription
,
218 ResultHolder resultHolder
) {
220 Map
<NamedArea
, StatusAndSources
> accumulatedStatusMap
= ((DistributionResultHolder
)resultHolder
).accumulatedStatusMap
;
222 Set
<Distribution
> toDelete
= new HashSet
<>();
223 if (getConfig().isDoClearExistingDescription()){
224 clearDescription(targetDescription
);
226 toDelete
= new HashSet
<>();
228 for (NamedArea area
: accumulatedStatusMap
.keySet()) {
229 PresenceAbsenceTerm status
= accumulatedStatusMap
.get(area
).status
;
230 Distribution distribution
= findDistributionForArea(targetDescription
, area
);
231 //old: if we want to reuse distribution only with exact same status
232 // Distribution distribution = findDistributionForAreaAndStatus(aggregationDescription, area, status);
234 if(distribution
== null) {
235 // create a new distribution element
236 distribution
= Distribution
.NewInstance(area
, status
);
237 targetDescription
.addElement(distribution
);
239 distribution
.setStatus(status
);
240 toDelete
.remove(distribution
); //we keep the distribution for reuse
242 replaceSources(distribution
, accumulatedStatusMap
.get(area
).sources
);
243 // addSourcesDeduplicated(distribution.getSources(), accumulatedStatusMap.get(area).sources);
245 for(Distribution toDeleteDist
: toDelete
){
246 targetDescription
.removeElement(toDeleteDist
);
251 * Removes all description elements of type {@link Distribution} from the
252 * (aggregation) description.
254 private void clearDescription(TaxonDescription aggregationDescription
) {
256 Set
<DescriptionElementBase
> deleteCandidates
= new HashSet
<>();
257 for (DescriptionElementBase descriptionElement
: aggregationDescription
.getElements()) {
258 if(descriptionElement
.isInstanceOf(Distribution
.class)) {
259 deleteCandidates
.add(descriptionElement
);
262 aggregationDescription
.addType(DescriptionType
.AGGREGATED_DISTRIBUTION
);
263 if(deleteCandidates
.size() > 0){
264 for(DescriptionElementBase descriptionElement
: deleteCandidates
) {
265 aggregationDescription
.removeElement(descriptionElement
);
266 getDescriptionService().deleteDescriptionElement(descriptionElement
);
267 descriptionElement
= null;
270 getDescriptionService().saveOrUpdate(aggregationDescription
);
271 logger
.debug("\t" + deleteCount
+" distributions cleared");
276 protected void aggregateWithinSingleTaxon(Taxon taxon
,
277 ResultHolder resultHolder
,
278 Set
<TaxonDescription
> excludedDescriptions
) {
280 Map
<NamedArea
, StatusAndSources
> accumulatedStatusMap
=
281 ((DistributionResultHolder
)resultHolder
).accumulatedStatusMap
;
283 if(logger
.isDebugEnabled()){
284 logger
.debug("accumulateByArea() - taxon :" + taxonToString(taxon
));
287 Set
<TaxonDescription
> descriptions
= descriptionsFor(taxon
, excludedDescriptions
);
288 Set
<Distribution
> distributions
= distributionsFor(descriptions
);
290 // Step through superAreas for accumulation of subAreas
291 for (NamedArea superArea
: superAreaList
){
293 // accumulate all sub area status
294 StatusAndSources accumulatedStatusAndSources
= null;
295 AggregationSourceMode aggregationSourceMode
= getConfig().getWithinTaxonSourceMode();
296 // TODO consider using the TermHierarchyLookup (only in local branch a.kohlbecker)
297 Set
<NamedArea
> subAreas
= getSubAreasFor(superArea
);
298 for(NamedArea subArea
: subAreas
){
299 if(logger
.isTraceEnabled()){logger
.trace("accumulateByArea() - \t\t" + termToString(subArea
));}
300 // step through all distributions for the given subArea
301 for(Distribution distribution
: distributions
){
302 //TODO AM is the status handling here correct? The mapping to CDM handled
303 if(subArea
.equals(distribution
.getArea()) && distribution
.getStatus() != null) {
304 PresenceAbsenceTerm status
= distribution
.getStatus();
305 if(logger
.isTraceEnabled()){logger
.trace("accumulateByArea() - \t\t" + termToString(subArea
) + ": " + termToString(status
));}
306 // skip all having a status value in the ignore list
307 if (status
== null || getByAreaIgnoreStatusList().contains(status
)
308 || (getConfig().isIgnoreAbsentStatusByArea() && status
.isAbsenceTerm())){
311 StatusAndSources subAreaStatusAndSources
= new StatusAndSources(status
, distribution
, aggregationSourceMode
);
312 accumulatedStatusAndSources
= choosePreferredOrMerge(accumulatedStatusAndSources
, subAreaStatusAndSources
, null, aggregationSourceMode
);
318 if (accumulatedStatusAndSources
!= null) {
319 StatusAndSources preferedStatus
= choosePreferredOrMerge(accumulatedStatusMap
.get(superArea
), accumulatedStatusAndSources
, null, aggregationSourceMode
);
320 accumulatedStatusMap
.put(superArea
, preferedStatus
);
323 } // next super area ....
326 private class DistributionResultHolder
extends ResultHolder
{
327 Map
<NamedArea
, StatusAndSources
> accumulatedStatusMap
= new HashMap
<>();
331 protected ResultHolder
createResultHolder() {
332 return new DistributionResultHolder();
335 protected class StatusAndSources
{
337 private final PresenceAbsenceTerm status
;
338 private final Set
<DescriptionElementSource
> sources
= new HashSet
<>();
340 public StatusAndSources(PresenceAbsenceTerm status
, DescriptionElementBase deb
, AggregationSourceMode aggregationSourceMode
) {
341 this.status
= status
;
342 if (aggregationSourceMode
== AggregationSourceMode
.NONE
){
344 }else if (aggregationSourceMode
== AggregationSourceMode
.DESCRIPTION
){
345 sources
.add(DescriptionElementSource
.NewAggregationInstance(deb
.getInDescription()));
346 }else if (aggregationSourceMode
== AggregationSourceMode
.TAXON
){
347 if (deb
.getInDescription().isInstanceOf(TaxonDescription
.class)){
348 TaxonDescription td
= CdmBase
.deproxy(deb
.getInDescription(), TaxonDescription
.class);
349 sources
.add(DescriptionElementSource
.NewAggregationInstance(td
.getTaxon()));
351 logger
.warn("Description is not of type TaxonDescription. Adding source not possible");
353 }else if (aggregationSourceMode
== AggregationSourceMode
.ALL
|| aggregationSourceMode
== AggregationSourceMode
.ALL_SAMEVALUE
){
354 addSourcesDeduplicated(this.sources
, deb
.getSources());
356 throw new RuntimeException("Unhandled source aggregation mode: " + aggregationSourceMode
);
360 public void addSources(Set
<DescriptionElementSource
> sources
) {
361 addSourcesDeduplicated(this.sources
, sources
);
365 public String
toString() {
366 return "StatusAndSources [status=" + status
+ ", sources=" + sources
.size() + "]";
371 protected void aggregateToParentTaxon(TaxonNode taxonNode
,
372 ResultHolder resultHolder
,
373 Set
<TaxonDescription
> excludedDescriptions
) {
375 Map
<NamedArea
, StatusAndSources
> accumulatedStatusMap
=
376 ((DistributionResultHolder
)resultHolder
).accumulatedStatusMap
;
378 Taxon taxon
= CdmBase
.deproxy(taxonNode
.getTaxon());
379 if(logger
.isDebugEnabled()){
380 logger
.debug("accumulateByRank() [" + /*rank.getLabel() +*/ "] - taxon :" + taxonToString(taxon
));
383 if(!taxonNode
.getChildNodes().isEmpty()) {
385 LinkedList
<Taxon
> childStack
= new LinkedList
<>();
386 for (TaxonNode node
: taxonNode
.getChildNodes()){
388 continue; //just in case if sortindex is broken
390 Taxon child
= CdmBase
.deproxy(node
.getTaxon());
391 //TODO maybe we should also use child catching from taxon node filter
392 // we could e.g. clone the filter and set the parent as subtree filter
393 // and this way get all children via service layer, this may improve also
395 if (getConfig().getTaxonNodeFilter().isIncludeUnpublished()||
397 childStack
.add(child
);
401 while(childStack
.size() > 0){
403 Taxon childTaxon
= childStack
.pop();
404 getSession().setReadOnly(childTaxon
, true);
405 if(logger
.isTraceEnabled()){
406 logger
.trace(" subtaxon :" + taxonToString(childTaxon
));
409 Set
<Distribution
> distributions
= distributionsFor(descriptionsFor(childTaxon
, excludedDescriptions
));
410 for(Distribution distribution
: distributions
) {
412 PresenceAbsenceTerm status
= distribution
.getStatus();
413 if (status
== null || getByRankIgnoreStatusList().contains(status
)
414 || (getConfig().isIgnoreAbsentStatusByRank() && status
.isAbsenceTerm())){
418 NamedArea area
= distribution
.getArea();
419 AggregationSourceMode aggregationSourceMode
= getConfig().getToParentSourceMode();
421 StatusAndSources childStatusAndSources
= new StatusAndSources(status
, distribution
, aggregationSourceMode
);
422 StatusAndSources preferedStatus
= choosePreferredOrMerge(accumulatedStatusMap
.get(area
),
423 childStatusAndSources
, null, aggregationSourceMode
);
424 accumulatedStatusMap
.put(area
, preferedStatus
);
427 // evict all initialized entities of the childTaxon
428 // TODO consider using cascade="evict" in the model classes
429 // for( TaxonDescription description : ((Taxon)childTaxonBase).getDescriptions()) {
430 // for (DescriptionElementBase deb : description.getElements()) {
431 // getSession().evict(deb);
433 // getSession().evict(description); // this causes in some cases the taxon object to be detached from the session
435 // getSession().evict(childTaxon); // no longer needed, save heap
440 private Distribution
findDistributionForArea(TaxonDescription description
, NamedArea area
) {
441 for(DescriptionElementBase item
: description
.getElements()) {
442 if(!(item
.isInstanceOf(Distribution
.class))) {
445 Distribution distribution
= CdmBase
.deproxy(item
, Distribution
.class);
446 if(distribution
.getArea().equals(area
)) {
454 * Old: For if we want to reuse distributions only for the exact same status or
455 * if we aggregate for each status separately. Otherwise use {@link #findDistributionForArea(TaxonDescription, NamedArea)}
457 private Distribution
findDistributionForAreaAndStatus(TaxonDescription description
, NamedArea area
, PresenceAbsenceTerm status
) {
458 for(DescriptionElementBase item
: description
.getElements()) {
459 if(!(item
.isInstanceOf(Distribution
.class))) {
462 Distribution distribution
= CdmBase
.deproxy(item
, Distribution
.class);
463 if(distribution
.getArea().equals(area
) && distribution
.getStatus().equals(status
)) {
470 private void flush() {
471 logger
.debug("flushing session ...");
472 getSession().flush();
474 logger
.debug("flushing to indexes ...");
475 Search
.getFullTextSession(getSession()).flushToIndexes();
476 } catch (HibernateException e
) {
477 /* IGNORE - Hibernate Search Event listeners not configured ... */
478 if(!e
.getMessage().startsWith("Hibernate Search Event listeners not configured")){
484 private void flushAndClear() {
486 logger
.debug("clearing session ...");
487 getSession().clear();
491 protected TaxonDescription
createNewDescription(Taxon taxon
) {
492 String title
= taxon
.getTitleCache();
493 logger
.debug("creating new description for " + title
);
494 TaxonDescription description
= TaxonDescription
.NewInstance(taxon
);
495 description
.addType(DescriptionType
.AGGREGATED_DISTRIBUTION
);
496 setDescriptionTitle(description
, taxon
);
501 protected boolean hasDescriptionType(TaxonDescription description
) {
502 return description
.isAggregatedDistribution();
506 protected void setDescriptionTitle(TaxonDescription description
, Taxon taxon
) {
507 String title
= taxon
.getName() != null? taxon
.getName().getTitleCache() : taxon
.getTitleCache();
508 description
.setTitleCache("Aggregated distribution for " + title
, true);
512 private Set
<NamedArea
> getSubAreasFor(NamedArea superArea
) {
514 if(!subAreaMap
.containsKey(superArea
)) {
515 if(logger
.isDebugEnabled()){
516 logger
.debug("loading included areas for " + superArea
.getLabel());
518 subAreaMap
.put(superArea
, superArea
.getIncludes());
520 return subAreaMap
.get(superArea
);
523 private Set
<TaxonDescription
> descriptionsFor(Taxon taxon
, Set
<TaxonDescription
> excludedDescriptions
) {
524 Set
<TaxonDescription
> result
= new HashSet
<>();
525 for(TaxonDescription description
: taxon
.getDescriptions()) {
526 // readOnlyIfInSession(description); //not needed for tests anymore
527 if (excludedDescriptions
== null || !excludedDescriptions
.contains(description
)){
528 result
.add(description
);
534 private Set
<Distribution
> distributionsFor(Set
<TaxonDescription
> descriptions
) {
535 Set
<Distribution
> result
= new HashSet
<>();
536 for(TaxonDescription description
: descriptions
) {
537 for(DescriptionElementBase deb
: description
.getElements()) {
538 if(deb
.isInstanceOf(Distribution
.class)) {
539 // readOnlyIfInSession(deb); //not needed for tests anymore
540 result
.add(CdmBase
.deproxy(deb
, Distribution
.class));
548 * This method avoids problems when running the {@link DistributionAggregationTest}.
549 * For some unknown reason entities are not in the PersitenceContext even if they are
550 * loaded by a service method. Setting these entities to read-only would raise a
551 * TransientObjectException("Instance was not associated with this persistence context")
555 private void readOnlyIfInSession(CdmBase entity
) {
556 if(getSession().contains(entity
)) {
557 getSession().setReadOnly(entity
, true);
562 private String
termToString(OrderedTermBase
<?
> term
) {
563 if(logger
.isTraceEnabled()) {
564 return term
.getLabel() + " [" + term
.getIdInVocabulary() + "]";
566 return term
.getIdInVocabulary();
571 * Sets the priorities for presence and absence terms, the priorities are stored in extensions.
572 * This method will start a new transaction and commits it after the work is done.
574 private void makeStatusOrder() {
576 TransactionStatus txStatus
= startTransaction(false);
578 @SuppressWarnings("rawtypes")
579 TermCollection
<PresenceAbsenceTerm
, TermNode
> stOrder
= getConfig().getStatusOrder();
580 if (stOrder
== null){
581 stOrder
= defaultStatusOrder();
583 if (stOrder
.isInstanceOf(TermTree
.class)){
584 statusOrder
= CdmBase
.deproxy(stOrder
, TermTree
.class).asTermList();
585 }else if (stOrder
.isInstanceOf(OrderedTermVocabulary
.class)){
586 statusOrder
= new ArrayList
<>(CdmBase
.deproxy(stOrder
, OrderedTermVocabulary
.class).getOrderedTerms());
588 throw new RuntimeException("TermCollection type for status order not supported: " + statusOrder
.getClass().getSimpleName());
591 commitTransaction(txStatus
);
594 private OrderedTermVocabulary
<PresenceAbsenceTerm
> defaultStatusOrder() {
595 @SuppressWarnings("unchecked")
596 OrderedTermVocabulary
<PresenceAbsenceTerm
> voc
= (OrderedTermVocabulary
<PresenceAbsenceTerm
>)getRepository().getVocabularyService().find(VocabularyEnum
.PresenceAbsenceTerm
.getUuid());
600 private void replaceSources(Distribution distribution
, Set
<DescriptionElementSource
> newSources
) {
601 Set
<DescriptionElementSource
> toDeleteSources
= new HashSet
<>(distribution
.getSources());
602 for(DescriptionElementSource newSource
: newSources
) {
603 boolean contained
= false;
604 for(DescriptionElementSource existingSource
: distribution
.getSources()) {
605 if(existingSource
.equalsByShallowCompare(newSource
)) {
607 toDeleteSources
.remove(existingSource
);
613 distribution
.addSource(newSource
.clone());
614 } catch (CloneNotSupportedException e
) {
615 // should never happen
616 throw new RuntimeException(e
);
620 for (DescriptionElementSource toDeleteSource
: toDeleteSources
){
621 distribution
.removeSource(toDeleteSource
);