2 * Copyright (C) 2019 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.api
.service
.description
;
11 import java
.math
.BigDecimal
;
12 import java
.math
.MathContext
;
13 import java
.util
.ArrayList
;
14 import java
.util
.Comparator
;
15 import java
.util
.HashMap
;
16 import java
.util
.HashSet
;
17 import java
.util
.List
;
19 import java
.util
.Optional
;
21 import java
.util
.stream
.Collectors
;
23 import eu
.etaxonomy
.cdm
.common
.BigDecimalUtil
;
24 import eu
.etaxonomy
.cdm
.common
.monitor
.IProgressMonitor
;
25 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
26 import eu
.etaxonomy
.cdm
.model
.common
.ICdmBase
;
27 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
28 import eu
.etaxonomy
.cdm
.model
.description
.CategoricalData
;
29 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionBase
;
30 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
31 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionType
;
32 import eu
.etaxonomy
.cdm
.model
.description
.DescriptiveDataSet
;
33 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
34 import eu
.etaxonomy
.cdm
.model
.description
.IDescribable
;
35 import eu
.etaxonomy
.cdm
.model
.description
.IndividualsAssociation
;
36 import eu
.etaxonomy
.cdm
.model
.description
.QuantitativeData
;
37 import eu
.etaxonomy
.cdm
.model
.description
.SpecimenDescription
;
38 import eu
.etaxonomy
.cdm
.model
.description
.State
;
39 import eu
.etaxonomy
.cdm
.model
.description
.StateData
;
40 import eu
.etaxonomy
.cdm
.model
.description
.StatisticalMeasure
;
41 import eu
.etaxonomy
.cdm
.model
.description
.StatisticalMeasurementValue
;
42 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
43 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationBase
;
44 import eu
.etaxonomy
.cdm
.model
.reference
.ICdmTarget
;
45 import eu
.etaxonomy
.cdm
.model
.reference
.OriginalSourceType
;
46 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
47 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
50 * Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
 * For all {@link SpecimenDescription}s belonging to this data set, new
 * aggregated {@link TaxonDescription}s are created for every taxon the
 * specimens are directly associated with.<BR>
55 * Also lower rank taxon descriptions are aggregated to upper rank taxa.
61 public class StructuredDescriptionAggregation
62 extends DescriptionAggregationBase
<StructuredDescriptionAggregation
, StructuredDescriptionAggregationConfiguration
>{
64 private DescriptiveDataSet dataSet
;
67 protected String
pluralDataType(){
68 return "structured descriptive data";
72 protected void preAggregate(IProgressMonitor monitor
) {
73 monitor
.subTask("preAccumulate - nothing to do");
75 // take start time for performance testing
76 double start
= System
.currentTimeMillis();
78 getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
80 double end1
= System
.currentTimeMillis();
81 logger
.info("Time elapsed for pre-accumulate() : " + (end1
- start
) / (1000) + "s");
85 protected void verifyConfiguration(IProgressMonitor monitor
){
86 if (!AggregationSourceMode
.list(AggregationMode
.ToParent
, AggregationType
.StructuredDescription
)
87 .contains(getConfig().getToParentSourceMode())){
88 throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
90 if (!AggregationSourceMode
.list(AggregationMode
.WithinTaxon
, AggregationType
.StructuredDescription
)
91 .contains(getConfig().getWithinTaxonSourceMode())){
92 throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
96 private boolean hasCharacterData(DescriptionElementBase element
) {
97 return hasCategoricalData(element
) || hasQuantitativeData(element
);
100 private boolean hasQuantitativeData(DescriptionElementBase element
) {
101 if(element
instanceof QuantitativeData
102 && !((QuantitativeData
) element
).getStatisticalValues().isEmpty()){
103 QuantitativeData quantitativeData
= (QuantitativeData
)element
;
104 return !getExactValues(quantitativeData
).isEmpty()
105 || quantitativeData
.getMin()!=null
106 || quantitativeData
.getMax()!=null;
111 private boolean hasCategoricalData(DescriptionElementBase element
) {
112 return element
instanceof CategoricalData
&& !((CategoricalData
) element
).getStatesOnly().isEmpty();
116 protected void setDescriptionTitle(TaxonDescription description
, Taxon taxon
) {
117 String title
= taxon
.getName() != null? taxon
.getName().getTitleCache() : taxon
.getTitleCache();
118 description
.setTitleCache("Aggregated description for " + title
, true);
123 protected TaxonDescription
createNewDescription(Taxon taxon
) {
124 String title
= taxon
.getTitleCache();
125 if (logger
.isDebugEnabled()){logger
.debug("creating new description for " + title
);}
126 TaxonDescription description
= TaxonDescription
.NewInstance(taxon
);
127 description
.addType(DescriptionType
.AGGREGATED_STRUC_DESC
);
128 setDescriptionTitle(description
, taxon
);
133 protected boolean hasDescriptionType(TaxonDescription description
) {
134 return dataSet
.getDescriptions().contains(description
) && description
.isAggregatedStructuredDescription();
138 protected List
<String
> descriptionInitStrategy() {
139 return new ArrayList
<>();
143 protected void addAggregationResultToDescription(TaxonDescription targetDescription
,
144 ResultHolder resultHolder
) {
146 StructuredDescriptionResultHolder structuredResultHolder
= (StructuredDescriptionResultHolder
)resultHolder
;
147 mergeDescriptionElements(targetDescription
, structuredResultHolder
.categoricalMap
, CategoricalData
.class);
148 mergeDescriptionElements(targetDescription
, structuredResultHolder
.quantitativeMap
, QuantitativeData
.class);
149 addAggregationSources(targetDescription
, structuredResultHolder
);
151 if(!targetDescription
.getElements().isEmpty()){
152 dataSet
.addDescription(targetDescription
);
154 dataSet
.removeDescription(targetDescription
);
158 private <T
extends DescriptionBase
<?
>> void addAggregationSources(TaxonDescription targetDescription
,
159 StructuredDescriptionResultHolder structuredResultHolder
) {
161 //Remove sources from description
162 Set
<IdentifiableSource
> sourcesToRemove
= targetDescription
.getSources().stream()
163 .filter(source
->source
.getType().equals(OriginalSourceType
.Aggregation
))
164 .collect(Collectors
.toSet());
166 Set
<IdentifiableSource
> newSources
= structuredResultHolder
.sources
;
167 for (IdentifiableSource newSource
: newSources
) {
168 IdentifiableSource mergeSourceCandidate
= findSourceCandidate(targetDescription
, newSource
);
169 if (mergeSourceCandidate
== null){
170 addNewSource(targetDescription
, newSource
);
172 mergeSource(mergeSourceCandidate
, newSource
);
173 sourcesToRemove
.remove(mergeSourceCandidate
);
177 //remove remaining sources-to-be-removed
178 for (IdentifiableSource sourceToRemove
: sourcesToRemove
) {
179 targetDescription
.removeSource(sourceToRemove
);
180 ICdmTarget target
= sourceToRemove
.getCdmSource();
182 if (target
.isInstanceOf(DescriptionBase
.class)){
183 @SuppressWarnings("unchecked")
184 T descriptionToDelete
= ((T
)sourceToRemove
.getCdmSource());
185 ((IDescribable
<T
>)descriptionToDelete
.describedEntity()).removeDescription(descriptionToDelete
);
186 structuredResultHolder
.descriptionsToDelete
.add(descriptionToDelete
);
187 }else if (target
.isInstanceOf(Taxon
.class)){
188 //nothing to do for now
190 throw new AggregationException("CdmLink target type not yet supported: " + target
.getClass().getSimpleName());
196 private <T
extends DescriptionBase
<?
>> void addNewSource(TaxonDescription targetDescription
,
197 IdentifiableSource newSource
) {
200 targetDescription
.addSource(newSource
);
201 //if it is a description add it to the described entity (specimen, taxon)
202 ICdmBase target
= newSource
.getCdmSource();
204 if (target
.isInstanceOf(DescriptionBase
.class)){
205 @SuppressWarnings("unchecked")
206 T description
= (T
)CdmBase
.deproxy(target
);
207 ((IDescribable
<T
>)description
.describedEntity()).addDescription(description
);
212 //mergeablity has been checked before
213 private <T
extends DescriptionBase
<?
>> void mergeSource(IdentifiableSource mergeCandidate
, IdentifiableSource newSource
) {
215 ICdmBase newTarget
= newSource
.getCdmSource();
216 if (newTarget
!= null){
217 newTarget
= CdmBase
.deproxy(newTarget
);
218 if (newTarget
instanceof DescriptionBase
){
219 @SuppressWarnings("unchecked")
220 T newTargetDesc
= (T
)newTarget
;
221 @SuppressWarnings("unchecked")
222 T existingTargetDesc
= CdmBase
.deproxy((T
)mergeCandidate
.getCdmSource());
223 mergeSourceDescription(existingTargetDesc
, newTargetDesc
);
224 ((IDescribable
<T
>)existingTargetDesc
.describedEntity()).addDescription(existingTargetDesc
);
225 ((IDescribable
<T
>)newTargetDesc
.describedEntity()).removeDescription(newTargetDesc
);
226 }else if (newTarget
instanceof Taxon
){
227 //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
229 throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
232 throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
236 private <T
extends DescriptionBase
<?
>> void mergeSourceDescription(T existingSourceDescription
, T newSourceDescription
) {
238 Set
<DescriptionElementBase
> elementsToRemove
= new HashSet
<>(existingSourceDescription
.getElements());
239 Set
<DescriptionElementBase
> newElements
= new HashSet
<>(newSourceDescription
.getElements());
240 for (DescriptionElementBase newElement
: newElements
){
241 DescriptionElementBase newElementClone
= newElement
.clone();
242 Optional
<DescriptionElementBase
> matchingElement
= elementsToRemove
.stream()
243 .filter(e
->e
.getFeature()!= null
244 && e
.getFeature().equals(newElementClone
.getFeature()))
246 if (matchingElement
.isPresent()){
247 mergeDescriptionElement(matchingElement
.get(), newElementClone
);
248 elementsToRemove
.remove(matchingElement
.get());
250 existingSourceDescription
.addElement(newElementClone
);
253 addSourceDescriptionToDescribedEntity(newSourceDescription
);
254 existingSourceDescription
.setTitleCache(newSourceDescription
.getTitleCache(), true);
256 for (DescriptionElementBase debToRemove
: elementsToRemove
){
257 existingSourceDescription
.removeElement(debToRemove
);
262 @SuppressWarnings("unchecked")
263 private <T
extends DescriptionBase
<?
>> void addSourceDescriptionToDescribedEntity(T sourceDescription
) {
264 ((IDescribable
<T
>)sourceDescription
.describedEntity()).addDescription(sourceDescription
);
266 @SuppressWarnings("unchecked")
267 private <T
extends DescriptionBase
<?
>> void removeSourceDescriptionFromDescribedEntity(T sourceDescription
) {
268 ((IDescribable
<T
>)sourceDescription
.describedEntity()).removeDescription(sourceDescription
);
271 private IdentifiableSource
findSourceCandidate(TaxonDescription targetDescription
, IdentifiableSource newSource
) {
272 for (IdentifiableSource existingSource
: targetDescription
.getSources()){
273 boolean isCandidate
= isCandidateForSourceReuse(existingSource
, newSource
);
275 return existingSource
;
281 private boolean isCandidateForSourceReuse(IdentifiableSource existingSource
, IdentifiableSource newSource
) {
282 if (newSource
.getCdmSource()!= null){
283 if (existingSource
.getCdmSource() == null){
286 ICdmBase newTarget
= CdmBase
.deproxy(newSource
.getCdmSource());
287 ICdmBase existingTarget
= CdmBase
.deproxy((CdmBase
)existingSource
.getCdmSource());
288 if (!newTarget
.getClass().equals(existingTarget
.getClass())){
291 if (newTarget
instanceof SpecimenDescription
){
292 SpecimenOrObservationBase
<?
> newSob
= ((SpecimenDescription
)newTarget
).getDescribedSpecimenOrObservation();
293 SpecimenOrObservationBase
<?
> existingSob
= ((SpecimenDescription
)existingTarget
).getDescribedSpecimenOrObservation();
294 //for now reuse is possible if both are descriptions for the same specimen
295 return newSob
!= null && newSob
.equals(existingSob
);
296 }else if (newTarget
instanceof TaxonDescription
){
297 Taxon newTaxon
= ((TaxonDescription
)newTarget
).getTaxon();
298 Taxon existingTaxon
= ((TaxonDescription
)existingTarget
).getTaxon();
299 //for now reuse is possible if both are descriptions for the same taxon
300 return newTaxon
!= null && newTaxon
.equals(existingTaxon
);
301 }else if (newTarget
instanceof Taxon
){
302 return newTarget
.equals(existingTarget
);
304 throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget
.getClass());
313 private <T
extends DescriptionBase
<?
>> T
cloneNewSourceDescription(T newSourceDescription
) {
314 @SuppressWarnings("unchecked")
315 T clonedDescription
= (T
)newSourceDescription
.clone();
316 clonedDescription
.removeDescriptiveDataSet(dataSet
);
317 clonedDescription
.getTypes().add(DescriptionType
.CLONE_FOR_SOURCE
);
318 clonedDescription
.setTitleCache("Clone: " + clonedDescription
.getTitleCache(), true);
319 return clonedDescription
;
322 private <S
extends DescriptionElementBase
> void mergeDescriptionElements(TaxonDescription targetDescription
,
323 Map
<Feature
, ?
extends DescriptionElementBase
> newElementsMap
, Class
<?
extends DescriptionElementBase
> debClass
) {
325 Set
<DescriptionElementBase
> elementsToRemove
= new HashSet
<>(
326 targetDescription
.getElements().stream()
327 .filter(el
->el
.isInstanceOf(debClass
))
328 .collect(Collectors
.toSet()));
330 //for each character in "characters of new elements"
331 for (Feature characterNew
: newElementsMap
.keySet()) {
333 //if elements for this character exist in old data, remember any of them to keep (in clean data there should be only max. 1
334 DescriptionElementBase elementToStay
= null;
335 for (DescriptionElementBase existingDeb
: elementsToRemove
) {
336 if(existingDeb
.getFeature().equals(characterNew
)){
337 elementToStay
= existingDeb
;
338 elementsToRemove
.remove(existingDeb
);
343 //if there is no element for this character in old data, add the new element for this character to the target description (otherwise reuse old element)
344 if (elementToStay
== null){
345 targetDescription
.addElement(newElementsMap
.get(characterNew
));
347 mergeDescriptionElement(elementToStay
, newElementsMap
.get(characterNew
));
351 //remove all elements not needed anymore
352 for(DescriptionElementBase elementToRemove
: elementsToRemove
){
353 targetDescription
.removeElement(elementToRemove
);
357 private void mergeDescriptionElement(DescriptionElementBase targetElement
,
358 DescriptionElementBase newElement
) {
360 targetElement
= CdmBase
.deproxy(targetElement
);
361 newElement
= CdmBase
.deproxy(newElement
);
362 if (targetElement
instanceof CategoricalData
){
363 mergeDescriptionElement((CategoricalData
)targetElement
, (CategoricalData
)newElement
);
364 }else if (targetElement
.isInstanceOf(QuantitativeData
.class)){
365 mergeDescriptionElement((QuantitativeData
)targetElement
, (QuantitativeData
)newElement
);
367 throw new AggregationException("Class not supported: " + targetElement
.getClass().getName());
371 private void mergeDescriptionElement(CategoricalData elementToStay
,
372 CategoricalData newElement
) {
373 List
<StateData
> oldData
= new ArrayList
<>(elementToStay
.getStateData());
374 List
<StateData
> newData
= new ArrayList
<>(newElement
.getStateData());
375 for (StateData newStateData
: newData
){
376 State state
= newStateData
.getState();
377 StateData oldStateData
= firstByState(state
, oldData
);
378 if (oldStateData
!= null){
379 //for now only state and count is used for aggregation, below code needs to be adapted if this changes
380 oldStateData
.setCount(newStateData
.getCount());
381 oldData
.remove(oldStateData
);
383 elementToStay
.addStateData(newStateData
);
386 for (StateData stateDataToRemove
: oldData
){
387 elementToStay
.removeStateData(stateDataToRemove
);
391 private StateData
firstByState(State state
, List
<StateData
> oldData
) {
395 for (StateData sd
: oldData
){
396 if (state
.equals(sd
.getState())){
403 private void mergeDescriptionElement(QuantitativeData elementToStay
,
404 QuantitativeData newElement
) {
405 Set
<StatisticalMeasurementValue
> oldValues
= new HashSet
<>(elementToStay
.getStatisticalValues());
406 Set
<StatisticalMeasurementValue
> newValues
= new HashSet
<>(newElement
.getStatisticalValues());
407 for (StatisticalMeasurementValue newValue
: newValues
){
408 StatisticalMeasure type
= newValue
.getType();
409 StatisticalMeasurementValue oldValue
= firstValueByType(type
, oldValues
);
410 if (oldValue
!= null){
411 //for now only state and count is used for aggregation, below code needs to be adapted if this changes
412 oldValue
.setValue(newValue
.getValue());
413 oldValues
.remove(oldValue
);
415 elementToStay
.addStatisticalValue(newValue
);
418 for (StatisticalMeasurementValue valueToRemove
: oldValues
){
419 elementToStay
.removeStatisticalValue(valueToRemove
);
423 private StatisticalMeasurementValue
firstValueByType(StatisticalMeasure type
, Set
<StatisticalMeasurementValue
> oldValues
) {
427 for (StatisticalMeasurementValue value
: oldValues
){
428 if (type
.equals(value
.getType())){
436 protected void initTransaction() {
437 dataSet
= getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
441 protected void removeDescriptionIfEmpty(TaxonDescription description
, ResultHolder resultHolder
) {
442 super.removeDescriptionIfEmpty(description
, resultHolder
);
443 if (description
.getElements().isEmpty()){
444 dataSet
.removeDescription(description
);
449 protected void aggregateToParentTaxon(TaxonNode taxonNode
,
450 ResultHolder resultHolder
,
451 Set
<TaxonDescription
> excludedDescriptions
) {
452 StructuredDescriptionResultHolder descriptiveResultHolder
= (StructuredDescriptionResultHolder
)resultHolder
;
453 Set
<TaxonDescription
> childDescriptions
= getChildTaxonDescriptions(taxonNode
, dataSet
);
454 addDescriptionToResultHolder(descriptiveResultHolder
, childDescriptions
, AggregationMode
.ToParent
);
458 protected void aggregateWithinSingleTaxon(Taxon taxon
,
459 ResultHolder resultHolder
,
460 Set
<TaxonDescription
> excludedDescriptions
) {
461 StructuredDescriptionResultHolder descriptiveResultHolder
= (StructuredDescriptionResultHolder
)resultHolder
;
462 Set
<SpecimenDescription
> specimenDescriptions
= getSpecimenDescriptions(taxon
, dataSet
);
463 addDescriptionToResultHolder(descriptiveResultHolder
, specimenDescriptions
, AggregationMode
.WithinTaxon
);
464 if (getConfig().isIncludeLiterature()){
465 Set
<TaxonDescription
> literatureDescriptions
= getLiteratureDescriptions(taxon
, dataSet
);
466 addDescriptionToResultHolder(descriptiveResultHolder
, literatureDescriptions
, AggregationMode
.WithinTaxon
);
468 //TODO add default descriptions
473 private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder
,
474 Set
<?
extends DescriptionBase
<?
>> specimenLiteraturOrDefaultDescriptions
,
475 AggregationMode aggregationMode
) {
477 boolean descriptionWasUsed
= false;
478 for (DescriptionBase
<?
> desc
: specimenLiteraturOrDefaultDescriptions
){
479 for (DescriptionElementBase deb
: desc
.getElements()){
480 if (hasCharacterData(deb
)){
481 if (deb
.isInstanceOf(CategoricalData
.class)){
482 addToCategorical(CdmBase
.deproxy(deb
, CategoricalData
.class), descriptiveResultHolder
);
483 descriptionWasUsed
= true;
484 }else if (deb
.isInstanceOf(QuantitativeData
.class)){
485 addToQuantitativData(CdmBase
.deproxy(deb
, QuantitativeData
.class), descriptiveResultHolder
);
486 descriptionWasUsed
= true;
492 AggregationSourceMode sourceMode
= getConfig().getSourceMode(aggregationMode
);
493 if(descriptionWasUsed
&& sourceMode
!= AggregationSourceMode
.NONE
){
494 IdentifiableSource source
= IdentifiableSource
.NewAggregationSourceInstance();
495 desc
= CdmBase
.deproxy(desc
);
499 DescriptionBase
<?
> clonedDesc
= cloneNewSourceDescription(desc
);
500 source
.setCdmSource(clonedDesc
);
503 if (desc
instanceof TaxonDescription
){
504 Taxon taxon
= ((TaxonDescription
) desc
).getTaxon();
505 source
.setCdmSource(taxon
);
507 throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc
.getClass().getSimpleName() );
513 case ALL
: //not yet supported
514 throw new AggregationException("Source mode not yet supported: " + sourceMode
);
515 case ALL_SAMEVALUE
: //makes no sense
516 throw new AggregationException("Illegal source mode: " + sourceMode
);
518 throw new AggregationException("Source mode not supported: " + sourceMode
);
521 descriptiveResultHolder
.sources
.add(source
);
527 private void addToQuantitativData(QuantitativeData qd
, StructuredDescriptionResultHolder resultHolder
) {
528 QuantitativeData aggregatedQuantitativeData
= resultHolder
.quantitativeMap
.get(qd
.getFeature());
529 if(aggregatedQuantitativeData
==null){
530 // no QuantitativeData with this feature in aggregation
531 aggregatedQuantitativeData
= aggregateWithinQuantitativeData(qd
);
534 aggregatedQuantitativeData
= addToExistingQuantitativeData(aggregatedQuantitativeData
, qd
);
536 if (aggregatedQuantitativeData
!= null){
537 resultHolder
.quantitativeMap
.put(qd
.getFeature(), aggregatedQuantitativeData
);
541 private void addToCategorical(CategoricalData cd
, StructuredDescriptionResultHolder resultHolder
) {
542 CategoricalData aggregatedCategoricalData
= resultHolder
.categoricalMap
.get(cd
.getFeature());
543 if(aggregatedCategoricalData
==null){
544 // no CategoricalData with this feature in aggregation
545 aggregatedCategoricalData
= cd
.clone();
546 // set count to 1 if not set
547 aggregatedCategoricalData
.getStateData().stream().filter(sd
->sd
.getCount()==null).forEach(sd
->sd
.incrementCount());
548 resultHolder
.categoricalMap
.put(aggregatedCategoricalData
.getFeature(), aggregatedCategoricalData
);
551 // split all StateData into those where the state already exists and those where it doesn't
552 List
<State
> statesOnly
= aggregatedCategoricalData
.getStatesOnly();
553 List
<StateData
> sdWithExistingStateInAggregation
= cd
.getStateData().stream().filter(sd
->statesOnly
.contains(sd
.getState())).collect(Collectors
.toList());
554 List
<StateData
> sdWithNoExistingStateInAggregation
= cd
.getStateData().stream().filter(sd
->!statesOnly
.contains(sd
.getState())).collect(Collectors
.toList());
556 for (StateData sd
: sdWithNoExistingStateInAggregation
) {
557 StateData clone
= sd
.clone();
558 // set count to 1 if not set
559 if(clone
.getCount()==null){
560 clone
.incrementCount();
562 aggregatedCategoricalData
.addStateData(clone
);
565 for (StateData sdExist
: sdWithExistingStateInAggregation
) {
566 List
<StateData
> aggregatedSameStateData
= aggregatedCategoricalData
.getStateData().stream()
567 .filter(sd
->hasSameState(sdExist
, sd
))
568 .collect(Collectors
.toList());
569 for (StateData stateData
: aggregatedSameStateData
) {
570 if(sdExist
.getCount()==null){
571 stateData
.incrementCount();
574 stateData
.setCount(stateData
.getCount()+sdExist
.getCount());
582 protected StructuredDescriptionResultHolder
createResultHolder() {
583 return new StructuredDescriptionResultHolder();
586 private class StructuredDescriptionResultHolder
extends ResultHolder
{
587 private Map
<Feature
, CategoricalData
> categoricalMap
= new HashMap
<>();
588 private Map
<Feature
, QuantitativeData
> quantitativeMap
= new HashMap
<>();
589 private Set
<IdentifiableSource
> sources
= new HashSet
<>();
591 public String
toString() {
592 return "SDResultHolder [categoricals=" + categoricalMap
.size()
593 + ", quantitatives=" + quantitativeMap
.size()
594 + ", sources=" + sources
.size()
595 + ", descriptionsToDelete=" + this.descriptionsToDelete
.size()
600 private Set
<TaxonDescription
> getChildTaxonDescriptions(TaxonNode taxonNode
, DescriptiveDataSet dataSet
) {
601 Set
<TaxonDescription
> result
= new HashSet
<>();
602 List
<TaxonNode
> childNodes
= taxonNode
.getChildNodes();
603 for (TaxonNode childNode
: childNodes
) {
604 Set
<TaxonDescription
> childDescriptions
= childNode
.getTaxon().getDescriptions();
605 result
.addAll(childDescriptions
.stream()
606 .filter(desc
->desc
.getTypes().contains(DescriptionType
.AGGREGATED_STRUC_DESC
))
607 .filter(desc
->dataSet
.getDescriptions().contains(desc
))
608 .collect(Collectors
.toSet()));
614 * Computes all specimen attached to the given taxon within the given dataSet.
615 * For these secimen it returns all attache
617 private Set
<SpecimenDescription
> getSpecimenDescriptions(Taxon taxon
, DescriptiveDataSet dataSet
) {
618 Set
<SpecimenDescription
> result
= new HashSet
<>();
619 //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
620 for (TaxonDescription taxonDesc
: taxon
.getDescriptions()){
621 for (DescriptionElementBase taxonDeb
: taxonDesc
.getElements()){
622 if (taxonDeb
.isInstanceOf(IndividualsAssociation
.class)){
623 IndividualsAssociation indAss
= CdmBase
.deproxy(taxonDeb
, IndividualsAssociation
.class);
624 SpecimenOrObservationBase
<?
> specimen
= indAss
.getAssociatedSpecimenOrObservation();
625 Set
<SpecimenDescription
> descriptions
= specimen
.getSpecimenDescriptions();
626 for(SpecimenDescription specimenDescription
: descriptions
){
627 if(dataSet
.getDescriptions().contains(specimenDescription
) &&
628 specimenDescription
.getTypes().stream().noneMatch(type
->type
.equals(DescriptionType
.CLONE_FOR_SOURCE
))){
629 result
.add(specimenDescription
);
638 private Set
<TaxonDescription
> getLiteratureDescriptions(Taxon taxon
, DescriptiveDataSet dataSet
) {
639 Set
<TaxonDescription
> result
= new HashSet
<>();
640 //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
641 for(TaxonDescription taxonDescription
: taxon
.getDescriptions()){
642 if(dataSet
.getDescriptions().contains(taxonDescription
)
643 && taxonDescription
.getTypes().stream().anyMatch(type
->type
.equals(DescriptionType
.SECONDARY_DATA
))
644 && taxonDescription
.getTypes().stream().noneMatch(type
->type
.equals(DescriptionType
.CLONE_FOR_SOURCE
)) ){
645 result
.add(taxonDescription
);
652 * Evaluates statistics for exact values collection and handles missing min and max values
654 private QuantitativeData
aggregateWithinQuantitativeData(QuantitativeData sourceQd
){
655 QuantitativeData aggQD
= QuantitativeData
.NewInstance(sourceQd
.getFeature());
656 aggQD
.setUnit(sourceQd
.getUnit());
657 Set
<BigDecimal
> exactValues
= sourceQd
.getExactValues();
658 if(!exactValues
.isEmpty()){
659 // qd is not already aggregated
660 Comparator
<BigDecimal
> comp
= Comparator
.naturalOrder();
661 int exactValueSampleSize
= exactValues
.size();
662 BigDecimal exactValueMin
= exactValues
.stream().min(comp
).get();
663 BigDecimal exactValueMax
= exactValues
.stream().max(comp
).get();
664 BigDecimal exactValueAvg
= BigDecimalUtil
.average(exactValues
);
665 //TODO also check for typical boundary data
666 if(sourceQd
.getMin() == null && sourceQd
.getMax() == null){
667 aggQD
.setSampleSize(new BigDecimal(exactValueSampleSize
), null);
668 aggQD
.setAverage(exactValueAvg
, null);
670 aggQD
.setMinimum(sourceQd
.getMin() == null ? exactValueMin
: sourceQd
.getMin().min(exactValueMin
), null);
671 aggQD
.setMaximum(sourceQd
.getMax() == null ? exactValueMax
: sourceQd
.getMax().max(exactValueMax
), null);
674 // qd has only min, max, ... but no exact values
675 aggQD
= sourceQd
.clone();
676 aggQD
= handleMissingValues(aggQD
);
681 private QuantitativeData
handleMissingValues(QuantitativeData qd
) {
683 qd
= handleMissingMinOrMax(qd
);
685 if (qd
!= null && qd
.getAverage() == null){
686 BigDecimal n
= qd
.getSampleSize();
687 if(n
!= null && !n
.equals(0f
)){
688 BigDecimal average
= (qd
.getMax().add(qd
.getMin())).divide(n
);
689 qd
.setAverage(average
, null);
695 private QuantitativeData
handleMissingMinOrMax(QuantitativeData qd
) {
696 return handleMissingMinOrMax(qd
, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
699 public static QuantitativeData
handleMissingMinOrMax(QuantitativeData aggQD
, MissingMinimumMode missingMinMode
,
700 MissingMaximumMode missingMaxMode
) {
701 if(aggQD
.getMin() == null && aggQD
.getMax() != null){
702 if (missingMinMode
== MissingMinimumMode
.MinToZero
) {
703 aggQD
.setMinimum(BigDecimal
.valueOf(0f
), null);
704 }else if (missingMinMode
== MissingMinimumMode
.MinToMax
){
705 aggQD
.setMinimum(aggQD
.getMax(), null);
706 }else if (missingMinMode
== MissingMinimumMode
.SkipRecord
){
710 if(aggQD
.getMax() == null && aggQD
.getMin() != null){
711 if (missingMaxMode
== MissingMaximumMode
.MaxToMin
){
712 aggQD
.setMaximum(aggQD
.getMin(), null);
713 }else if (missingMaxMode
== MissingMaximumMode
.SkipRecord
){
720 private QuantitativeData
addToExistingQuantitativeData(QuantitativeData aggQd
, QuantitativeData newQd
) {
722 newQd
= aggregateWithinQuantitativeData(newQd
); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
724 BigDecimal min
= null;
725 BigDecimal max
= null;
726 BigDecimal average
= null;
727 BigDecimal sampleSize
= null;
728 newQd
= handleMissingValues(newQd
);
732 min
= aggQd
.getMin().min(newQd
.getMin());
733 max
= aggQd
.getMax().max(newQd
.getMax());
734 if (newQd
.getSampleSize() != null && aggQd
.getSampleSize() != null){
735 sampleSize
= newQd
.getSampleSize().add(aggQd
.getSampleSize());
737 if (sampleSize
!= null && !sampleSize
.equals(0f
) && aggQd
.getAverage() != null && newQd
.getAverage() != null){
738 BigDecimal aggTotalSum
= aggQd
.getAverage().multiply(aggQd
.getSampleSize(), MathContext
.DECIMAL32
);
739 BigDecimal newTotalSum
= newQd
.getAverage().multiply(newQd
.getSampleSize(), MathContext
.DECIMAL32
);
740 BigDecimal totalSum
= aggTotalSum
.add(newTotalSum
);
741 average
= totalSum
.divide(sampleSize
, MathContext
.DECIMAL32
).stripTrailingZeros(); //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
743 aggQd
.setMinimum(min
, null);
744 aggQd
.setMaximum(max
, null);
745 aggQd
.setSampleSize(sampleSize
, null);
746 aggQd
.setAverage(average
, null);
750 private static List
<BigDecimal
> getExactValues(QuantitativeData qd
) {
751 List
<BigDecimal
> exactValues
= qd
.getStatisticalValues().stream()
752 .filter(value
->value
.getType().equals(StatisticalMeasure
.EXACT_VALUE()))
753 .map(exact
->exact
.getValue())
754 .collect(Collectors
.toList());
758 private static boolean hasSameState(StateData sd1
, StateData sd2
) {
759 if (sd2
.getState() == null || sd1
.getState() == null){
762 return sd2
.getState().getUuid().equals(sd1
.getState().getUuid());