ref #7980 fix concurrent modification exception
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / description / StructuredDescriptionAggregation.java
1 /**
2 * Copyright (C) 2019 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.api.service.description;
10
11 import java.math.BigDecimal;
12 import java.math.MathContext;
13 import java.util.ArrayList;
14 import java.util.Comparator;
15 import java.util.HashMap;
16 import java.util.HashSet;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.Optional;
20 import java.util.Set;
21 import java.util.stream.Collectors;
22
23 import eu.etaxonomy.cdm.common.BigDecimalUtil;
24 import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
25 import eu.etaxonomy.cdm.model.common.CdmBase;
26 import eu.etaxonomy.cdm.model.common.ICdmBase;
27 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
28 import eu.etaxonomy.cdm.model.description.CategoricalData;
29 import eu.etaxonomy.cdm.model.description.DescriptionBase;
30 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
31 import eu.etaxonomy.cdm.model.description.DescriptionType;
32 import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
33 import eu.etaxonomy.cdm.model.description.Feature;
34 import eu.etaxonomy.cdm.model.description.IDescribable;
35 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
36 import eu.etaxonomy.cdm.model.description.QuantitativeData;
37 import eu.etaxonomy.cdm.model.description.SpecimenDescription;
38 import eu.etaxonomy.cdm.model.description.State;
39 import eu.etaxonomy.cdm.model.description.StateData;
40 import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
41 import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
42 import eu.etaxonomy.cdm.model.description.TaxonDescription;
43 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
44 import eu.etaxonomy.cdm.model.reference.ICdmTarget;
45 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
46 import eu.etaxonomy.cdm.model.taxon.Taxon;
47 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
48
/**
 * Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
 * <br>
 * For all {@link SpecimenDescription}s belonging to this data set, new
 * aggregated {@link TaxonDescription}s are created for every taxon the
 * specimens are directly associated with.<br>
 * In addition, lower rank taxon descriptions are aggregated to upper rank taxa.
 *
 * @author a.mueller
 * @author p.plitzner
 * @since 03.11.2019
 */
61 public class StructuredDescriptionAggregation
62 extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
63
64 private DescriptiveDataSet dataSet;
65
66 @Override
67 protected String pluralDataType(){
68 return "structured descriptive data";
69 }
70
71 @Override
72 protected void preAggregate(IProgressMonitor monitor) {
73 monitor.subTask("preAccumulate - nothing to do");
74
75 // take start time for performance testing
76 double start = System.currentTimeMillis();
77
78 getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
79
80 double end1 = System.currentTimeMillis();
81 logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
82 }
83
84 @Override
85 protected void verifyConfiguration(IProgressMonitor monitor){
86 if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
87 .contains(getConfig().getToParentSourceMode())){
88 throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
89 }
90 if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
91 .contains(getConfig().getWithinTaxonSourceMode())){
92 throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
93 }
94 }
95
96 private boolean hasCharacterData(DescriptionElementBase element) {
97 return hasCategoricalData(element) || hasQuantitativeData(element);
98 }
99
100 private boolean hasQuantitativeData(DescriptionElementBase element) {
101 if(element instanceof QuantitativeData
102 && !((QuantitativeData) element).getStatisticalValues().isEmpty()){
103 QuantitativeData quantitativeData = (QuantitativeData)element;
104 return !getExactValues(quantitativeData).isEmpty()
105 || quantitativeData.getMin()!=null
106 || quantitativeData.getMax()!=null;
107 }
108 return false;
109 }
110
111 private boolean hasCategoricalData(DescriptionElementBase element) {
112 return element instanceof CategoricalData && !((CategoricalData) element).getStatesOnly().isEmpty();
113 }
114
115 @Override
116 protected void setDescriptionTitle(TaxonDescription description, Taxon taxon) {
117 String title = taxon.getName() != null? taxon.getName().getTitleCache() : taxon.getTitleCache();
118 description.setTitleCache("Aggregated description for " + title, true);
119 return;
120 }
121
122 @Override
123 protected TaxonDescription createNewDescription(Taxon taxon) {
124 String title = taxon.getTitleCache();
125 if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
126 TaxonDescription description = TaxonDescription.NewInstance(taxon);
127 description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
128 setDescriptionTitle(description, taxon);
129 return description;
130 }
131
132 @Override
133 protected boolean hasDescriptionType(TaxonDescription description) {
134 return dataSet.getDescriptions().contains(description) && description.isAggregatedStructuredDescription();
135 }
136
137 @Override
138 protected List<String> descriptionInitStrategy() {
139 return new ArrayList<>();
140 }
141
142 @Override
143 protected void addAggregationResultToDescription(TaxonDescription targetDescription,
144 ResultHolder resultHolder) {
145
146 StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
147 mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
148 mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
149 addAggregationSources(targetDescription, structuredResultHolder);
150
151 if(!targetDescription.getElements().isEmpty()){
152 dataSet.addDescription(targetDescription);
153 }else{
154 dataSet.removeDescription(targetDescription);
155 }
156 }
157
158 private <T extends DescriptionBase<?>> void addAggregationSources(TaxonDescription targetDescription,
159 StructuredDescriptionResultHolder structuredResultHolder) {
160
161 //Remove sources from description
162 Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
163 .filter(source->source.getType().equals(OriginalSourceType.Aggregation))
164 .collect(Collectors.toSet());
165
166 Set<IdentifiableSource> newSources = structuredResultHolder.sources;
167 for (IdentifiableSource newSource : newSources) {
168 IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
169 if (mergeSourceCandidate == null){
170 addNewSource(targetDescription, newSource);
171 }else{
172 mergeSource(mergeSourceCandidate, newSource);
173 sourcesToRemove.remove(mergeSourceCandidate);
174 }
175 }
176
177 //remove remaining sources-to-be-removed
178 for (IdentifiableSource sourceToRemove : sourcesToRemove) {
179 targetDescription.removeSource(sourceToRemove);
180 ICdmTarget target = sourceToRemove.getCdmSource();
181 if (target != null){
182 if (target.isInstanceOf(DescriptionBase.class)){
183 @SuppressWarnings("unchecked")
184 T descriptionToDelete = ((T)sourceToRemove.getCdmSource());
185 ((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
186 structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
187 }else if (target.isInstanceOf(Taxon.class)){
188 //nothing to do for now
189 } else {
190 throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
191 }
192 }
193 }
194 }
195
196 private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
197 IdentifiableSource newSource) {
198
199 //add source
200 targetDescription.addSource(newSource);
201 //if it is a description add it to the described entity (specimen, taxon)
202 ICdmBase target = newSource.getCdmSource();
203 if (target != null){
204 if (target.isInstanceOf(DescriptionBase.class)){
205 @SuppressWarnings("unchecked")
206 T description = (T)CdmBase.deproxy(target);
207 ((IDescribable<T>)description.describedEntity()).addDescription(description);
208 }
209 }
210 }
211
212 //mergeablity has been checked before
213 private <T extends DescriptionBase<?>> void mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {
214
215 ICdmBase newTarget = newSource.getCdmSource();
216 if (newTarget != null){
217 newTarget = CdmBase.deproxy(newTarget);
218 if (newTarget instanceof DescriptionBase){
219 @SuppressWarnings("unchecked")
220 T newTargetDesc = (T)newTarget;
221 @SuppressWarnings("unchecked")
222 T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
223 mergeSourceDescription(existingTargetDesc, newTargetDesc);
224 ((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
225 ((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
226 }else if (newTarget instanceof Taxon){
227 //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
228 }else{
229 throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
230 }
231 }else{
232 throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
233 }
234 }
235
236 private <T extends DescriptionBase<?>> void mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {
237
238 Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
239 Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());
240 for (DescriptionElementBase newElement : newElements){
241 DescriptionElementBase newElementClone = newElement.clone();
242 Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
243 .filter(e->e.getFeature()!= null
244 && e.getFeature().equals(newElementClone.getFeature()))
245 .findFirst();
246 if (matchingElement.isPresent()){
247 mergeDescriptionElement(matchingElement.get(), newElementClone);
248 elementsToRemove.remove(matchingElement.get());
249 }else{
250 existingSourceDescription.addElement(newElementClone);
251 }
252 }
253 addSourceDescriptionToDescribedEntity(newSourceDescription);
254 existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);
255
256 for (DescriptionElementBase debToRemove : elementsToRemove){
257 existingSourceDescription.removeElement(debToRemove);
258 }
259
260 }
261
262 @SuppressWarnings("unchecked")
263 private <T extends DescriptionBase<?>> void addSourceDescriptionToDescribedEntity(T sourceDescription) {
264 ((IDescribable<T>)sourceDescription.describedEntity()).addDescription(sourceDescription);
265 }
266 @SuppressWarnings("unchecked")
267 private <T extends DescriptionBase<?>> void removeSourceDescriptionFromDescribedEntity(T sourceDescription) {
268 ((IDescribable<T>)sourceDescription.describedEntity()).removeDescription(sourceDescription);
269 }
270
271 private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
272 for (IdentifiableSource existingSource : targetDescription.getSources()){
273 boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
274 if (isCandidate){
275 return existingSource;
276 }
277 }
278 return null;
279 }
280
281 private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
282 if (newSource.getCdmSource()!= null){
283 if (existingSource.getCdmSource() == null){
284 return false;
285 }else {
286 ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
287 ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
288 if (!newTarget.getClass().equals(existingTarget.getClass())){
289 return false;
290 }else{
291 if (newTarget instanceof SpecimenDescription){
292 SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
293 SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
294 //for now reuse is possible if both are descriptions for the same specimen
295 return newSob != null && newSob.equals(existingSob);
296 }else if (newTarget instanceof TaxonDescription){
297 Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
298 Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
299 //for now reuse is possible if both are descriptions for the same taxon
300 return newTaxon != null && newTaxon.equals(existingTaxon);
301 }else if (newTarget instanceof Taxon){
302 return newTarget.equals(existingTarget);
303 }else{
304 throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
305 }
306 }
307 }
308 }
309
310 return false;
311 }
312
313 private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
314 @SuppressWarnings("unchecked")
315 T clonedDescription = (T)newSourceDescription.clone();
316 clonedDescription.removeDescriptiveDataSet(dataSet);
317 clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
318 clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
319 return clonedDescription;
320 }
321
322 private <S extends DescriptionElementBase> void mergeDescriptionElements(TaxonDescription targetDescription,
323 Map<Feature, ? extends DescriptionElementBase> newElementsMap, Class<? extends DescriptionElementBase> debClass) {
324
325 Set<DescriptionElementBase> elementsToRemove = new HashSet<>(
326 targetDescription.getElements().stream()
327 .filter(el->el.isInstanceOf(debClass))
328 .collect(Collectors.toSet()));
329
330 //for each character in "characters of new elements"
331 for (Feature characterNew : newElementsMap.keySet()) {
332
333 //if elements for this character exist in old data, remember any of them to keep (in clean data there should be only max. 1
334 DescriptionElementBase elementToStay = null;
335 for (DescriptionElementBase existingDeb : elementsToRemove) {
336 if(existingDeb.getFeature().equals(characterNew)){
337 elementToStay = existingDeb;
338 elementsToRemove.remove(existingDeb);
339 break;
340 }
341 }
342
343 //if there is no element for this character in old data, add the new element for this character to the target description (otherwise reuse old element)
344 if (elementToStay == null){
345 targetDescription.addElement(newElementsMap.get(characterNew));
346 }else{
347 mergeDescriptionElement(elementToStay, newElementsMap.get(characterNew));
348 }
349 }
350
351 //remove all elements not needed anymore
352 for(DescriptionElementBase elementToRemove : elementsToRemove){
353 targetDescription.removeElement(elementToRemove);
354 }
355 }
356
357 private void mergeDescriptionElement(DescriptionElementBase targetElement,
358 DescriptionElementBase newElement) {
359
360 targetElement = CdmBase.deproxy(targetElement);
361 newElement = CdmBase.deproxy(newElement);
362 if (targetElement instanceof CategoricalData){
363 mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
364 }else if (targetElement.isInstanceOf(QuantitativeData.class)){
365 mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
366 }else{
367 throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
368 }
369 }
370
371 private void mergeDescriptionElement(CategoricalData elementToStay,
372 CategoricalData newElement) {
373 List<StateData> oldData = new ArrayList<>(elementToStay.getStateData());
374 List<StateData> newData = new ArrayList<>(newElement.getStateData());
375 for (StateData newStateData : newData){
376 State state = newStateData.getState();
377 StateData oldStateData = firstByState(state, oldData);
378 if (oldStateData != null){
379 //for now only state and count is used for aggregation, below code needs to be adapted if this changes
380 oldStateData.setCount(newStateData.getCount());
381 oldData.remove(oldStateData);
382 }else{
383 elementToStay.addStateData(newStateData);
384 }
385 }
386 for (StateData stateDataToRemove : oldData){
387 elementToStay.removeStateData(stateDataToRemove);
388 }
389 }
390
391 private StateData firstByState(State state, List<StateData> oldData) {
392 if (state == null){
393 return null;
394 }
395 for (StateData sd : oldData){
396 if (state.equals(sd.getState())){
397 return sd;
398 }
399 }
400 return null;
401 }
402
403 private void mergeDescriptionElement(QuantitativeData elementToStay,
404 QuantitativeData newElement) {
405 Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
406 Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
407 for (StatisticalMeasurementValue newValue : newValues){
408 StatisticalMeasure type = newValue.getType();
409 StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
410 if (oldValue != null){
411 //for now only state and count is used for aggregation, below code needs to be adapted if this changes
412 oldValue.setValue(newValue.getValue());
413 oldValues.remove(oldValue);
414 }else{
415 elementToStay.addStatisticalValue(newValue);
416 }
417 }
418 for (StatisticalMeasurementValue valueToRemove : oldValues){
419 elementToStay.removeStatisticalValue(valueToRemove);
420 }
421 }
422
423 private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
424 if (type == null){
425 return null;
426 }
427 for (StatisticalMeasurementValue value : oldValues){
428 if (type.equals(value.getType())){
429 return value;
430 }
431 }
432 return null;
433 }
434
435 @Override
436 protected void initTransaction() {
437 dataSet = getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
438 }
439
440 @Override
441 protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
442 super.removeDescriptionIfEmpty(description, resultHolder);
443 if (description.getElements().isEmpty()){
444 dataSet.removeDescription(description);
445 }
446 }
447
448 @Override
449 protected void aggregateToParentTaxon(TaxonNode taxonNode,
450 ResultHolder resultHolder,
451 Set<TaxonDescription> excludedDescriptions) {
452 StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
453 Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
454 addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
455 }
456
457 @Override
458 protected void aggregateWithinSingleTaxon(Taxon taxon,
459 ResultHolder resultHolder,
460 Set<TaxonDescription> excludedDescriptions) {
461 StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
462 Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
463 addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
464 if (getConfig().isIncludeLiterature()){
465 Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
466 addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
467 }
468 //TODO add default descriptions
469 //xxx
470
471 }
472
473 private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
474 Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
475 AggregationMode aggregationMode) {
476
477 boolean descriptionWasUsed = false;
478 for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
479 for (DescriptionElementBase deb: desc.getElements()){
480 if (hasCharacterData(deb)){
481 if (deb.isInstanceOf(CategoricalData.class)){
482 addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
483 descriptionWasUsed = true;
484 }else if (deb.isInstanceOf(QuantitativeData.class)){
485 addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
486 descriptionWasUsed = true;
487 }
488 }
489 }
490
491 //sources
492 AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
493 if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
494 IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
495 desc = CdmBase.deproxy(desc);
496
497 switch (sourceMode){
498 case DESCRIPTION:
499 DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
500 source.setCdmSource(clonedDesc);
501 break;
502 case TAXON:
503 if (desc instanceof TaxonDescription){
504 Taxon taxon = ((TaxonDescription) desc).getTaxon();
505 source.setCdmSource(taxon);
506 }else {
507 throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
508 }
509 break;
510 case NONE:
511 source = null;
512 break;
513 case ALL: //not yet supported
514 throw new AggregationException("Source mode not yet supported: " + sourceMode);
515 case ALL_SAMEVALUE: //makes no sense
516 throw new AggregationException("Illegal source mode: " + sourceMode);
517 default:
518 throw new AggregationException("Source mode not supported: " + sourceMode);
519 }
520 if (source != null){
521 descriptiveResultHolder.sources.add(source);
522 }
523 }
524 }
525 }
526
527 private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
528 QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
529 if(aggregatedQuantitativeData==null){
530 // no QuantitativeData with this feature in aggregation
531 aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
532 }
533 else{
534 aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
535 }
536 if (aggregatedQuantitativeData != null){
537 resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
538 }
539 }
540
541 private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
542 CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
543 if(aggregatedCategoricalData==null){
544 // no CategoricalData with this feature in aggregation
545 aggregatedCategoricalData = cd.clone();
546 // set count to 1 if not set
547 aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
548 resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
549 }
550 else{
551 // split all StateData into those where the state already exists and those where it doesn't
552 List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
553 List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
554 List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
555
556 for (StateData sd : sdWithNoExistingStateInAggregation) {
557 StateData clone = sd.clone();
558 // set count to 1 if not set
559 if(clone.getCount()==null){
560 clone.incrementCount();
561 }
562 aggregatedCategoricalData.addStateData(clone);
563 }
564
565 for (StateData sdExist : sdWithExistingStateInAggregation) {
566 List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
567 .filter(sd->hasSameState(sdExist, sd))
568 .collect(Collectors.toList());
569 for (StateData stateData : aggregatedSameStateData) {
570 if(sdExist.getCount()==null){
571 stateData.incrementCount();
572 }
573 else{
574 stateData.setCount(stateData.getCount()+sdExist.getCount());
575 }
576 }
577 }
578 }
579 }
580
581 @Override
582 protected StructuredDescriptionResultHolder createResultHolder() {
583 return new StructuredDescriptionResultHolder();
584 }
585
586 private class StructuredDescriptionResultHolder extends ResultHolder{
587 private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
588 private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
589 private Set<IdentifiableSource> sources = new HashSet<>();
590 @Override
591 public String toString() {
592 return "SDResultHolder [categoricals=" + categoricalMap.size()
593 + ", quantitatives=" + quantitativeMap.size()
594 + ", sources=" + sources.size()
595 + ", descriptionsToDelete=" + this.descriptionsToDelete.size()
596 + "]";
597 }
598 }
599
600 private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
601 Set<TaxonDescription> result = new HashSet<>();
602 List<TaxonNode> childNodes = taxonNode.getChildNodes();
603 for (TaxonNode childNode : childNodes) {
604 Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
605 result.addAll(childDescriptions.stream()
606 .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
607 .filter(desc->dataSet.getDescriptions().contains(desc))
608 .collect(Collectors.toSet()));
609 }
610 return result;
611 }
612
613 /**
614 * Computes all specimen attached to the given taxon within the given dataSet.
615 * For these secimen it returns all attache
616 * */
617 private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
618 Set<SpecimenDescription> result = new HashSet<>();
619 //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
620 for (TaxonDescription taxonDesc: taxon.getDescriptions()){
621 for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
622 if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
623 IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
624 SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
625 Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
626 for(SpecimenDescription specimenDescription : descriptions){
627 if(dataSet.getDescriptions().contains(specimenDescription) &&
628 specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
629 result.add(specimenDescription);
630 }
631 }
632 }
633 }
634 }
635 return result;
636 }
637
638 private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
639 Set<TaxonDescription> result = new HashSet<>();
640 //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
641 for(TaxonDescription taxonDescription : taxon.getDescriptions()){
642 if(dataSet.getDescriptions().contains(taxonDescription)
643 && taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
644 && taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
645 result.add(taxonDescription);
646 }
647 }
648 return result;
649 }
650
651 /**
652 * Evaluates statistics for exact values collection and handles missing min and max values
653 */
654 private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
655 QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
656 aggQD.setUnit(sourceQd.getUnit());
657 Set<BigDecimal> exactValues = sourceQd.getExactValues();
658 if(!exactValues.isEmpty()){
659 // qd is not already aggregated
660 Comparator<BigDecimal> comp = Comparator.naturalOrder();
661 int exactValueSampleSize = exactValues.size();
662 BigDecimal exactValueMin = exactValues.stream().min(comp).get();
663 BigDecimal exactValueMax = exactValues.stream().max(comp).get();
664 BigDecimal exactValueAvg = BigDecimalUtil.average(exactValues);
665 //TODO also check for typical boundary data
666 if(sourceQd.getMin() == null && sourceQd.getMax() == null){
667 aggQD.setSampleSize(new BigDecimal(exactValueSampleSize), null);
668 aggQD.setAverage(exactValueAvg, null);
669 }
670 aggQD.setMinimum(sourceQd.getMin() == null ? exactValueMin: sourceQd.getMin().min(exactValueMin), null);
671 aggQD.setMaximum(sourceQd.getMax() == null ? exactValueMax: sourceQd.getMax().max(exactValueMax), null);
672 }
673 else{
674 // qd has only min, max, ... but no exact values
675 aggQD = sourceQd.clone();
676 aggQD = handleMissingValues(aggQD);
677 }
678 return aggQD;
679 }
680
681 private QuantitativeData handleMissingValues(QuantitativeData qd) {
682 //min max
683 qd = handleMissingMinOrMax(qd);
684 //average
685 if (qd != null && qd.getAverage() == null){
686 BigDecimal n = qd.getSampleSize();
687 if(n != null && !n.equals(0f)){
688 BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n);
689 qd.setAverage(average, null);
690 }
691 }
692 return qd;
693 }
694
695 private QuantitativeData handleMissingMinOrMax(QuantitativeData qd) {
696 return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
697 }
698
699 public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
700 MissingMaximumMode missingMaxMode) {
701 if(aggQD.getMin() == null && aggQD.getMax() != null){
702 if (missingMinMode == MissingMinimumMode.MinToZero) {
703 aggQD.setMinimum(BigDecimal.valueOf(0f), null);
704 }else if (missingMinMode == MissingMinimumMode.MinToMax){
705 aggQD.setMinimum(aggQD.getMax(), null);
706 }else if (missingMinMode == MissingMinimumMode.SkipRecord){
707 return null;
708 }
709 }
710 if(aggQD.getMax() == null && aggQD.getMin() != null){
711 if (missingMaxMode == MissingMaximumMode.MaxToMin){
712 aggQD.setMaximum(aggQD.getMin(), null);
713 }else if (missingMaxMode == MissingMaximumMode.SkipRecord){
714 return null;
715 }
716 }
717 return aggQD;
718 }
719
720 private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
721
722 newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
723
724 BigDecimal min = null;
725 BigDecimal max = null;
726 BigDecimal average = null;
727 BigDecimal sampleSize = null;
728 newQd = handleMissingValues(newQd);
729 if (newQd == null){
730 return aggQd;
731 }
732 min = aggQd.getMin().min(newQd.getMin());
733 max = aggQd.getMax().max(newQd.getMax());
734 if (newQd.getSampleSize() != null && aggQd.getSampleSize() != null){
735 sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
736 }
737 if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
738 BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
739 BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
740 BigDecimal totalSum = aggTotalSum.add(newTotalSum);
741 average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros(); //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
742 }
743 aggQd.setMinimum(min, null);
744 aggQd.setMaximum(max, null);
745 aggQd.setSampleSize(sampleSize, null);
746 aggQd.setAverage(average, null);
747 return aggQd;
748 }
749
750 private static List<BigDecimal> getExactValues(QuantitativeData qd) {
751 List<BigDecimal> exactValues = qd.getStatisticalValues().stream()
752 .filter(value->value.getType().equals(StatisticalMeasure.EXACT_VALUE()))
753 .map(exact->exact.getValue())
754 .collect(Collectors.toList());
755 return exactValues;
756 }
757
758 private static boolean hasSameState(StateData sd1, StateData sd2) {
759 if (sd2.getState() == null || sd1.getState() == null){
760 return false;
761 }else{
762 return sd2.getState().getUuid().equals(sd1.getState().getUuid());
763 }
764 }
765 }