cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/description/StructuredDescriptionAggregation.java

   1 /**
   2 * Copyright (C) 2019 EDIT
   3 * European Distributed Institute of Taxonomy
   4 * http://www.e-taxonomy.eu
   5 *
   6 * The contents of this file are subject to the Mozilla Public License Version 1.1
   7 * See LICENSE.TXT at the top of this package for the full license terms.
   8 */
   9 package eu.etaxonomy.cdm.api.service.description;
  10
  11 import java.math.BigDecimal;
  12 import java.math.MathContext;
  13 import java.util.ArrayList;
  14 import java.util.Comparator;
  15 import java.util.HashMap;
  16 import java.util.HashSet;
  17 import java.util.List;
  18 import java.util.Map;
  19 import java.util.Optional;
  20 import java.util.Set;
  21 import java.util.stream.Collectors;
  22
  23 import eu.etaxonomy.cdm.common.BigDecimalUtil;
  24 import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
  25 import eu.etaxonomy.cdm.model.common.CdmBase;
  26 import eu.etaxonomy.cdm.model.common.ICdmBase;
  27 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
  28 import eu.etaxonomy.cdm.model.description.CategoricalData;
  29 import eu.etaxonomy.cdm.model.description.DescriptionBase;
  30 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
  31 import eu.etaxonomy.cdm.model.description.DescriptionType;
  32 import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
  33 import eu.etaxonomy.cdm.model.description.Feature;
  34 import eu.etaxonomy.cdm.model.description.IDescribable;
  35 import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
  36 import eu.etaxonomy.cdm.model.description.QuantitativeData;
  37 import eu.etaxonomy.cdm.model.description.SpecimenDescription;
  38 import eu.etaxonomy.cdm.model.description.State;
  39 import eu.etaxonomy.cdm.model.description.StateData;
  40 import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
  41 import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
  42 import eu.etaxonomy.cdm.model.description.TaxonDescription;
  43 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
  44 import eu.etaxonomy.cdm.model.reference.ICdmTarget;
  45 import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
  46 import eu.etaxonomy.cdm.model.taxon.Taxon;
  47 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
  48
  49 /**
  50  * Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
  51  * <br>
 * For all {@link SpecimenDescription}s belonging to this data set, new
 * aggregated {@link TaxonDescription}s are created for every taxon the
 * specimens are directly associated with.<br>
  55  * Also lower rank taxon descriptions are aggregated to upper rank taxa.
  56  *
  57  * @author a.mueller
  58  * @author p.plitzner
  59  * @since 03.11.2019
  60  */
  61 public class StructuredDescriptionAggregation
  62         extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
  63
  64     private DescriptiveDataSet dataSet;
  65
  66     @Override
  67     protected String pluralDataType(){
  68         return "structured descriptive data";
  69     }
  70
  71     @Override
  72     protected void preAggregate(IProgressMonitor monitor) {
  73         monitor.subTask("preAccumulate - nothing to do");
  74
  75         // take start time for performance testing
  76         double start = System.currentTimeMillis();
  77
  78         getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
  79
  80         double end1 = System.currentTimeMillis();
  81         logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
  82     }
  83
  84     @Override
  85     protected void verifyConfiguration(IProgressMonitor monitor){
  86         if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
  87             .contains(getConfig().getToParentSourceMode())){
  88             throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
  89         }
  90         if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
  91                 .contains(getConfig().getWithinTaxonSourceMode())){
  92                 throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
  93         }
  94     }
  95
  96     private boolean hasCharacterData(DescriptionElementBase element) {
  97         return hasCategoricalData(element) || hasQuantitativeData(element);
  98     }
  99
 100     private boolean hasQuantitativeData(DescriptionElementBase element) {
 101         if(element instanceof QuantitativeData
 102                 && !((QuantitativeData) element).getStatisticalValues().isEmpty()){
 103             QuantitativeData quantitativeData = (QuantitativeData)element;
 104             return !getExactValues(quantitativeData).isEmpty()
 105                     || quantitativeData.getMin()!=null
 106                     || quantitativeData.getMax()!=null;
 107         }
 108         return false;
 109     }
 110
 111     private boolean hasCategoricalData(DescriptionElementBase element) {
 112         return element instanceof CategoricalData && !((CategoricalData) element).getStatesOnly().isEmpty();
 113     }
 114
 115     @Override
 116     protected void setDescriptionTitle(TaxonDescription description, Taxon taxon) {
 117         String title = taxon.getName() != null? taxon.getName().getTitleCache() : taxon.getTitleCache();
 118         description.setTitleCache("Aggregated description for " + title, true);
 119         return;
 120     }
 121
 122     @Override
 123     protected TaxonDescription createNewDescription(Taxon taxon) {
 124         String title = taxon.getTitleCache();
 125         if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
 126         TaxonDescription description = TaxonDescription.NewInstance(taxon);
 127         description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
 128         setDescriptionTitle(description, taxon);
 129         return description;
 130     }
 131
 132     @Override
 133     protected boolean hasDescriptionType(TaxonDescription description) {
 134         return dataSet.getDescriptions().contains(description) && description.isAggregatedStructuredDescription();
 135     }
 136
 137     @Override
 138     protected List<String> descriptionInitStrategy() {
 139         return new ArrayList<>();
 140     }
 141
 142     @Override
 143     protected void addAggregationResultToDescription(TaxonDescription targetDescription,
 144             ResultHolder resultHolder) {
 145
 146         StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
 147         mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
 148         mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
 149         addAggregationSources(targetDescription, structuredResultHolder);
 150
 151         if(!targetDescription.getElements().isEmpty()){
 152             dataSet.addDescription(targetDescription);
 153         }else{
 154             dataSet.removeDescription(targetDescription);
 155         }
 156     }
 157
 158     private <T extends DescriptionBase<?>> void addAggregationSources(TaxonDescription targetDescription,
 159                 StructuredDescriptionResultHolder structuredResultHolder) {
 160
 161         //Remove sources from description
 162         Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
 163                 .filter(source->source.getType().equals(OriginalSourceType.Aggregation))
 164                 .collect(Collectors.toSet());
 165
 166         Set<IdentifiableSource> newSources = structuredResultHolder.sources;
 167         for (IdentifiableSource newSource : newSources) {
 168             IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
 169             if (mergeSourceCandidate == null){
 170                 addNewSource(targetDescription, newSource);
 171             }else{
 172                 mergeSource(mergeSourceCandidate, newSource);
 173                 sourcesToRemove.remove(mergeSourceCandidate);
 174             }
 175         }
 176
 177         //remove remaining sources-to-be-removed
 178         for (IdentifiableSource sourceToRemove : sourcesToRemove) {
 179             targetDescription.removeSource(sourceToRemove);
 180             ICdmTarget target = sourceToRemove.getCdmSource();
 181             if (target != null){
 182                 if (target.isInstanceOf(DescriptionBase.class)){
 183                     @SuppressWarnings("unchecked")
 184                     T descriptionToDelete = ((T)sourceToRemove.getCdmSource());
 185                     ((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
 186                     structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
 187                 }else if (target.isInstanceOf(Taxon.class)){
 188                     //nothing to do for now
 189                 } else {
 190                     throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
 191                 }
 192             }
 193         }
 194     }
 195
 196     private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
 197             IdentifiableSource newSource) {
 198
 199         //add source
 200         targetDescription.addSource(newSource);
 201         //if it is a description add it to the described entity (specimen, taxon)
 202         ICdmBase target = newSource.getCdmSource();
 203         if (target != null){
 204             if (target.isInstanceOf(DescriptionBase.class)){
 205                 @SuppressWarnings("unchecked")
 206                 T description = (T)CdmBase.deproxy(target);
 207                 ((IDescribable<T>)description.describedEntity()).addDescription(description);
 208             }
 209         }
 210     }
 211
 212     //mergeablity has been checked before
 213     private <T extends DescriptionBase<?>> void mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {
 214
 215         ICdmBase newTarget = newSource.getCdmSource();
 216         if (newTarget != null){
 217             newTarget = CdmBase.deproxy(newTarget);
 218             if (newTarget instanceof DescriptionBase){
 219                 @SuppressWarnings("unchecked")
 220                 T newTargetDesc = (T)newTarget;
 221                 @SuppressWarnings("unchecked")
 222                 T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
 223                 mergeSourceDescription(existingTargetDesc, newTargetDesc);
 224                 ((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
 225                 ((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
 226             }else if (newTarget instanceof Taxon){
 227                 //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
 228             }else{
 229                 throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
 230             }
 231         }else{
 232             throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
 233         }
 234     }
 235
 236     private <T extends DescriptionBase<?>> void mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {
 237
 238         Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
 239         Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());
 240         for (DescriptionElementBase newElement : newElements){
 241             DescriptionElementBase newElementClone = newElement.clone();
 242             Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
 243                     .filter(e->e.getFeature()!= null
 244                         && e.getFeature().equals(newElementClone.getFeature()))
 245                     .findFirst();
 246             if (matchingElement.isPresent()){
 247                 mergeDescriptionElement(matchingElement.get(), newElementClone);
 248                 elementsToRemove.remove(matchingElement.get());
 249             }else{
 250                 existingSourceDescription.addElement(newElementClone);
 251             }
 252         }
 253         addSourceDescriptionToDescribedEntity(newSourceDescription);
 254         existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);
 255
 256         for (DescriptionElementBase debToRemove : elementsToRemove){
 257             existingSourceDescription.removeElement(debToRemove);
 258         }
 259
 260     }
 261
 262     @SuppressWarnings("unchecked")
 263     private <T extends DescriptionBase<?>> void addSourceDescriptionToDescribedEntity(T sourceDescription) {
 264         ((IDescribable<T>)sourceDescription.describedEntity()).addDescription(sourceDescription);
 265     }
 266     @SuppressWarnings("unchecked")
 267     private <T extends DescriptionBase<?>> void removeSourceDescriptionFromDescribedEntity(T sourceDescription) {
 268         ((IDescribable<T>)sourceDescription.describedEntity()).removeDescription(sourceDescription);
 269     }
 270
 271     private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
 272         for (IdentifiableSource existingSource : targetDescription.getSources()){
 273             boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
 274             if (isCandidate){
 275                 return existingSource;
 276             }
 277         }
 278         return null;
 279     }
 280
 281     private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
 282         if (newSource.getCdmSource()!= null){
 283             if (existingSource.getCdmSource() == null){
 284                 return false;
 285             }else {
 286                 ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
 287                 ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
 288                 if (!newTarget.getClass().equals(existingTarget.getClass())){
 289                     return false;
 290                 }else{
 291                     if (newTarget instanceof SpecimenDescription){
 292                         SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
 293                         SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
 294                         //for now reuse is possible if both are descriptions for the same specimen
 295                         return newSob != null && newSob.equals(existingSob);
 296                     }else if (newTarget instanceof TaxonDescription){
 297                         Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
 298                         Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
 299                         //for now reuse is possible if both are descriptions for the same taxon
 300                         return newTaxon != null && newTaxon.equals(existingTaxon);
 301                     }else if (newTarget instanceof Taxon){
 302                         return newTarget.equals(existingTarget);
 303                     }else{
 304                         throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
 305                     }
 306                 }
 307             }
 308         }
 309
 310         return false;
 311     }
 312
 313     private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
 314         @SuppressWarnings("unchecked")
 315         T clonedDescription = (T)newSourceDescription.clone();
 316         clonedDescription.removeDescriptiveDataSet(dataSet);
 317         clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
 318         clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
 319         return clonedDescription;
 320     }
 321
 322     private <S extends DescriptionElementBase> void mergeDescriptionElements(TaxonDescription targetDescription,
 323             Map<Feature, ? extends DescriptionElementBase> newElementsMap, Class<? extends DescriptionElementBase> debClass) {
 324
 325         Set<DescriptionElementBase> elementsToRemove = new HashSet<>(
 326                 targetDescription.getElements().stream()
 327                     .filter(el->el.isInstanceOf(debClass))
 328                     .collect(Collectors.toSet()));
 329
 330         //for each character in "characters of new elements"
 331         for (Feature characterNew : newElementsMap.keySet()) {
 332
 333             //if elements for this character exist in old data, remember any of them to keep (in clean data there should be only max. 1
 334             DescriptionElementBase elementToStay = null;
 335             for (DescriptionElementBase existingDeb : elementsToRemove) {
 336                 if(existingDeb.getFeature().equals(characterNew)){
 337                     elementToStay = existingDeb;
 338                     elementsToRemove.remove(existingDeb);
 339                     break;
 340                 }
 341             }
 342
 343             //if there is no element for this character in old data, add the new element for this character to the target description (otherwise reuse old element)
 344             if (elementToStay == null){
 345                 targetDescription.addElement(newElementsMap.get(characterNew));
 346             }else{
 347                 mergeDescriptionElement(elementToStay, newElementsMap.get(characterNew));
 348             }
 349         }
 350
 351         //remove all elements not needed anymore
 352         for(DescriptionElementBase elementToRemove : elementsToRemove){
 353             targetDescription.removeElement(elementToRemove);
 354         }
 355     }
 356
 357     private void mergeDescriptionElement(DescriptionElementBase targetElement,
 358             DescriptionElementBase newElement) {
 359
 360         targetElement = CdmBase.deproxy(targetElement);
 361         newElement = CdmBase.deproxy(newElement);
 362         if (targetElement instanceof CategoricalData){
 363             mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
 364         }else if (targetElement.isInstanceOf(QuantitativeData.class)){
 365             mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
 366         }else{
 367             throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
 368         }
 369     }
 370
 371     private void mergeDescriptionElement(CategoricalData elementToStay,
 372             CategoricalData newElement) {
 373         List<StateData> oldData = new ArrayList<>(elementToStay.getStateData());
 374         List<StateData> newData = new ArrayList<>(newElement.getStateData());
 375         for (StateData newStateData : newData){
 376             State state = newStateData.getState();
 377             StateData oldStateData = firstByState(state, oldData);
 378             if (oldStateData != null){
 379                 //for now only state and count is used for aggregation, below code needs to be adapted if this changes
 380                 oldStateData.setCount(newStateData.getCount());
 381                 oldData.remove(oldStateData);
 382             }else{
 383                 elementToStay.addStateData(newStateData);
 384             }
 385         }
 386         for (StateData stateDataToRemove : oldData){
 387             elementToStay.removeStateData(stateDataToRemove);
 388         }
 389     }
 390
 391     private StateData firstByState(State state, List<StateData> oldData) {
 392         if (state == null){
 393             return null;
 394         }
 395         for (StateData sd : oldData){
 396             if (state.equals(sd.getState())){
 397                 return sd;
 398             }
 399         }
 400         return null;
 401     }
 402
 403     private void mergeDescriptionElement(QuantitativeData elementToStay,
 404             QuantitativeData newElement) {
 405         Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
 406         Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
 407         for (StatisticalMeasurementValue newValue : newValues){
 408             StatisticalMeasure type = newValue.getType();
 409             StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
 410             if (oldValue != null){
 411                 //for now only state and count is used for aggregation, below code needs to be adapted if this changes
 412                 oldValue.setValue(newValue.getValue());
 413                 oldValues.remove(oldValue);
 414             }else{
 415                 elementToStay.addStatisticalValue(newValue);
 416             }
 417         }
 418         for (StatisticalMeasurementValue valueToRemove : oldValues){
 419             elementToStay.removeStatisticalValue(valueToRemove);
 420         }
 421     }
 422
 423     private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
 424         if (type == null){
 425             return null;
 426         }
 427         for (StatisticalMeasurementValue value : oldValues){
 428             if (type.equals(value.getType())){
 429                 return value;
 430             }
 431         }
 432         return null;
 433     }
 434
 435     @Override
 436     protected void initTransaction() {
 437         dataSet = getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
 438     }
 439
 440     @Override
 441     protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
 442         super.removeDescriptionIfEmpty(description, resultHolder);
 443         if (description.getElements().isEmpty()){
 444             dataSet.removeDescription(description);
 445         }
 446     }
 447
 448     @Override
 449     protected void aggregateToParentTaxon(TaxonNode taxonNode,
 450             ResultHolder resultHolder,
 451             Set<TaxonDescription> excludedDescriptions) {
 452         StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
 453         Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
 454         addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
 455     }
 456
 457     @Override
 458     protected void aggregateWithinSingleTaxon(Taxon taxon,
 459             ResultHolder resultHolder,
 460             Set<TaxonDescription> excludedDescriptions) {
 461         StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
 462         Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
 463         addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
 464         if (getConfig().isIncludeLiterature()){
 465             Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
 466             addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
 467         }
 468         //TODO add default descriptions
 469         //xxx
 470
 471     }
 472
 473     private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
 474             Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
 475             AggregationMode aggregationMode) {
 476
 477         boolean descriptionWasUsed = false;
 478         for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
 479             for (DescriptionElementBase deb: desc.getElements()){
 480                 if (hasCharacterData(deb)){
 481                     if (deb.isInstanceOf(CategoricalData.class)){
 482                         addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
 483                         descriptionWasUsed = true;
 484                     }else if (deb.isInstanceOf(QuantitativeData.class)){
 485                         addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
 486                         descriptionWasUsed = true;
 487                     }
 488                 }
 489             }
 490
 491             //sources
 492             AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
 493             if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
 494                 IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
 495                 desc = CdmBase.deproxy(desc);
 496
 497                 switch (sourceMode){
 498                     case DESCRIPTION:
 499                         DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
 500                         source.setCdmSource(clonedDesc);
 501                         break;
 502                     case TAXON:
 503                         if (desc instanceof TaxonDescription){
 504                             Taxon taxon = ((TaxonDescription) desc).getTaxon();
 505                             source.setCdmSource(taxon);
 506                         }else {
 507                             throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
 508                         }
 509                         break;
 510                     case NONE:
 511                         source = null;
 512                         break;
 513                     case ALL: //not yet supported
 514                         throw new AggregationException("Source mode not yet supported: " + sourceMode);
 515                     case ALL_SAMEVALUE: //makes no sense
 516                         throw new AggregationException("Illegal source mode: " + sourceMode);
 517                     default:
 518                         throw new AggregationException("Source mode not supported: " + sourceMode);
 519                 }
 520                 if (source != null){
 521                     descriptiveResultHolder.sources.add(source);
 522                 }
 523             }
 524         }
 525     }
 526
 527     private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
 528         QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
 529         if(aggregatedQuantitativeData==null){
 530             // no QuantitativeData with this feature in aggregation
 531             aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
 532         }
 533         else{
 534             aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
 535         }
 536         if (aggregatedQuantitativeData != null){
 537             resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
 538         }
 539     }
 540
 541     private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
 542         CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
 543         if(aggregatedCategoricalData==null){
 544             // no CategoricalData with this feature in aggregation
 545             aggregatedCategoricalData = cd.clone();
 546             // set count to 1 if not set
 547             aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
 548             resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
 549         }
 550         else{
 551             // split all StateData into those where the state already exists and those where it doesn't
 552             List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
 553             List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
 554             List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
 555
 556             for (StateData sd : sdWithNoExistingStateInAggregation) {
 557                 StateData clone = sd.clone();
 558                 // set count to 1 if not set
 559                 if(clone.getCount()==null){
 560                     clone.incrementCount();
 561                 }
 562                 aggregatedCategoricalData.addStateData(clone);
 563             }
 564
 565             for (StateData sdExist : sdWithExistingStateInAggregation) {
 566                 List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
 567                         .filter(sd->hasSameState(sdExist, sd))
 568                         .collect(Collectors.toList());
 569                 for (StateData stateData : aggregatedSameStateData) {
 570                     if(sdExist.getCount()==null){
 571                         stateData.incrementCount();
 572                     }
 573                     else{
 574                         stateData.setCount(stateData.getCount()+sdExist.getCount());
 575                     }
 576                 }
 577             }
 578         }
 579     }
 580
 581     @Override
 582     protected StructuredDescriptionResultHolder createResultHolder() {
 583         return new StructuredDescriptionResultHolder();
 584     }
 585
 586     private class StructuredDescriptionResultHolder extends ResultHolder{
 587         private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
 588         private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
 589         private Set<IdentifiableSource> sources = new HashSet<>();
 590         @Override
 591         public String toString() {
 592             return "SDResultHolder [categoricals=" + categoricalMap.size()
 593                 + ", quantitatives=" + quantitativeMap.size()
 594                 + ", sources=" + sources.size()
 595                 + ", descriptionsToDelete=" + this.descriptionsToDelete.size()
 596                 + "]";
 597         }
 598     }
 599
 600     private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
 601         Set<TaxonDescription> result = new HashSet<>();
 602         List<TaxonNode> childNodes = taxonNode.getChildNodes();
 603         for (TaxonNode childNode : childNodes) {
 604             Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
 605             result.addAll(childDescriptions.stream()
 606                 .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
 607                 .filter(desc->dataSet.getDescriptions().contains(desc))
 608                 .collect(Collectors.toSet()));
 609         }
 610         return result;
 611     }
 612
 613     /**
 614      * Computes all specimen attached to the given taxon within the given dataSet.
 615      * For these secimen it returns all attache
 616      * */
 617     private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
 618         Set<SpecimenDescription> result = new HashSet<>();
 619         //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
 620         for (TaxonDescription taxonDesc: taxon.getDescriptions()){
 621             for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
 622                 if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
 623                     IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
 624                     SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
 625                     Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
 626                     for(SpecimenDescription specimenDescription : descriptions){
 627                         if(dataSet.getDescriptions().contains(specimenDescription) &&
 628                                 specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
 629                             result.add(specimenDescription);
 630                         }
 631                     }
 632                 }
 633             }
 634         }
 635         return result;
 636     }
 637
 638     private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
 639         Set<TaxonDescription> result = new HashSet<>();
 640         //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
 641         for(TaxonDescription taxonDescription : taxon.getDescriptions()){
 642             if(dataSet.getDescriptions().contains(taxonDescription)
 643                     && taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
 644                     && taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
 645                 result.add(taxonDescription);
 646             }
 647         }
 648         return result;
 649     }
 650
 651     /**
 652      * Evaluates statistics for exact values collection and handles missing min and max values
 653      */
 654     private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
 655         QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
 656         aggQD.setUnit(sourceQd.getUnit());
 657         Set<BigDecimal> exactValues = sourceQd.getExactValues();
 658         if(!exactValues.isEmpty()){
 659             // qd is not already aggregated
 660             Comparator<BigDecimal> comp = Comparator.naturalOrder();
 661             int exactValueSampleSize = exactValues.size();
 662             BigDecimal exactValueMin = exactValues.stream().min(comp).get();
 663             BigDecimal exactValueMax = exactValues.stream().max(comp).get();
 664             BigDecimal exactValueAvg = BigDecimalUtil.average(exactValues);
 665             //TODO also check for typical boundary data
 666             if(sourceQd.getMin() == null && sourceQd.getMax() == null){
 667                 aggQD.setSampleSize(new BigDecimal(exactValueSampleSize), null);
 668                 aggQD.setAverage(exactValueAvg, null);
 669             }
 670             aggQD.setMinimum(sourceQd.getMin() == null ? exactValueMin: sourceQd.getMin().min(exactValueMin), null);
 671             aggQD.setMaximum(sourceQd.getMax() == null ? exactValueMax: sourceQd.getMax().max(exactValueMax), null);
 672         }
 673         else{
 674             // qd has only min, max, ... but no exact values
 675             aggQD = sourceQd.clone();
 676             aggQD = handleMissingValues(aggQD);
 677         }
 678         return aggQD;
 679     }
 680
 681     private QuantitativeData handleMissingValues(QuantitativeData qd) {
 682         //min max
 683         qd = handleMissingMinOrMax(qd);
 684         //average
 685         if (qd != null && qd.getAverage() == null){
 686             BigDecimal n = qd.getSampleSize();
 687             if(n != null && !n.equals(0f)){
 688                 BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n);
 689                 qd.setAverage(average, null);
 690             }
 691         }
 692         return qd;
 693     }
 694
 695     private QuantitativeData handleMissingMinOrMax(QuantitativeData qd) {
 696         return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
 697     }
 698
 699     public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
 700             MissingMaximumMode missingMaxMode) {
 701         if(aggQD.getMin() == null && aggQD.getMax() != null){
 702             if (missingMinMode == MissingMinimumMode.MinToZero) {
 703                 aggQD.setMinimum(BigDecimal.valueOf(0f), null);
 704             }else if (missingMinMode == MissingMinimumMode.MinToMax){
 705                 aggQD.setMinimum(aggQD.getMax(), null);
 706             }else if (missingMinMode == MissingMinimumMode.SkipRecord){
 707                 return null;
 708             }
 709         }
 710         if(aggQD.getMax() == null && aggQD.getMin() != null){
 711             if (missingMaxMode == MissingMaximumMode.MaxToMin){
 712                 aggQD.setMaximum(aggQD.getMin(), null);
 713             }else if (missingMaxMode == MissingMaximumMode.SkipRecord){
 714                 return null;
 715             }
 716         }
 717         return aggQD;
 718     }
 719
 720     private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
 721
 722         newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
 723
 724         BigDecimal min = null;
 725         BigDecimal max = null;
 726         BigDecimal average = null;
 727         BigDecimal sampleSize = null;
 728         newQd = handleMissingValues(newQd);
 729         if (newQd == null){
 730             return aggQd;
 731         }
 732         min = aggQd.getMin().min(newQd.getMin());
 733         max = aggQd.getMax().max(newQd.getMax());
 734         if (newQd.getSampleSize() != null && aggQd.getSampleSize() != null){
 735             sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
 736         }
 737         if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
 738             BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
 739             BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
 740             BigDecimal totalSum = aggTotalSum.add(newTotalSum);
 741             average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros();  //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
 742         }
 743         aggQd.setMinimum(min, null);
 744         aggQd.setMaximum(max, null);
 745         aggQd.setSampleSize(sampleSize, null);
 746         aggQd.setAverage(average, null);
 747         return aggQd;
 748     }
 749
 750     private static List<BigDecimal> getExactValues(QuantitativeData qd) {
 751         List<BigDecimal> exactValues = qd.getStatisticalValues().stream()
 752                 .filter(value->value.getType().equals(StatisticalMeasure.EXACT_VALUE()))
 753                 .map(exact->exact.getValue())
 754                 .collect(Collectors.toList());
 755         return exactValues;
 756     }
 757
 758     private static boolean hasSameState(StateData sd1, StateData sd2) {
 759         if (sd2.getState() == null || sd1.getState() == null){
 760             return false;
 761         }else{
 762             return sd2.getState().getUuid().equals(sd1.getState().getUuid());
 763         }
 764     }
 765 }