package eu.etaxonomy.cdm.api.service.description;
import java.math.BigDecimal;
+import java.math.MathContext;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
+import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
import eu.etaxonomy.cdm.common.BigDecimalUtil;
+import eu.etaxonomy.cdm.common.CdmUtils;
+import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
import eu.etaxonomy.cdm.model.common.CdmBase;
+import eu.etaxonomy.cdm.model.common.ICdmBase;
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
import eu.etaxonomy.cdm.model.description.CategoricalData;
import eu.etaxonomy.cdm.model.description.DescriptionBase;
import eu.etaxonomy.cdm.model.description.DescriptionType;
import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
import eu.etaxonomy.cdm.model.description.Feature;
+import eu.etaxonomy.cdm.model.description.IDescribable;
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
import eu.etaxonomy.cdm.model.description.QuantitativeData;
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
-import eu.etaxonomy.cdm.model.description.State;
import eu.etaxonomy.cdm.model.description.StateData;
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
+import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
+import eu.etaxonomy.cdm.model.term.DefinedTermBase;
/**
* Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
public class StructuredDescriptionAggregation
extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
+ private static final Logger logger = LogManager.getLogger();
+
private DescriptiveDataSet dataSet;
@Override
}
@Override
- protected void preAggregate() {
- subTask("preAccumulate - nothing to do");
+ protected void preAggregate(IProgressMonitor monitor) {
+ monitor.subTask("preAccumulate - nothing to do");
// take start time for performance testing
double start = System.currentTimeMillis();
logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
}
+ @Override
+ // Fail-fast check of the configuration: only source modes supported for
+ // structured-description aggregation are accepted; anything else aborts with an AggregationException.
+ protected void verifyConfiguration(IProgressMonitor monitor){
+ if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
+ .contains(getConfig().getToParentSourceMode())){
+ throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
+ }
+ if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
+ .contains(getConfig().getWithinTaxonSourceMode())){
+ throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
+ }
+ }
private boolean hasCharacterData(DescriptionElementBase element) {
return hasCategoricalData(element) || hasQuantitativeData(element);
@Override
protected TaxonDescription createNewDescription(Taxon taxon) {
String title = taxon.getTitleCache();
- logger.debug("creating new description for " + title);
+ if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
TaxonDescription description = TaxonDescription.NewInstance(taxon);
description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
setDescriptionTitle(description, taxon);
}
@Override
- protected void addAggregationResultToDescription(TaxonDescription targetDescription,
+ // Merges the aggregation result for one taxon into the (possibly pre-existing) target description
+ // instead of replacing elements wholesale; returns true if anything actually changed.
+ protected boolean mergeAggregationResultIntoTargetDescription(TaxonDescription targetDescription,
ResultHolder resultHolder) {
- StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
- replaceExistingDescriptionElements(targetDescription, structuredResultHolder.categoricalMap);
- replaceExistingDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap);
- addAggregationSources(targetDescription, structuredResultHolder);
+ StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
+ boolean updated = mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
+ updated |= mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
+ updated |= mergeDescriptionSources(targetDescription, structuredResultHolder);
if(!targetDescription.getElements().isEmpty()){
dataSet.addDescription(targetDescription);
+ }else{
+ //keep the dataset in sync: an aggregated description that ended up empty is removed from it
+ dataSet.removeDescription(targetDescription);
}
+ return updated;
+ }
+
+ @Override
+ // Only categorical and quantitative data take part in structured-description aggregation.
+ protected boolean isRelevantDescriptionElement(DescriptionElementBase deb){
+ return deb.isInstanceOf(CategoricalData.class) || deb.isInstanceOf(QuantitativeData.class);
}
- private void addAggregationSources(TaxonDescription targetDescription,
- StructuredDescriptionResultHolder structuredResultHolder) {
- //FIXME Re-use sources if possible
+ // Reconciles the aggregation sources of the target description with the newly computed sources:
+ // reusable sources are merged in place, new ones are added, and stale ones are removed
+ // (including, where applicable, their clone-for-source descriptions).
+ // Returns true if any persistent change was made.
+ private <T extends DescriptionBase<?>> boolean mergeDescriptionSources(TaxonDescription targetDescription,
+ StructuredDescriptionResultHolder structuredResultHolder) {
+
+ boolean updated = false;
//Remove sources from description
Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
.filter(source->source.getType().equals(OriginalSourceType.Aggregation))
.collect(Collectors.toSet());
- for (IdentifiableSource source : sourcesToRemove) {
- targetDescription.removeSource(source);
+ Set<IdentifiableSource> newSources = structuredResultHolder.sources;
+ for (IdentifiableSource newSource : newSources) {
+ IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
+ if (mergeSourceCandidate == null){
+ addNewSource(targetDescription, newSource);
+ updated = true;
+ }else{
+ updated |= mergeSource(mergeSourceCandidate, newSource);
+ //a merged source survives, so it must not be removed below
+ sourcesToRemove.remove(mergeSourceCandidate);
+ }
}
- Set<DescriptionBase<?>> sourceDescriptions = structuredResultHolder.sourceDescriptions;
- for (DescriptionBase<?> descriptionBase : sourceDescriptions) {
- DescriptionBase<?> sourceDescription = null;
- if(descriptionBase.isInstanceOf(SpecimenDescription.class)){
- DescriptionBase<?> clone = descriptionBase.clone();
- clone.removeDescriptiveDataSet(dataSet);
- clone.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
- SpecimenOrObservationBase<?> specimen = CdmBase.deproxy(descriptionBase, SpecimenDescription.class).getDescribedSpecimenOrObservation();
- specimen.addDescription(CdmBase.deproxy(clone, SpecimenDescription.class));
- sourceDescription=clone;
+ //remove remaining sources-to-be-removed
+ for (IdentifiableSource sourceToRemove : sourcesToRemove) {
+ targetDescription.removeSource(sourceToRemove);
+ //only count as update if the source already lived in the database
+ updated |= sourceToRemove.isPersisted();
+ ICdmBase target = CdmBase.deproxy(sourceToRemove.getCdmSource());
+ if (target != null){
+ sourceToRemove.setCdmSource(null); //workaround for missing orphan removal #9801
+ if (target instanceof DescriptionBase){
+ @SuppressWarnings("unchecked")
+ T descriptionToDelete = (T)target;
+ if (descriptionToDelete.isCloneForSource()){
+ //TODO maybe this is not really needed as it is later done anyway with .deletedDescription
+ //but currently this still leads to a re-saved by cascade exception
+ ((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
+ structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
+ }
+ } else if (target.isInstanceOf(Taxon.class)){
+ //nothing to do for now
+ } else {
+ throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
+ }
+ }
- else if(descriptionBase.isInstanceOf(TaxonDescription.class)){
- Taxon taxon = CdmBase.deproxy(descriptionBase, TaxonDescription.class).getTaxon();
- taxon.addDescription(CdmBase.deproxy(descriptionBase, TaxonDescription.class));
- sourceDescription=descriptionBase;
+ }
+ return updated;
+ }
+
+ // Adds a brand-new aggregation source to the target description. If the source links to a
+ // description (clone-for-source), that description is also attached to its described entity
+ // (specimen or taxon) so it is persisted via cascade.
+ private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
+ IdentifiableSource newSource) {
+
+ //add source
+ targetDescription.addSource(newSource);
+ //if it is a description add it to the described entity (specimen, taxon)
+ ICdmBase target = newSource.getCdmSource();
+ if (target != null){
+ if (target.isInstanceOf(DescriptionBase.class)){
+ @SuppressWarnings("unchecked")
+ T description = (T)CdmBase.deproxy(target);
+ ((IDescribable<T>)description.describedEntity()).addDescription(description);
}
+ }
+ }
+
+ //mergeability has been checked before (see isCandidateForSourceReuse)
+ // Merges the new source into an existing reusable source. For description-typed sources the
+ // existing clone-for-source description is updated in place and the superfluous new clone is
+ // detached from its described entity. Returns true if the existing source changed.
+ private <T extends DescriptionBase<?>> boolean mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {
+
+ boolean updated = false;
+ ICdmBase newTarget = newSource.getCdmSource();
+ if (newTarget != null){
+ newTarget = CdmBase.deproxy(newTarget);
+ if (newTarget instanceof DescriptionBase){
+ @SuppressWarnings("unchecked")
+ T newTargetDesc = (T)newTarget;
+ @SuppressWarnings("unchecked")
+ T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
+ updated |= mergeSourceDescription(existingTargetDesc, newTargetDesc);
+ ((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
+ if (!existingTargetDesc.equals(newTargetDesc)){
+ //the fresh clone is not needed anymore, the existing one was updated instead
+ ((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
+ }
+ }else if (newTarget instanceof Taxon){
+ //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
+ }else{
+ throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
}
+ }else{
+ throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
+ }
+ return updated;
+ }
+
+ // Merges the elements of the new source description into the existing one, matching by feature:
+ // matched elements are merged, unmatched new elements are cloned in, leftover old elements are
+ // removed. Returns true if any persistent change was made.
+ private <T extends DescriptionBase<?>> boolean mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {
+
+ boolean updated = false;
+ Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
+ Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());
+
+ for (DescriptionElementBase newElement : newElements){
+ //clone so the element can live in the existing description without being moved
+ DescriptionElementBase newElementClone = newElement.clone();
+ Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
+ .filter(e->e.getFeature()!= null
+ && e.getFeature().equals(newElementClone.getFeature()))
+ .findFirst();
+ if (matchingElement.isPresent()){
+ updated |= mergeDescriptionElement(matchingElement.get(), newElementClone);
+ elementsToRemove.remove(matchingElement.get());
+ }else{
+ existingSourceDescription.addElement(newElementClone);
+ updated = true;
+ }
+ }
+ updated |= addSourceDescriptionToDescribedEntity(newSourceDescription);
+ //title cache is synced unconditionally; not counted as an update on its own
+ existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);
+
+ for (DescriptionElementBase debToRemove : elementsToRemove){
+ existingSourceDescription.removeElement(debToRemove);
+ updated |= debToRemove.isPersisted();
+ }
+ return updated;
+ }
+
+ /**
+  * Ensures the given source description is attached to its described entity
+  * (specimen, taxon, ...). Returns <code>true</code> only if it was newly added.
+  */
+ @SuppressWarnings("unchecked")
+ private <T extends DescriptionBase<?>> boolean addSourceDescriptionToDescribedEntity(T sourceDescription) {
+ boolean updated = false;
+ IDescribable<T> describedEntity = ((IDescribable<T>)sourceDescription.describedEntity());
+ //fix: add only if NOT yet contained; the previous check was inverted, so a missing
+ //description was never attached and an already-attached one was wrongly reported as updated
+ if (!describedEntity.getDescriptions().contains(sourceDescription)){
+ describedEntity.addDescription(sourceDescription);
+ updated = true;
+ }
+ return updated;
+ }
+
+ // Returns the first existing source of the target description that may be reused for the
+ // given new source, or null if no reuse candidate exists.
+ private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
+ for (IdentifiableSource existingSource : targetDescription.getSources()){
+ boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
+ if (isCandidate){
+ return existingSource;
+ }
+ }
+ return null;
+ }
+
+ // Decides whether an existing source can be reused for a new source. Reuse requires that both
+ // link to cdm targets of the same class and, for descriptions, that they describe the same
+ // specimen resp. taxon; for taxon targets the taxa themselves must be equal.
+ private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
+ if (newSource.getCdmSource()!= null){
+ if (existingSource.getCdmSource() == null){
+ return false;
+ }else {
+ ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
+ ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
+ //NOTE(review): exact-class comparison (getClass) assumes deproxy yields the concrete class
+ if (!newTarget.getClass().equals(existingTarget.getClass())){
+ return false;
+ }else{
+ if (newTarget instanceof SpecimenDescription){
+ SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
+ SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
+ //for now reuse is possible if both are descriptions for the same specimen
+ return newSob != null && newSob.equals(existingSob);
+ }else if (newTarget instanceof TaxonDescription){
+ Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
+ Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
+ //for now reuse is possible if both are descriptions for the same taxon
+ return newTaxon != null && newTaxon.equals(existingTaxon);
+ }else if (newTarget instanceof Taxon){
+ return newTarget.equals(existingTarget);
+ }else{
+ throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
+ }
+ }
+ }
+ }
+
+ //sources without a cdm target are never reused
+ return false;
+ }
+
+ // Returns the description to be linked from an aggregation source. Aggregated structured
+ // descriptions are reused as-is unless cloning is explicitly configured; all other
+ // descriptions are cloned, detached from the dataset and marked as CLONE_FOR_SOURCE.
+ private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
+ if (!getConfig().isCloneAggregatedSourceDescriptions() && newSourceDescription.isAggregatedStructuredDescription()){
+ return newSourceDescription;
+ }
+ @SuppressWarnings("unchecked")
+ T clonedDescription = (T)newSourceDescription.clone();
+// clonedDescription.removeSources();
+ clonedDescription.removeDescriptiveDataSet(dataSet);
+ clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
+ clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
+ return clonedDescription;
+ }
+
+ @Override
+ // Dispatches the element merge to the categorical or quantitative implementation.
+ // Both elements are deproxied first so plain instanceof works below.
+ protected <S extends DescriptionElementBase> boolean mergeDescriptionElement(S targetElement,
+ S newElement) {
+
+ boolean updated = false;
+ targetElement = CdmBase.deproxy(targetElement);
+ newElement = CdmBase.deproxy(newElement);
+ if (targetElement instanceof CategoricalData){
+ updated |= mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
+ }else if (targetElement.isInstanceOf(QuantitativeData.class)){
+ updated |= mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
+ }else{
+ throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
}
+ return updated;
}
- private void replaceExistingDescriptionElements(TaxonDescription targetDescription,
- Map<Feature, ? extends DescriptionElementBase> elementMap) {
- for (Entry<Feature, ? extends DescriptionElementBase> entry : elementMap.entrySet()) {
- DescriptionElementBase elementToRemove = null;
- DescriptionElementBase elementReplacement = null;
- for (DescriptionElementBase descriptionElementBase : targetDescription.getElements()) {
- if(descriptionElementBase.getFeature().equals(entry.getKey())){
- elementToRemove = descriptionElementBase;
- elementReplacement = entry.getValue();
- break;
+ // Merges new categorical state data into the existing element, matching by state:
+ // counts of matched states are updated, unmatched new states are added, and states
+ // no longer present are removed. Returns true if any persistent change was made.
+ private boolean mergeDescriptionElement(CategoricalData elementToStay,
+ CategoricalData newElement) {
+
+ boolean updated = false;
+ List<StateData> dataToRemove = new ArrayList<>(elementToStay.getStateData());
+ List<StateData> newData = new ArrayList<>(newElement.getStateData());
+ for (StateData newStateData : newData){
+ DefinedTermBase<?> state = newStateData.getState();
+ StateData oldStateData = firstByState(state, dataToRemove);
+ if (oldStateData != null){
+ //for now only state and count is used for aggregation, below code needs to be adapted if this changes
+ if (!CdmUtils.nullSafeEqual(oldStateData.getCount(), newStateData.getCount())){
+ oldStateData.setCount(newStateData.getCount());
+// getResult().addUpdatedUuid(oldStateData);
+ updated = true;
}
+ dataToRemove.remove(oldStateData);
+ }else{
+ elementToStay.addStateData(newStateData);
+ updated = true;
}
- }
- if(elementToRemove!=null && elementReplacement!=null){
- targetDescription.removeElement(elementToRemove);
- targetDescription.addElement(elementReplacement);
+ }
+ for (StateData stateDataToRemove : dataToRemove){
+ elementToStay.removeStateData(stateDataToRemove);
+ //removal of not-yet-persisted state data is not an update
+ updated |= stateDataToRemove.isPersisted();
+ }
+ return updated;
+ }
+
+ // Returns the first state data with the given state, or null if the state is null
+ // or no matching entry exists.
+ private StateData firstByState(DefinedTermBase<?> state, List<StateData> oldData) {
+ if (state == null){
+ return null;
+ }
+ for (StateData sd : oldData){
+ if (state.equals(sd.getState())){
+ return sd;
+ }
+ }
+ return null;
+ }
+
+ // Merges new statistical values into the existing quantitative element, matching by
+ // measure type: matched values are updated, unmatched new values added, leftovers removed.
+ // Returns true if any persistent change was made.
+ private boolean mergeDescriptionElement(QuantitativeData elementToStay,
+ QuantitativeData newElement) {
+
+ boolean updated = false;
+
+ Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
+ Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
+ for (StatisticalMeasurementValue newValue : newValues){
+ StatisticalMeasure type = newValue.getType();
+ StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
+ if (oldValue != null){
+ //for now only measure type and value are used for aggregation, below code needs to be adapted if this changes
+ if (!CdmUtils.nullSafeEqual(oldValue.getValue(), newValue.getValue())){
+ oldValue.setValue(newValue.getValue());
+ updated = true;
+ }
+ oldValues.remove(oldValue);
+ }else{
+ elementToStay.addStatisticalValue(newValue);
+ updated = true;
}
}
+ for (StatisticalMeasurementValue valueToRemove : oldValues){
+ elementToStay.removeStatisticalValue(valueToRemove);
+ updated |= valueToRemove.isPersisted();
+ }
+ return updated;
+ }
+
+ // Returns the first statistical value of the given measure type, or null if the type
+ // is null or no matching value exists.
+ private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
+ if (type == null){
+ return null;
+ }
+ for (StatisticalMeasurementValue value : oldValues){
+ if (type.equals(value.getType())){
+ return value;
+ }
+ }
+ return null;
}
@Override
}
@Override
- protected void removeDescriptionIfEmpty(TaxonDescription description) {
- super.removeDescriptionIfEmpty(description);
+ protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
+ super.removeDescriptionIfEmpty(description, resultHolder);
if (description.getElements().isEmpty()){
dataSet.removeDescription(description);
}
ResultHolder resultHolder,
Set<TaxonDescription> excludedDescriptions) {
StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
- addDescriptionElement(descriptiveResultHolder, getChildTaxonDescriptions(taxonNode, dataSet));
+ Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
+ addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
}
@Override
ResultHolder resultHolder,
Set<TaxonDescription> excludedDescriptions) {
StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
- addDescriptionElement(descriptiveResultHolder, getSpecimenDescriptions(taxon, dataSet));
+
+ //specimen descriptions
+ Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
+ addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
+
+ //"literature" descriptions
+ if (getConfig().isIncludeLiterature()){
+ Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
+ addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
+ }
+
+ //"default" descriptions
+ //TODO add default descriptions
+ //xxx
+
}
- private void addDescriptionElement(StructuredDescriptionResultHolder descriptiveResultHolder,
- Set<? extends DescriptionBase<?>> descriptions) {
+ // Accumulates the character data of the given source descriptions (specimen, literature or
+ // default) into the result holder and, depending on the configured source mode for the
+ // given aggregation mode, records an aggregation source per used description.
+ private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
+ Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
+ AggregationMode aggregationMode) {
+
+ //NOTE(review): descriptionWasUsed is declared outside the loop and never reset, so once any
+ //description contributed data, all following descriptions also get a source - verify intent
boolean descriptionWasUsed = false;
- for (DescriptionBase<?> desc:descriptions){
+ for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
for (DescriptionElementBase deb: desc.getElements()){
if (hasCharacterData(deb)){
if (deb.isInstanceOf(CategoricalData.class)){
addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
descriptionWasUsed = true;
}else if (deb.isInstanceOf(QuantitativeData.class)){
- addToQuantitative(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
+ addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
descriptionWasUsed = true;
}
}
}
- if(descriptionWasUsed){
- descriptiveResultHolder.sourceDescriptions.add(desc);
+
+ //sources
+ AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
+ if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
+ IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
+ desc = CdmBase.deproxy(desc);
+
+ switch (sourceMode){
+ case DESCRIPTION:
+ DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
+ source.setCdmSource(clonedDesc);
+ break;
+ case TAXON:
+ if (desc instanceof TaxonDescription){
+ Taxon taxon = ((TaxonDescription) desc).getTaxon();
+ source.setCdmSource(taxon);
+ }else {
+ throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
+ }
+ break;
+ case NONE:
+ source = null;
+ break;
+ case ALL: //not yet supported
+ throw new AggregationException("Source mode not yet supported: " + sourceMode);
+ case ALL_SAMEVALUE: //makes no sense
+ throw new AggregationException("Illegal source mode: " + sourceMode);
+ default:
+ throw new AggregationException("Source mode not supported: " + sourceMode);
+ }
+ if (source != null){
+ descriptiveResultHolder.sources.add(source);
+ }
}
}
}
- private void addToQuantitative(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
+ private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
if(aggregatedQuantitativeData==null){
// no QuantitativeData with this feature in aggregation
- aggregatedQuantitativeData = aggregateSingleQuantitativeData(qd);
+ aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
}
else{
- aggregatedQuantitativeData = mergeQuantitativeData(aggregatedQuantitativeData, qd);
+ aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
}
if (aggregatedQuantitativeData != null){
resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
- if(aggregatedCategoricalData==null){
+ if(aggregatedCategoricalData == null){
// no CategoricalData with this feature in aggregation
aggregatedCategoricalData = cd.clone();
// set count to 1 if not set
- aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
- resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
+ if (!aggregatedCategoricalData.getStatesOnly().isEmpty()){
+ aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
+ resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
+ }
}
else{
// split all StateData into those where the state already exists and those where it doesn't
- List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
+ List<DefinedTermBase<?>> statesOnly = aggregatedCategoricalData.getStatesOnly();
List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
for (StateData sdExist : sdWithExistingStateInAggregation) {
List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
- .filter(sd->hasSameState(sdExist, sd))
- .collect(Collectors.toList());
+ .filter(sd->hasSameState(sdExist, sd))
+ .collect(Collectors.toList());
for (StateData stateData : aggregatedSameStateData) {
if(sdExist.getCount()==null){
stateData.incrementCount();
return new StructuredDescriptionResultHolder();
}
- private class StructuredDescriptionResultHolder implements ResultHolder{
- Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
- Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
- Set<DescriptionBase<?>> sourceDescriptions = new HashSet<>();
+ // Per-taxon accumulator: aggregated elements keyed by feature plus the sources to attach.
+ private class StructuredDescriptionResultHolder extends ResultHolder{
+ private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
+ private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
+ private Set<IdentifiableSource> sources = new HashSet<>();
+ @Override
+ public String toString() {
+ return "SDResultHolder [categoricals=" + categoricalMap.size()
+ + ", quantitatives=" + quantitativeMap.size()
+ + ", sources=" + sources.size()
+ + ", descriptionsToDelete=" + this.descriptionsToDelete.size()
+ + "]";
+ }
}
- /*
- * Static utility methods
- */
- private static Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
+ // Collects the already-aggregated structured descriptions of the direct children of the
+ // given taxon node, restricted to descriptions belonging to the dataset.
+ private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
Set<TaxonDescription> result = new HashSet<>();
List<TaxonNode> childNodes = taxonNode.getChildNodes();
for (TaxonNode childNode : childNodes) {
- result.addAll(childNode.getTaxon().getDescriptions().stream()
- .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
- .filter(desc->dataSet.getDescriptions().contains(desc))
- .collect(Collectors.toSet()));
+ Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
+ result.addAll(childDescriptions.stream()
+ .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
+ .filter(desc->dataSet.getDescriptions().contains(desc))
+ .collect(Collectors.toSet()));
}
return result;
}
- private static Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
+ /**
+ * Computes all specimens attached to the given taxon (via individuals associations)
+ * and returns their specimen descriptions belonging to the given dataSet,
+ * excluding descriptions of type CLONE_FOR_SOURCE.
+ * */
+ private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
Set<SpecimenDescription> result = new HashSet<>();
+ //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
for (TaxonDescription taxonDesc: taxon.getDescriptions()){
for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
- Set<SpecimenDescription> descriptions = (Set)specimen.getDescriptions();
- for(SpecimenDescription specimenDescription : descriptions){
- if(dataSet.getDescriptions().contains(specimenDescription) && specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
- result.add(specimenDescription);
- }
- }
+ Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
+ for(SpecimenDescription specimenDescription : descriptions){
+ if(dataSet.getDescriptions().contains(specimenDescription) &&
+ specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
+ result.add(specimenDescription);
+ }
+ }
}
}
}
return result;
}
- private QuantitativeData aggregateSingleQuantitativeData(QuantitativeData sourceQd){
+ // Returns the taxon descriptions of the given taxon that represent secondary ("literature")
+ // data within the dataset, excluding clone-for-source descriptions.
+ private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
+ Set<TaxonDescription> result = new HashSet<>();
+ //TODO performance: use DTO service to retrieve literature descriptions without initializing all taxon descriptions
+ for(TaxonDescription taxonDescription : taxon.getDescriptions()){
+ if(dataSet.getDescriptions().contains(taxonDescription)
+ && taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
+ && taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
+ result.add(taxonDescription);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Evaluates statistics for exact values collection and handles missing min and max values
+ */
+ private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
+ aggQD.setUnit(sourceQd.getUnit());
Set<BigDecimal> exactValues = sourceQd.getExactValues();
if(!exactValues.isEmpty()){
- Comparator<BigDecimal> comp = Comparator.naturalOrder();
// qd is not already aggregated
+ Comparator<BigDecimal> comp = Comparator.naturalOrder();
int exactValueSampleSize = exactValues.size();
BigDecimal exactValueMin = exactValues.stream().min(comp).get();
BigDecimal exactValueMax = exactValues.stream().max(comp).get();
}
private QuantitativeData handleMissingValues(QuantitativeData qd) {
+ //min max
qd = handleMissingMinOrMax(qd);
+ //average: if still missing, derive it from min and max
if (qd != null && qd.getAverage() == null){
BigDecimal n = qd.getSampleSize();
- if(n != null && !n.equals(0f)){
+ //fix: BigDecimal.equals(Float) is always false, so '!n.equals(0f)' never detected a
+ //zero sample size; compareTo really guards against division by zero
+ if(n != null && n.compareTo(BigDecimal.ZERO) != 0){
- qd.setAverage((qd.getMax().add(qd.getMin())).divide(n), null);
+ //NOTE(review): dividing (max+min) by the sample size looks suspicious; (max+min)/2
+ //would be the usual mid-range estimate - verify the intended semantics.
+ //DECIMAL32 (as used in addToExistingQuantitativeData) avoids ArithmeticException
+ //for non-terminating decimal expansions
+ BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n, MathContext.DECIMAL32);
+ qd.setAverage(average, null);
}
}
return qd;
return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
}
-
public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
MissingMaximumMode missingMaxMode) {
if(aggQD.getMin() == null && aggQD.getMax() != null){
return aggQD;
}
- private QuantitativeData mergeQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
+ private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
- newQd = aggregateSingleQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a cleear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
+ newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
BigDecimal min = null;
BigDecimal max = null;
sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
}
if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
- BigDecimal totalSum = aggQd.getAverage().multiply(aggQd.getSampleSize()).add(newQd.getAverage().multiply(newQd.getSampleSize()));
- average = totalSum.divide(sampleSize);
+ BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
+ BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
+ BigDecimal totalSum = aggTotalSum.add(newTotalSum);
+ average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros(); //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
}
aggQd.setMinimum(min, null);
aggQd.setMaximum(max, null);
}
+ // True if both state data refer to the same state term (compared by UUID);
+ // null states never match.
private static boolean hasSameState(StateData sd1, StateData sd2) {
- return sd2.getState().getUuid().equals(sd1.getState().getUuid());
+ if (sd2.getState() == null || sd1.getState() == null){
+ return false;
+ }else{
+ return sd2.getState().getUuid().equals(sd1.getState().getUuid());
+ }
}
}