package eu.etaxonomy.cdm.api.service.description;
import java.math.BigDecimal;
+import java.math.MathContext;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
+import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
import eu.etaxonomy.cdm.common.BigDecimalUtil;
+import eu.etaxonomy.cdm.common.CdmUtils;
+import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
import eu.etaxonomy.cdm.model.common.CdmBase;
+import eu.etaxonomy.cdm.model.common.ICdmBase;
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
import eu.etaxonomy.cdm.model.description.CategoricalData;
import eu.etaxonomy.cdm.model.description.DescriptionBase;
import eu.etaxonomy.cdm.model.description.DescriptionType;
import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
import eu.etaxonomy.cdm.model.description.Feature;
+import eu.etaxonomy.cdm.model.description.IDescribable;
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
import eu.etaxonomy.cdm.model.description.QuantitativeData;
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
-import eu.etaxonomy.cdm.model.description.State;
import eu.etaxonomy.cdm.model.description.StateData;
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
+import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
+import eu.etaxonomy.cdm.model.term.DefinedTermBase;
/**
* Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
public class StructuredDescriptionAggregation
extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
+ private static final Logger logger = LogManager.getLogger();
+
private DescriptiveDataSet dataSet;
@Override
}
@Override
- protected void preAggregate() {
- subTask("preAccumulate - nothing to do");
+ protected void preAggregate(IProgressMonitor monitor) {
+ monitor.subTask("preAccumulate - nothing to do");
// take start time for performance testing
double start = System.currentTimeMillis();
logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
}
+ @Override
+ // Fail-fast check of the configuration: only source modes supported for
+ // structured-description aggregation are accepted; anything else aborts with an AggregationException.
+ protected void verifyConfiguration(IProgressMonitor monitor){
+ if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
+ .contains(getConfig().getToParentSourceMode())){
+ throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
+ }
+ if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
+ .contains(getConfig().getWithinTaxonSourceMode())){
+ throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
+ }
+ }
private boolean hasCharacterData(DescriptionElementBase element) {
return hasCategoricalData(element) || hasQuantitativeData(element);
@Override
protected TaxonDescription createNewDescription(Taxon taxon) {
String title = taxon.getTitleCache();
- logger.debug("creating new description for " + title);
+ if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
TaxonDescription description = TaxonDescription.NewInstance(taxon);
description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
setDescriptionTitle(description, taxon);
}
@Override
- protected void addAggregationResultToDescription(TaxonDescription targetDescription,
+ // Merges the aggregation result for one taxon into the (possibly pre-existing) target description
+ // instead of replacing elements wholesale; returns true if anything actually changed.
+ protected boolean mergeAggregationResultIntoTargetDescription(TaxonDescription targetDescription,
ResultHolder resultHolder) {
- StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
- replaceExistingDescriptionElements(targetDescription, structuredResultHolder.categoricalMap);
- replaceExistingDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap);
- addAggregationSources(targetDescription, structuredResultHolder);
+ StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
+ boolean updated = mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
+ updated |= mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
+ updated |= mergeDescriptionSources(targetDescription, structuredResultHolder);
if(!targetDescription.getElements().isEmpty()){
dataSet.addDescription(targetDescription);
+ }else{
+ //keep the dataset in sync: an aggregated description that ended up empty is removed from it
+ dataSet.removeDescription(targetDescription);
}
+ return updated;
+ }
+
+ @Override
+ // Only categorical and quantitative data take part in structured-description aggregation.
+ protected boolean isRelevantDescriptionElement(DescriptionElementBase deb){
+ return deb.isInstanceOf(CategoricalData.class) || deb.isInstanceOf(QuantitativeData.class);
}
- private void addAggregationSources(TaxonDescription targetDescription,
- StructuredDescriptionResultHolder structuredResultHolder) {
- //FIXME Re-use sources if possible
+ // Reconciles the aggregation sources of the target description with the newly computed sources:
+ // reusable sources are merged in place, new ones are added, and stale ones are removed
+ // (including, where applicable, their clone-for-source descriptions).
+ // Returns true if any persistent change was made.
+ private <T extends DescriptionBase<?>> boolean mergeDescriptionSources(TaxonDescription targetDescription,
+ StructuredDescriptionResultHolder structuredResultHolder) {
+
+ boolean updated = false;
//Remove sources from description
Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
.filter(source->source.getType().equals(OriginalSourceType.Aggregation))
.collect(Collectors.toSet());
- for (IdentifiableSource source : sourcesToRemove) {
- targetDescription.removeSource(source);
+ Set<IdentifiableSource> newSources = structuredResultHolder.sources;
+ for (IdentifiableSource newSource : newSources) {
+ IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
+ if (mergeSourceCandidate == null){
+ addNewSource(targetDescription, newSource);
+ updated = true;
+ }else{
+ updated |= mergeSource(mergeSourceCandidate, newSource);
+ //a merged source survives, so it must not be removed below
+ sourcesToRemove.remove(mergeSourceCandidate);
+ }
}
- Set<DescriptionBase<?>> sourceDescriptions = structuredResultHolder.sourceDescriptions;
- for (DescriptionBase<?> descriptionBase : sourceDescriptions) {
- DescriptionBase<?> sourceDescription = null;
- if(descriptionBase.isInstanceOf(SpecimenDescription.class)){
- DescriptionBase<?> clone = descriptionBase.clone();
- clone.removeDescriptiveDataSet(dataSet);
- clone.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
- SpecimenOrObservationBase<?> specimen = CdmBase.deproxy(descriptionBase, SpecimenDescription.class).getDescribedSpecimenOrObservation();
- specimen.addDescription(CdmBase.deproxy(clone, SpecimenDescription.class));
- sourceDescription=clone;
+ //remove remaining sources-to-be-removed
+ for (IdentifiableSource sourceToRemove : sourcesToRemove) {
+ targetDescription.removeSource(sourceToRemove);
+ //only count as update if the source already lived in the database
+ updated |= sourceToRemove.isPersisted();
+ ICdmBase target = CdmBase.deproxy(sourceToRemove.getCdmSource());
+ if (target != null){
+ sourceToRemove.setCdmSource(null); //workaround for missing orphan removal #9801
+ if (target instanceof DescriptionBase){
+ @SuppressWarnings("unchecked")
+ T descriptionToDelete = (T)target;
+ if (descriptionToDelete.isCloneForSource()){
+ //TODO maybe this is not really needed as it is later done anyway with .deletedDescription
+ //but currently this still leads to a re-saved by cascade exception
+ ((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
+ structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
+ }
+ } else if (target.isInstanceOf(Taxon.class)){
+ //nothing to do for now
+ } else {
+ throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
+ }
+ }
- else if(descriptionBase.isInstanceOf(TaxonDescription.class)){
- Taxon taxon = CdmBase.deproxy(descriptionBase, TaxonDescription.class).getTaxon();
- taxon.addDescription(CdmBase.deproxy(descriptionBase, TaxonDescription.class));
- sourceDescription=descriptionBase;
+ }
+ return updated;
+ }
+
+ // Adds a brand-new aggregation source to the target description. If the source links to a
+ // description (clone-for-source), that description is also attached to its described entity
+ // (specimen or taxon) so it is persisted via cascade.
+ private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
+ IdentifiableSource newSource) {
+
+ //add source
+ targetDescription.addSource(newSource);
+ //if it is a description add it to the described entity (specimen, taxon)
+ ICdmBase target = newSource.getCdmSource();
+ if (target != null){
+ if (target.isInstanceOf(DescriptionBase.class)){
+ @SuppressWarnings("unchecked")
+ T description = (T)CdmBase.deproxy(target);
+ ((IDescribable<T>)description.describedEntity()).addDescription(description);
}
+ }
+ }
+
+ //mergeability has been checked before (see isCandidateForSourceReuse)
+ // Merges the new source into an existing reusable source. For description-typed sources the
+ // existing clone-for-source description is updated in place and the superfluous new clone is
+ // detached from its described entity. Returns true if the existing source changed.
+ private <T extends DescriptionBase<?>> boolean mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {
+
+ boolean updated = false;
+ ICdmBase newTarget = newSource.getCdmSource();
+ if (newTarget != null){
+ newTarget = CdmBase.deproxy(newTarget);
+ if (newTarget instanceof DescriptionBase){
+ @SuppressWarnings("unchecked")
+ T newTargetDesc = (T)newTarget;
+ @SuppressWarnings("unchecked")
+ T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
+ updated |= mergeSourceDescription(existingTargetDesc, newTargetDesc);
+ ((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
+ if (!existingTargetDesc.equals(newTargetDesc)){
+ //the fresh clone is not needed anymore, the existing one was updated instead
+ ((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
+ }
+ }else if (newTarget instanceof Taxon){
+ //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
+ }else{
+ throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
}
+ }else{
+ throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
+ }
+ return updated;
+ }
+
+ // Merges the elements of the new source description into the existing one, matching by feature:
+ // matched elements are merged, unmatched new elements are cloned in, leftover old elements are
+ // removed. Returns true if any persistent change was made.
+ private <T extends DescriptionBase<?>> boolean mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {
+
+ boolean updated = false;
+ Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
+ Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());
+
+ for (DescriptionElementBase newElement : newElements){
+ //clone so the element can live in the existing description without being moved
+ DescriptionElementBase newElementClone = newElement.clone();
+ Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
+ .filter(e->e.getFeature()!= null
+ && e.getFeature().equals(newElementClone.getFeature()))
+ .findFirst();
+ if (matchingElement.isPresent()){
+ updated |= mergeDescriptionElement(matchingElement.get(), newElementClone);
+ elementsToRemove.remove(matchingElement.get());
+ }else{
+ existingSourceDescription.addElement(newElementClone);
+ updated = true;
+ }
+ }
+ updated |= addSourceDescriptionToDescribedEntity(newSourceDescription);
+ //title cache is synced unconditionally; not counted as an update on its own
+ existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);
+
+ for (DescriptionElementBase debToRemove : elementsToRemove){
+ existingSourceDescription.removeElement(debToRemove);
+ updated |= debToRemove.isPersisted();
+ }
+ return updated;
+ }
+
+ /**
+  * Ensures the given source description is attached to its described entity
+  * (specimen, taxon, ...). Returns <code>true</code> only if it was newly added.
+  */
+ @SuppressWarnings("unchecked")
+ private <T extends DescriptionBase<?>> boolean addSourceDescriptionToDescribedEntity(T sourceDescription) {
+ boolean updated = false;
+ IDescribable<T> describedEntity = ((IDescribable<T>)sourceDescription.describedEntity());
+ //fix: add only if NOT yet contained; the previous check was inverted, so a missing
+ //description was never attached and an already-attached one was wrongly reported as updated
+ if (!describedEntity.getDescriptions().contains(sourceDescription)){
+ describedEntity.addDescription(sourceDescription);
+ updated = true;
+ }
+ return updated;
+ }
+
+ // Returns the first existing source of the target description that may be reused for the
+ // given new source, or null if no reuse candidate exists.
+ private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
+ for (IdentifiableSource existingSource : targetDescription.getSources()){
+ boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
+ if (isCandidate){
+ return existingSource;
+ }
+ }
+ return null;
+ }
+
+ // Decides whether an existing source can be reused for a new source. Reuse requires that both
+ // link to cdm targets of the same class and, for descriptions, that they describe the same
+ // specimen resp. taxon; for taxon targets the taxa themselves must be equal.
+ private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
+ if (newSource.getCdmSource()!= null){
+ if (existingSource.getCdmSource() == null){
+ return false;
+ }else {
+ ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
+ ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
+ //NOTE(review): exact-class comparison (getClass) assumes deproxy yields the concrete class
+ if (!newTarget.getClass().equals(existingTarget.getClass())){
+ return false;
+ }else{
+ if (newTarget instanceof SpecimenDescription){
+ SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
+ SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
+ //for now reuse is possible if both are descriptions for the same specimen
+ return newSob != null && newSob.equals(existingSob);
+ }else if (newTarget instanceof TaxonDescription){
+ Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
+ Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
+ //for now reuse is possible if both are descriptions for the same taxon
+ return newTaxon != null && newTaxon.equals(existingTaxon);
+ }else if (newTarget instanceof Taxon){
+ return newTarget.equals(existingTarget);
+ }else{
+ throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
+ }
+ }
+ }
+ }
+
+ //sources without a cdm target are never reused
+ return false;
+ }
+
+ // Returns the description to be linked from an aggregation source. Aggregated structured
+ // descriptions are reused as-is unless cloning is explicitly configured; all other
+ // descriptions are cloned, detached from the dataset and marked as CLONE_FOR_SOURCE.
+ private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
+ if (!getConfig().isCloneAggregatedSourceDescriptions() && newSourceDescription.isAggregatedStructuredDescription()){
+ return newSourceDescription;
+ }
+ @SuppressWarnings("unchecked")
+ T clonedDescription = (T)newSourceDescription.clone();
+// clonedDescription.removeSources();
+ clonedDescription.removeDescriptiveDataSet(dataSet);
+ clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
+ clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
+ return clonedDescription;
+ }
+
+ @Override
+ // Dispatches the element merge to the categorical or quantitative implementation.
+ // Both elements are deproxied first so plain instanceof works below.
+ protected <S extends DescriptionElementBase> boolean mergeDescriptionElement(S targetElement,
+ S newElement) {
+
+ boolean updated = false;
+ targetElement = CdmBase.deproxy(targetElement);
+ newElement = CdmBase.deproxy(newElement);
+ if (targetElement instanceof CategoricalData){
+ updated |= mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
+ }else if (targetElement.isInstanceOf(QuantitativeData.class)){
+ updated |= mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
+ }else{
+ throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
}
+ return updated;
}
- private void replaceExistingDescriptionElements(TaxonDescription targetDescription,
- Map<Feature, ? extends DescriptionElementBase> elementMap) {
- for (Entry<Feature, ? extends DescriptionElementBase> entry : elementMap.entrySet()) {
- DescriptionElementBase elementToRemove = null;
- DescriptionElementBase elementReplacement = null;
- for (DescriptionElementBase descriptionElementBase : targetDescription.getElements()) {
- if(descriptionElementBase.getFeature().equals(entry.getKey())){
- elementToRemove = descriptionElementBase;
- elementReplacement = entry.getValue();
- break;
+ // Merges new categorical state data into the existing element, matching by state:
+ // counts of matched states are updated, unmatched new states are added, and states
+ // no longer present are removed. Returns true if any persistent change was made.
+ private boolean mergeDescriptionElement(CategoricalData elementToStay,
+ CategoricalData newElement) {
+
+ boolean updated = false;
+ List<StateData> dataToRemove = new ArrayList<>(elementToStay.getStateData());
+ List<StateData> newData = new ArrayList<>(newElement.getStateData());
+ for (StateData newStateData : newData){
+ DefinedTermBase<?> state = newStateData.getState();
+ StateData oldStateData = firstByState(state, dataToRemove);
+ if (oldStateData != null){
+ //for now only state and count is used for aggregation, below code needs to be adapted if this changes
+ if (!CdmUtils.nullSafeEqual(oldStateData.getCount(), newStateData.getCount())){
+ oldStateData.setCount(newStateData.getCount());
+// getResult().addUpdatedUuid(oldStateData);
+ updated = true;
}
+ dataToRemove.remove(oldStateData);
+ }else{
+ elementToStay.addStateData(newStateData);
+ updated = true;
}
- }
- if(elementToRemove!=null && elementReplacement!=null){
- targetDescription.removeElement(elementToRemove);
- targetDescription.addElement(elementReplacement);
+ }
+ for (StateData stateDataToRemove : dataToRemove){
+ elementToStay.removeStateData(stateDataToRemove);
+ //removal of not-yet-persisted state data is not an update
+ updated |= stateDataToRemove.isPersisted();
+ }
+ return updated;
+ }
+
+ // Returns the first state data with the given state, or null if the state is null
+ // or no matching entry exists.
+ private StateData firstByState(DefinedTermBase<?> state, List<StateData> oldData) {
+ if (state == null){
+ return null;
+ }
+ for (StateData sd : oldData){
+ if (state.equals(sd.getState())){
+ return sd;
+ }
+ }
+ return null;
+ }
+
+ // Merges new statistical values into the existing quantitative element, matching by
+ // measure type: matched values are updated, unmatched new values added, leftovers removed.
+ // Returns true if any persistent change was made.
+ private boolean mergeDescriptionElement(QuantitativeData elementToStay,
+ QuantitativeData newElement) {
+
+ boolean updated = false;
+
+ Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
+ Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
+ for (StatisticalMeasurementValue newValue : newValues){
+ StatisticalMeasure type = newValue.getType();
+ StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
+ if (oldValue != null){
+ //for now only measure type and value are used for aggregation, below code needs to be adapted if this changes
+ if (!CdmUtils.nullSafeEqual(oldValue.getValue(), newValue.getValue())){
+ oldValue.setValue(newValue.getValue());
+ updated = true;
+ }
+ oldValues.remove(oldValue);
+ }else{
+ elementToStay.addStatisticalValue(newValue);
+ updated = true;
}
}
+ for (StatisticalMeasurementValue valueToRemove : oldValues){
+ elementToStay.removeStatisticalValue(valueToRemove);
+ updated |= valueToRemove.isPersisted();
+ }
+ return updated;
+ }
+
+ // Returns the first statistical value of the given measure type, or null if the type
+ // is null or no matching value exists.
+ private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
+ if (type == null){
+ return null;
+ }
+ for (StatisticalMeasurementValue value : oldValues){
+ if (type.equals(value.getType())){
+ return value;
+ }
+ }
+ return null;
}
@Override
}
@Override
- protected void removeDescriptionIfEmpty(TaxonDescription description) {
- super.removeDescriptionIfEmpty(description);
+ protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
+ super.removeDescriptionIfEmpty(description, resultHolder);
if (description.getElements().isEmpty()){
dataSet.removeDescription(description);
}
ResultHolder resultHolder,
Set<TaxonDescription> excludedDescriptions) {
StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
- addDescriptionElement(descriptiveResultHolder, getChildTaxonDescriptions(taxonNode, dataSet));
+ Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
+ addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
}
@Override
ResultHolder resultHolder,
Set<TaxonDescription> excludedDescriptions) {
StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
- addDescriptionElement(descriptiveResultHolder, getSpecimenDescriptions(taxon, dataSet));
+
+ //specimen descriptions
+ Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
+ addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
+
+ //"literature" descriptions
+ if (getConfig().isIncludeLiterature()){
+ Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
+ addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
+ }
+
+ //"default" descriptions
+ //TODO add default descriptions
+ //xxx
+
}
- private void addDescriptionElement(StructuredDescriptionResultHolder descriptiveResultHolder,
- Set<? extends DescriptionBase<?>> descriptions) {
+ // Accumulates the character data of the given source descriptions (specimen, literature or
+ // default) into the result holder and, depending on the configured source mode for the
+ // given aggregation mode, records an aggregation source per used description.
+ private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
+ Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
+ AggregationMode aggregationMode) {
+
+ //NOTE(review): descriptionWasUsed is declared outside the loop and never reset, so once any
+ //description contributed data, all following descriptions also get a source - verify intent
boolean descriptionWasUsed = false;
- for (DescriptionBase<?> desc:descriptions){
+ for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
for (DescriptionElementBase deb: desc.getElements()){
if (hasCharacterData(deb)){
if (deb.isInstanceOf(CategoricalData.class)){
addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
descriptionWasUsed = true;
}else if (deb.isInstanceOf(QuantitativeData.class)){
- addToQuantitative(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
+ addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
descriptionWasUsed = true;
}
}
}
- if(descriptionWasUsed){
- descriptiveResultHolder.sourceDescriptions.add(desc);
+
+ //sources
+ AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
+ if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
+ IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
+ desc = CdmBase.deproxy(desc);
+
+ switch (sourceMode){
+ case DESCRIPTION:
+ DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
+ source.setCdmSource(clonedDesc);
+ break;
+ case TAXON:
+ if (desc instanceof TaxonDescription){
+ Taxon taxon = ((TaxonDescription) desc).getTaxon();
+ source.setCdmSource(taxon);
+ }else {
+ throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
+ }
+ break;
+ case NONE:
+ source = null;
+ break;
+ case ALL: //not yet supported
+ throw new AggregationException("Source mode not yet supported: " + sourceMode);
+ case ALL_SAMEVALUE: //makes no sense
+ throw new AggregationException("Illegal source mode: " + sourceMode);
+ default:
+ throw new AggregationException("Source mode not supported: " + sourceMode);
+ }
+ if (source != null){
+ descriptiveResultHolder.sources.add(source);
+ }
}
}
}
- private void addToQuantitative(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
+ private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
if(aggregatedQuantitativeData==null){
// no QuantitativeData with this feature in aggregation
- aggregatedQuantitativeData = aggregateSingleQuantitativeData(qd);
+ aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
}
else{
- aggregatedQuantitativeData = mergeQuantitativeData(aggregatedQuantitativeData, qd);
+ aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
}
if (aggregatedQuantitativeData != null){
resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
- if(aggregatedCategoricalData==null){
+ if(aggregatedCategoricalData == null){
// no CategoricalData with this feature in aggregation
aggregatedCategoricalData = cd.clone();
// set count to 1 if not set
- aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
- resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
+ if (!aggregatedCategoricalData.getStatesOnly().isEmpty()){
+ aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
+ resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
+ }
}
else{
// split all StateData into those where the state already exists and those where it doesn't
- List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
+ List<DefinedTermBase<?>> statesOnly = aggregatedCategoricalData.getStatesOnly();
List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
for (StateData sdExist : sdWithExistingStateInAggregation) {
List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
- .filter(sd->hasSameState(sdExist, sd))
- .collect(Collectors.toList());
+ .filter(sd->hasSameState(sdExist, sd))
+ .collect(Collectors.toList());
for (StateData stateData : aggregatedSameStateData) {
if(sdExist.getCount()==null){
stateData.incrementCount();
return new StructuredDescriptionResultHolder();
}
- private class StructuredDescriptionResultHolder implements ResultHolder{
- Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
- Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
- Set<DescriptionBase<?>> sourceDescriptions = new HashSet<>();
+ // Per-taxon accumulator: aggregated elements keyed by feature plus the sources to attach.
+ private class StructuredDescriptionResultHolder extends ResultHolder{
+ private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
+ private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
+ private Set<IdentifiableSource> sources = new HashSet<>();
+ @Override
+ public String toString() {
+ return "SDResultHolder [categoricals=" + categoricalMap.size()
+ + ", quantitatives=" + quantitativeMap.size()
+ + ", sources=" + sources.size()
+ + ", descriptionsToDelete=" + this.descriptionsToDelete.size()
+ + "]";
+ }
}
- /*
- * Static utility methods
- */
- private static Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
+ // Collects the already-aggregated structured descriptions of the direct children of the
+ // given taxon node, restricted to descriptions belonging to the dataset.
+ private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
Set<TaxonDescription> result = new HashSet<>();
List<TaxonNode> childNodes = taxonNode.getChildNodes();
for (TaxonNode childNode : childNodes) {
- result.addAll(childNode.getTaxon().getDescriptions().stream()
- .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
- .filter(desc->dataSet.getDescriptions().contains(desc))
- .collect(Collectors.toSet()));
+ Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
+ result.addAll(childDescriptions.stream()
+ .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
+ .filter(desc->dataSet.getDescriptions().contains(desc))
+ .collect(Collectors.toSet()));
}
return result;
}
- private static Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
+ /**
+ * Computes all specimens attached to the given taxon (via individuals associations)
+ * and returns their specimen descriptions belonging to the given dataSet,
+ * excluding descriptions of type CLONE_FOR_SOURCE.
+ * */
+ private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
Set<SpecimenDescription> result = new HashSet<>();
+ //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
for (TaxonDescription taxonDesc: taxon.getDescriptions()){
for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
- Set<SpecimenDescription> descriptions = (Set)specimen.getDescriptions();
- for(SpecimenDescription specimenDescription : descriptions){
- if(dataSet.getDescriptions().contains(specimenDescription) && specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
- result.add(specimenDescription);
- }
- }
+ Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
+ for(SpecimenDescription specimenDescription : descriptions){
+ if(dataSet.getDescriptions().contains(specimenDescription) &&
+ specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
+ result.add(specimenDescription);
+ }
+ }
}
}
}
return result;
}
- private QuantitativeData aggregateSingleQuantitativeData(QuantitativeData sourceQd){
+ // Returns the taxon descriptions of the given taxon that represent secondary ("literature")
+ // data within the dataset, excluding clone-for-source descriptions.
+ private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
+ Set<TaxonDescription> result = new HashSet<>();
+ //TODO performance: use DTO service to retrieve literature descriptions without initializing all taxon descriptions
+ for(TaxonDescription taxonDescription : taxon.getDescriptions()){
+ if(dataSet.getDescriptions().contains(taxonDescription)
+ && taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
+ && taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
+ result.add(taxonDescription);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Evaluates statistics for exact values collection and handles missing min and max values
+ */
+ private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
+ aggQD.setUnit(sourceQd.getUnit());
Set<BigDecimal> exactValues = sourceQd.getExactValues();
if(!exactValues.isEmpty()){
- Comparator<BigDecimal> comp = Comparator.naturalOrder();
// qd is not already aggregated
+ Comparator<BigDecimal> comp = Comparator.naturalOrder();
int exactValueSampleSize = exactValues.size();
BigDecimal exactValueMin = exactValues.stream().min(comp).get();
BigDecimal exactValueMax = exactValues.stream().max(comp).get();
}
private QuantitativeData handleMissingValues(QuantitativeData qd) {
+ //min max
qd = handleMissingMinOrMax(qd);
+ //average: if still missing, derive it from min and max
if (qd != null && qd.getAverage() == null){
BigDecimal n = qd.getSampleSize();
- if(n != null && !n.equals(0f)){
+ //fix: BigDecimal.equals(Float) is always false, so '!n.equals(0f)' never detected a
+ //zero sample size; compareTo really guards against division by zero
+ if(n != null && n.compareTo(BigDecimal.ZERO) != 0){
- qd.setAverage((qd.getMax().add(qd.getMin())).divide(n), null);
+ //NOTE(review): dividing (max+min) by the sample size looks suspicious; (max+min)/2
+ //would be the usual mid-range estimate - verify the intended semantics.
+ //DECIMAL32 (as used in addToExistingQuantitativeData) avoids ArithmeticException
+ //for non-terminating decimal expansions
+ BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n, MathContext.DECIMAL32);
+ qd.setAverage(average, null);
}
}
return qd;
return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
}
-
public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
MissingMaximumMode missingMaxMode) {
if(aggQD.getMin() == null && aggQD.getMax() != null){
return aggQD;
}
- private QuantitativeData mergeQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
+ private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
- newQd = aggregateSingleQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a cleear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
+ newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
BigDecimal min = null;
BigDecimal max = null;
sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
}
if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
- BigDecimal totalSum = aggQd.getAverage().multiply(aggQd.getSampleSize()).add(newQd.getAverage().multiply(newQd.getSampleSize()));
- average = totalSum.divide(sampleSize);
+ BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
+ BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
+ BigDecimal totalSum = aggTotalSum.add(newTotalSum);
+ average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros(); //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
}
aggQd.setMinimum(min, null);
aggQd.setMaximum(max, null);
}
+ // True if both state data refer to the same state term (compared by UUID);
+ // null states never match.
private static boolean hasSameState(StateData sd1, StateData sd2) {
- return sd2.getState().getUuid().equals(sd1.getState().getUuid());
+ if (sd2.getState() == null || sd1.getState() == null){
+ return false;
+ }else{
+ return sd2.getState().getUuid().equals(sd1.getState().getUuid());
+ }
}
}