Project

General

Profile

Download (20.2 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2019 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.api.service.description;
10

    
11
import java.math.BigDecimal;
12
import java.util.ArrayList;
13
import java.util.Comparator;
14
import java.util.HashMap;
15
import java.util.HashSet;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Map.Entry;
19
import java.util.Set;
20
import java.util.stream.Collectors;
21

    
22
import eu.etaxonomy.cdm.common.BigDecimalUtil;
23
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
24
import eu.etaxonomy.cdm.model.common.CdmBase;
25
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
26
import eu.etaxonomy.cdm.model.description.CategoricalData;
27
import eu.etaxonomy.cdm.model.description.DescriptionBase;
28
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
29
import eu.etaxonomy.cdm.model.description.DescriptionType;
30
import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
31
import eu.etaxonomy.cdm.model.description.Feature;
32
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
33
import eu.etaxonomy.cdm.model.description.QuantitativeData;
34
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
35
import eu.etaxonomy.cdm.model.description.State;
36
import eu.etaxonomy.cdm.model.description.StateData;
37
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
38
import eu.etaxonomy.cdm.model.description.TaxonDescription;
39
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
40
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
41
import eu.etaxonomy.cdm.model.taxon.Taxon;
42
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
43

    
44
/**
 * Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
 * <br>
 * For all {@link SpecimenDescription}s belonging to this data set, new
 * aggregated {@link TaxonDescription}s are created for every taxon the
 * specimens are directly associated with.<br>
 * Descriptions of lower rank taxa are also aggregated to upper rank taxa.
 *
 * @author a.mueller
 * @author p.plitzner
 * @since 03.11.2019
 */
56
public class StructuredDescriptionAggregation
57
        extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
58

    
59
    /**
     * The descriptive data set this aggregation works on. It is assigned in
     * {@link #initTransaction()} from the data set UUID of the configuration.
     */
    private DescriptiveDataSet dataSet;
60

    
61
    @Override
62
    protected String pluralDataType(){
63
        return "structured descriptive data";
64
    }
65

    
66
    @Override
67
    protected void preAggregate(IProgressMonitor monitor) {
68
        monitor.subTask("preAccumulate - nothing to do");
69

    
70
        // take start time for performance testing
71
        double start = System.currentTimeMillis();
72

    
73
        getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
74

    
75
        double end1 = System.currentTimeMillis();
76
        logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
77
    }
78

    
79

    
80
    private boolean hasCharacterData(DescriptionElementBase element) {
81
        return hasCategoricalData(element) || hasQuantitativeData(element);
82
    }
83

    
84
    private boolean hasQuantitativeData(DescriptionElementBase element) {
85
        if(element instanceof QuantitativeData
86
                && !((QuantitativeData) element).getStatisticalValues().isEmpty()){
87
            QuantitativeData quantitativeData = (QuantitativeData)element;
88
            return !getExactValues(quantitativeData).isEmpty()
89
                    || quantitativeData.getMin()!=null
90
                    || quantitativeData.getMax()!=null;
91
        }
92
        return false;
93
    }
94

    
95
    private boolean hasCategoricalData(DescriptionElementBase element) {
96
        return element instanceof CategoricalData && !((CategoricalData) element).getStatesOnly().isEmpty();
97
    }
98

    
99
    @Override
100
    protected void setDescriptionTitle(TaxonDescription description, Taxon taxon) {
101
        String title = taxon.getName() != null? taxon.getName().getTitleCache() : taxon.getTitleCache();
102
        description.setTitleCache("Aggregated description for " + title, true);
103
        return;
104
    }
105

    
106
    @Override
107
    protected TaxonDescription createNewDescription(Taxon taxon) {
108
        String title = taxon.getTitleCache();
109
        logger.debug("creating new description for " + title);
110
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
111
        description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
112
        setDescriptionTitle(description, taxon);
113
        return description;
114
    }
115

    
116
    @Override
117
    protected boolean hasDescriptionType(TaxonDescription description) {
118
        return dataSet.getDescriptions().contains(description) && description.isAggregatedStructuredDescription();
119
    }
120

    
121
    @Override
122
    protected List<String> descriptionInitStrategy() {
123
        return new ArrayList<>();
124
    }
125

    
126
    @Override
127
    protected void addAggregationResultToDescription(TaxonDescription targetDescription,
128
            ResultHolder resultHolder) {
129
        StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
130

    
131
        replaceExistingDescriptionElements(targetDescription, structuredResultHolder.categoricalMap);
132
        replaceExistingDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap);
133
        addAggregationSources(targetDescription, structuredResultHolder);
134

    
135
        if(!targetDescription.getElements().isEmpty()){
136
            dataSet.addDescription(targetDescription);
137
        }
138
    }
139

    
140
    private void addAggregationSources(TaxonDescription targetDescription,
141
            StructuredDescriptionResultHolder structuredResultHolder) {
142
        //FIXME Re-use sources if possible
143
        //Remove sources from description
144
        Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
145
                .filter(source->source.getType().equals(OriginalSourceType.Aggregation))
146
                .collect(Collectors.toSet());
147

    
148
        for (IdentifiableSource source : sourcesToRemove) {
149
            targetDescription.removeSource(source);
150
        }
151

    
152
        Set<DescriptionBase<?>> sourceDescriptions = structuredResultHolder.sourceDescriptions;
153
        for (DescriptionBase<?> descriptionBase : sourceDescriptions) {
154
            DescriptionBase<?> sourceDescription = null;
155
            if(descriptionBase.isInstanceOf(SpecimenDescription.class)){
156
                DescriptionBase<?> clone = descriptionBase.clone();
157
                clone.removeDescriptiveDataSet(dataSet);
158
                clone.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
159
                SpecimenOrObservationBase<?> specimen = CdmBase.deproxy(descriptionBase, SpecimenDescription.class).getDescribedSpecimenOrObservation();
160
                specimen.addDescription(CdmBase.deproxy(clone, SpecimenDescription.class));
161
                sourceDescription=clone;
162
            }
163
            else if(descriptionBase.isInstanceOf(TaxonDescription.class)){
164
                Taxon taxon = CdmBase.deproxy(descriptionBase, TaxonDescription.class).getTaxon();
165
                taxon.addDescription(CdmBase.deproxy(descriptionBase, TaxonDescription.class));
166
                sourceDescription=descriptionBase;
167
            }
168
            if(sourceDescription!=null){
169
                targetDescription.addAggregationSource(sourceDescription);
170
            }
171
        }
172
    }
173

    
174
    private void replaceExistingDescriptionElements(TaxonDescription targetDescription,
175
            Map<Feature, ? extends DescriptionElementBase> elementMap) {
176
        for (Entry<Feature, ? extends DescriptionElementBase> entry : elementMap.entrySet()) {
177
            DescriptionElementBase elementToRemove = null;
178
            DescriptionElementBase elementReplacement = null;
179
            for (DescriptionElementBase descriptionElementBase : targetDescription.getElements()) {
180
                if(descriptionElementBase.getFeature().equals(entry.getKey())){
181
                    elementToRemove = descriptionElementBase;
182
                    elementReplacement = entry.getValue();
183
                    break;
184
                }
185
            }
186
            if(elementToRemove!=null && elementReplacement!=null){
187
                targetDescription.removeElement(elementToRemove);
188
                targetDescription.addElement(elementReplacement);
189
            }
190
            else{
191
                targetDescription.addElement(entry.getValue());
192
            }
193
        }
194
    }
195

    
196
    @Override
197
    protected void initTransaction() {
198
        dataSet = getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
199
    }
200

    
201
    @Override
202
    protected void removeDescriptionIfEmpty(TaxonDescription description) {
203
        super.removeDescriptionIfEmpty(description);
204
        if (description.getElements().isEmpty()){
205
            dataSet.removeDescription(description);
206
        }
207
    }
208

    
209
    @Override
210
    protected void aggregateToParentTaxon(TaxonNode taxonNode,
211
            ResultHolder resultHolder,
212
            Set<TaxonDescription> excludedDescriptions) {
213
        StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
214
        addDescriptionElement(descriptiveResultHolder, getChildTaxonDescriptions(taxonNode, dataSet));
215
    }
216

    
217
    @Override
218
    protected void aggregateWithinSingleTaxon(Taxon taxon,
219
            ResultHolder resultHolder,
220
            Set<TaxonDescription> excludedDescriptions) {
221
        StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
222
        addDescriptionElement(descriptiveResultHolder, getSpecimenDescriptions(taxon, dataSet));
223
    }
224

    
225
    private void addDescriptionElement(StructuredDescriptionResultHolder descriptiveResultHolder,
226
            Set<? extends DescriptionBase<?>> descriptions) {
227
        boolean descriptionWasUsed = false;
228
        for (DescriptionBase<?> desc:descriptions){
229
            for (DescriptionElementBase deb: desc.getElements()){
230
                if (hasCharacterData(deb)){
231
                    if (deb.isInstanceOf(CategoricalData.class)){
232
                        addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
233
                        descriptionWasUsed = true;
234
                    }else if (deb.isInstanceOf(QuantitativeData.class)){
235
                        addToQuantitative(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
236
                        descriptionWasUsed = true;
237
                    }
238
                }
239
            }
240
            if(descriptionWasUsed){
241
                descriptiveResultHolder.sourceDescriptions.add(desc);
242
            }
243
        }
244
    }
245

    
246
    private void addToQuantitative(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
247
        QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
248
        if(aggregatedQuantitativeData==null){
249
            // no QuantitativeData with this feature in aggregation
250
            aggregatedQuantitativeData = aggregateSingleQuantitativeData(qd);
251
        }
252
        else{
253
            aggregatedQuantitativeData = mergeQuantitativeData(aggregatedQuantitativeData, qd);
254
        }
255
        if (aggregatedQuantitativeData != null){
256
            resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
257
        }
258
    }
259

    
260
    private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
261
        CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
262
        if(aggregatedCategoricalData==null){
263
            // no CategoricalData with this feature in aggregation
264
            aggregatedCategoricalData = cd.clone();
265
            // set count to 1 if not set
266
            aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
267
            resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
268
        }
269
        else{
270
            // split all StateData into those where the state already exists and those where it doesn't
271
            List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
272
            List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
273
            List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
274

    
275
            for (StateData sd : sdWithNoExistingStateInAggregation) {
276
                StateData clone = sd.clone();
277
                // set count to 1 if not set
278
                if(clone.getCount()==null){
279
                    clone.incrementCount();
280
                }
281
                aggregatedCategoricalData.addStateData(clone);
282
            }
283

    
284
            for (StateData sdExist : sdWithExistingStateInAggregation) {
285
                List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
286
                .filter(sd->hasSameState(sdExist, sd))
287
                .collect(Collectors.toList());
288
                for (StateData stateData : aggregatedSameStateData) {
289
                    if(sdExist.getCount()==null){
290
                        stateData.incrementCount();
291
                    }
292
                    else{
293
                        stateData.setCount(stateData.getCount()+sdExist.getCount());
294
                    }
295
                }
296
            }
297
        }
298
    }
299

    
300
    @Override
301
    protected StructuredDescriptionResultHolder createResultHolder() {
302
        return new StructuredDescriptionResultHolder();
303
    }
304

    
305
    private class StructuredDescriptionResultHolder implements ResultHolder{
306
        Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
307
        Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
308
        Set<DescriptionBase<?>> sourceDescriptions = new HashSet<>();
309
    }
310

    
311
    /*
312
     * Static utility methods
313
     */
314
    private static Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
315
        Set<TaxonDescription> result = new HashSet<>();
316
        List<TaxonNode> childNodes = taxonNode.getChildNodes();
317
        for (TaxonNode childNode : childNodes) {
318
            result.addAll(childNode.getTaxon().getDescriptions().stream()
319
            .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
320
            .filter(desc->dataSet.getDescriptions().contains(desc))
321
            .collect(Collectors.toSet()));
322
        }
323
        return result;
324
    }
325

    
326
    private static Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
327
        Set<SpecimenDescription> result = new HashSet<>();
328
        for (TaxonDescription taxonDesc: taxon.getDescriptions()){
329
            for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
330
                if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
331
                    IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
332
                    SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
333
                     Set<SpecimenDescription> descriptions = (Set)specimen.getDescriptions();
334
                     for(SpecimenDescription specimenDescription : descriptions){
335
                         if(dataSet.getDescriptions().contains(specimenDescription) && specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
336
                             result.add(specimenDescription);
337
                         }
338
                     }
339
                }
340
            }
341
        }
342
        return result;
343
    }
344

    
345
    private QuantitativeData aggregateSingleQuantitativeData(QuantitativeData sourceQd){
346
        QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
347
        Set<BigDecimal> exactValues = sourceQd.getExactValues();
348
        if(!exactValues.isEmpty()){
349
            Comparator<BigDecimal> comp = Comparator.naturalOrder();
350
            // qd is not already aggregated
351
            int exactValueSampleSize = exactValues.size();
352
            BigDecimal exactValueMin = exactValues.stream().min(comp).get();
353
            BigDecimal exactValueMax = exactValues.stream().max(comp).get();
354
            BigDecimal exactValueAvg = BigDecimalUtil.average(exactValues);
355
            //TODO also check for typical boundary data
356
            if(sourceQd.getMin() == null && sourceQd.getMax() == null){
357
                aggQD.setSampleSize(new BigDecimal(exactValueSampleSize), null);
358
                aggQD.setAverage(exactValueAvg, null);
359
            }
360
            aggQD.setMinimum(sourceQd.getMin() == null ? exactValueMin: sourceQd.getMin().min(exactValueMin), null);
361
            aggQD.setMaximum(sourceQd.getMax() == null ? exactValueMax: sourceQd.getMax().max(exactValueMax), null);
362
        }
363
        else{
364
            // qd has only min, max, ... but no exact values
365
            aggQD = sourceQd.clone();
366
            aggQD = handleMissingValues(aggQD);
367
        }
368
        return aggQD;
369
    }
370

    
371
    private QuantitativeData handleMissingValues(QuantitativeData qd) {
372
        qd = handleMissingMinOrMax(qd);
373
        if (qd != null && qd.getAverage() == null){
374
            BigDecimal n = qd.getSampleSize();
375
            if(n != null && !n.equals(0f)){
376
                qd.setAverage((qd.getMax().add(qd.getMin())).divide(n), null);
377
            }
378
        }
379
        return qd;
380
    }
381

    
382
    private QuantitativeData handleMissingMinOrMax(QuantitativeData qd) {
383
        return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
384
    }
385

    
386

    
387
    public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
388
            MissingMaximumMode missingMaxMode) {
389
        if(aggQD.getMin() == null && aggQD.getMax() != null){
390
            if (missingMinMode == MissingMinimumMode.MinToZero) {
391
                aggQD.setMinimum(BigDecimal.valueOf(0f), null);
392
            }else if (missingMinMode == MissingMinimumMode.MinToMax){
393
                aggQD.setMinimum(aggQD.getMax(), null);
394
            }else if (missingMinMode == MissingMinimumMode.SkipRecord){
395
                return null;
396
            }
397
        }
398
        if(aggQD.getMax() == null && aggQD.getMin() != null){
399
            if (missingMaxMode == MissingMaximumMode.MaxToMin){
400
                aggQD.setMaximum(aggQD.getMin(), null);
401
            }else if (missingMaxMode == MissingMaximumMode.SkipRecord){
402
                return null;
403
            }
404
        }
405
        return aggQD;
406
    }
407

    
408
    private QuantitativeData mergeQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
409

    
410
        newQd = aggregateSingleQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a cleear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
411

    
412
        BigDecimal min = null;
413
        BigDecimal max = null;
414
        BigDecimal average = null;
415
        BigDecimal sampleSize = null;
416
        newQd = handleMissingValues(newQd);
417
        if (newQd == null){
418
            return aggQd;
419
        }
420
        min = aggQd.getMin().min(newQd.getMin());
421
        max = aggQd.getMax().max(newQd.getMax());
422
        if (newQd.getSampleSize() != null && aggQd.getSampleSize() != null){
423
            sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
424
        }
425
        if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
426
            BigDecimal totalSum = aggQd.getAverage().multiply(aggQd.getSampleSize()).add(newQd.getAverage().multiply(newQd.getSampleSize()));
427
            average = totalSum.divide(sampleSize);
428
        }
429
        aggQd.setMinimum(min, null);
430
        aggQd.setMaximum(max, null);
431
        aggQd.setSampleSize(sampleSize, null);
432
        aggQd.setAverage(average, null);
433
        return aggQd;
434
    }
435

    
436
    private static List<BigDecimal> getExactValues(QuantitativeData qd) {
437
        List<BigDecimal> exactValues = qd.getStatisticalValues().stream()
438
                .filter(value->value.getType().equals(StatisticalMeasure.EXACT_VALUE()))
439
                .map(exact->exact.getValue())
440
                .collect(Collectors.toList());
441
        return exactValues;
442
    }
443

    
444
    private static boolean hasSameState(StateData sd1, StateData sd2) {
445
        return sd2.getState().getUuid().equals(sd1.getState().getUuid());
446
    }
447
}
(10-10/11)