Project

General

Profile

Download (37 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2019 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.api.service.description;
10

    
11
import java.math.BigDecimal;
12
import java.math.MathContext;
13
import java.util.ArrayList;
14
import java.util.Comparator;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Optional;
20
import java.util.Set;
21
import java.util.stream.Collectors;
22

    
23
import eu.etaxonomy.cdm.common.BigDecimalUtil;
24
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
25
import eu.etaxonomy.cdm.model.common.CdmBase;
26
import eu.etaxonomy.cdm.model.common.ICdmBase;
27
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
28
import eu.etaxonomy.cdm.model.description.CategoricalData;
29
import eu.etaxonomy.cdm.model.description.DescriptionBase;
30
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
31
import eu.etaxonomy.cdm.model.description.DescriptionType;
32
import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
33
import eu.etaxonomy.cdm.model.description.Feature;
34
import eu.etaxonomy.cdm.model.description.IDescribable;
35
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
36
import eu.etaxonomy.cdm.model.description.QuantitativeData;
37
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
38
import eu.etaxonomy.cdm.model.description.State;
39
import eu.etaxonomy.cdm.model.description.StateData;
40
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
41
import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
42
import eu.etaxonomy.cdm.model.description.TaxonDescription;
43
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
44
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
45
import eu.etaxonomy.cdm.model.taxon.Taxon;
46
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
47

    
48
/**
49
 * Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
50
 * <br>
51
 * For all {@link SpecimenDescription}s belonging to this data set, new
52
 * aggregated {@link TaxonDescription}s are created for every taxon the
53
 * specimens are directly associated with.<BR>
54
 * Also lower rank taxon descriptions are aggregated to upper rank taxa.
55
 *
56
 * @author a.mueller
57
 * @author p.plitzner
58
 * @since 03.11.2019
59
 */
60
public class StructuredDescriptionAggregation
61
        extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
62

    
63
    private DescriptiveDataSet dataSet;
64

    
65
    @Override
66
    protected String pluralDataType(){
67
        return "structured descriptive data";
68
    }
69

    
70
    @Override
71
    protected void preAggregate(IProgressMonitor monitor) {
72
        monitor.subTask("preAccumulate - nothing to do");
73

    
74
        // take start time for performance testing
75
        double start = System.currentTimeMillis();
76

    
77
        getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
78

    
79
        double end1 = System.currentTimeMillis();
80
        logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
81
    }
82

    
83
    @Override
84
    protected void verifyConfiguration(IProgressMonitor monitor){
85
        if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
86
            .contains(getConfig().getToParentSourceMode())){
87
            throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
88
        }
89
        if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
90
                .contains(getConfig().getWithinTaxonSourceMode())){
91
                throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
92
        }
93
    }
94

    
95
    private boolean hasCharacterData(DescriptionElementBase element) {
96
        return hasCategoricalData(element) || hasQuantitativeData(element);
97
    }
98

    
99
    private boolean hasQuantitativeData(DescriptionElementBase element) {
100
        if(element instanceof QuantitativeData
101
                && !((QuantitativeData) element).getStatisticalValues().isEmpty()){
102
            QuantitativeData quantitativeData = (QuantitativeData)element;
103
            return !getExactValues(quantitativeData).isEmpty()
104
                    || quantitativeData.getMin()!=null
105
                    || quantitativeData.getMax()!=null;
106
        }
107
        return false;
108
    }
109

    
110
    private boolean hasCategoricalData(DescriptionElementBase element) {
111
        return element instanceof CategoricalData && !((CategoricalData) element).getStatesOnly().isEmpty();
112
    }
113

    
114
    @Override
115
    protected void setDescriptionTitle(TaxonDescription description, Taxon taxon) {
116
        String title = taxon.getName() != null? taxon.getName().getTitleCache() : taxon.getTitleCache();
117
        description.setTitleCache("Aggregated description for " + title, true);
118
        return;
119
    }
120

    
121
    @Override
122
    protected TaxonDescription createNewDescription(Taxon taxon) {
123
        String title = taxon.getTitleCache();
124
        if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
125
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
126
        description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
127
        setDescriptionTitle(description, taxon);
128
        return description;
129
    }
130

    
131
    @Override
132
    protected boolean hasDescriptionType(TaxonDescription description) {
133
        return dataSet.getDescriptions().contains(description) && description.isAggregatedStructuredDescription();
134
    }
135

    
136
    @Override
137
    protected List<String> descriptionInitStrategy() {
138
        return new ArrayList<>();
139
    }
140

    
141
    @Override
142
    protected void addAggregationResultToDescription(TaxonDescription targetDescription,
143
            ResultHolder resultHolder) {
144

    
145
        StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
146
        mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
147
        mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
148
        addAggregationSources(targetDescription, structuredResultHolder);
149

    
150
        if(!targetDescription.getElements().isEmpty()){
151
            dataSet.addDescription(targetDescription);
152
        }else{
153
            dataSet.removeDescription(targetDescription);
154
        }
155
    }
156

    
157
    private <T extends DescriptionBase<?>> void addAggregationSources(TaxonDescription targetDescription,
158
                StructuredDescriptionResultHolder structuredResultHolder) {
159

    
160
        //Remove sources from description
161
        Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
162
                .filter(source->source.getType().equals(OriginalSourceType.Aggregation))
163
                .collect(Collectors.toSet());
164

    
165
        Set<IdentifiableSource> newSources = structuredResultHolder.sources;
166
        for (IdentifiableSource newSource : newSources) {
167
            IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
168
            if (mergeSourceCandidate == null){
169
                addNewSource(targetDescription, newSource);
170
            }else{
171
                mergeSource(mergeSourceCandidate, newSource);
172
                sourcesToRemove.remove(mergeSourceCandidate);
173
            }
174
        }
175

    
176
        //remove remaining sources-to-be-removed
177
        for (IdentifiableSource sourceToRemove : sourcesToRemove) {
178
            targetDescription.removeSource(sourceToRemove);
179
            ICdmBase target = CdmBase.deproxy(sourceToRemove.getCdmSource());
180
            if (target != null){
181
                sourceToRemove.setCdmSource(null); //workaround for missing orphan removal #9801
182
                if (target instanceof DescriptionBase){
183
                    @SuppressWarnings("unchecked")
184
                    T descriptionToDelete = (T)target;
185
                    if (descriptionToDelete.isCloneForSource()){
186
                        //TODO maybe this is not really needed as it is later done anyway with .deltedDescription
187
                        //but currently this still leads to an re-saved by cascade exception
188
                        ((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
189
                        structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
190
                    }
191
                }else if (target.isInstanceOf(Taxon.class)){
192
                    //nothing to do for now
193
                } else {
194
                    throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
195
                }
196
            }
197
        }
198
    }
199

    
200
    private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
201
            IdentifiableSource newSource) {
202

    
203
        //add source
204
        targetDescription.addSource(newSource);
205
        //if it is a description add it to the described entity (specimen, taxon)
206
        ICdmBase target = newSource.getCdmSource();
207
        if (target != null){
208
            if (target.isInstanceOf(DescriptionBase.class)){
209
                @SuppressWarnings("unchecked")
210
                T description = (T)CdmBase.deproxy(target);
211
                ((IDescribable<T>)description.describedEntity()).addDescription(description);
212
            }
213
        }
214
    }
215

    
216
    //mergeablity has been checked before
217
    private <T extends DescriptionBase<?>> void mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {
218

    
219
        ICdmBase newTarget = newSource.getCdmSource();
220
        if (newTarget != null){
221
            newTarget = CdmBase.deproxy(newTarget);
222
            if (newTarget instanceof DescriptionBase){
223
                @SuppressWarnings("unchecked")
224
                T newTargetDesc = (T)newTarget;
225
                @SuppressWarnings("unchecked")
226
                T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
227
                mergeSourceDescription(existingTargetDesc, newTargetDesc);
228
                ((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
229
                if (!existingTargetDesc.equals(newTargetDesc)){
230
                    ((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
231
                }
232
            }else if (newTarget instanceof Taxon){
233
                //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
234
            }else{
235
                throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
236
            }
237
        }else{
238
            throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
239
        }
240
    }
241

    
242
    private <T extends DescriptionBase<?>> void mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {
243

    
244
        Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
245
        Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());
246

    
247
        for (DescriptionElementBase newElement : newElements){
248
            DescriptionElementBase newElementClone = newElement.clone();
249
            Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
250
                    .filter(e->e.getFeature()!= null
251
                        && e.getFeature().equals(newElementClone.getFeature()))
252
                    .findFirst();
253
            if (matchingElement.isPresent()){
254
                mergeDescriptionElement(matchingElement.get(), newElementClone);
255
                elementsToRemove.remove(matchingElement.get());
256
            }else{
257
                existingSourceDescription.addElement(newElementClone);
258
            }
259
        }
260
        addSourceDescriptionToDescribedEntity(newSourceDescription);
261
        existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);
262

    
263
        for (DescriptionElementBase debToRemove : elementsToRemove){
264
            existingSourceDescription.removeElement(debToRemove);
265
        }
266

    
267
    }
268

    
269
    @SuppressWarnings("unchecked")
270
    private <T extends DescriptionBase<?>> void addSourceDescriptionToDescribedEntity(T sourceDescription) {
271
        ((IDescribable<T>)sourceDescription.describedEntity()).addDescription(sourceDescription);
272
    }
273
    @SuppressWarnings("unchecked")
274
    private <T extends DescriptionBase<?>> void removeSourceDescriptionFromDescribedEntity(T sourceDescription) {
275
        ((IDescribable<T>)sourceDescription.describedEntity()).removeDescription(sourceDescription);
276
    }
277

    
278
    private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
279
        for (IdentifiableSource existingSource : targetDescription.getSources()){
280
            boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
281
            if (isCandidate){
282
                return existingSource;
283
            }
284
        }
285
        return null;
286
    }
287

    
288
    private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
289
        if (newSource.getCdmSource()!= null){
290
            if (existingSource.getCdmSource() == null){
291
                return false;
292
            }else {
293
                ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
294
                ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
295
                if (!newTarget.getClass().equals(existingTarget.getClass())){
296
                    return false;
297
                }else{
298
                    if (newTarget instanceof SpecimenDescription){
299
                        SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
300
                        SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
301
                        //for now reuse is possible if both are descriptions for the same specimen
302
                        return newSob != null && newSob.equals(existingSob);
303
                    }else if (newTarget instanceof TaxonDescription){
304
                        Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
305
                        Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
306
                        //for now reuse is possible if both are descriptions for the same taxon
307
                        return newTaxon != null && newTaxon.equals(existingTaxon);
308
                    }else if (newTarget instanceof Taxon){
309
                        return newTarget.equals(existingTarget);
310
                    }else{
311
                        throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
312
                    }
313
                }
314
            }
315
        }
316

    
317
        return false;
318
    }
319

    
320
    private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
321
        if (!getConfig().isCloneAggregatedSourceDescriptions() && newSourceDescription.isAggregatedStructuredDescription()){
322
            return newSourceDescription;
323
        }
324
        @SuppressWarnings("unchecked")
325
        T clonedDescription = (T)newSourceDescription.clone();
326
//        clonedDescription.removeSources();
327
        clonedDescription.removeDescriptiveDataSet(dataSet);
328
        clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
329
        clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
330
        return clonedDescription;
331
    }
332

    
333
    private <S extends DescriptionElementBase> void mergeDescriptionElements(TaxonDescription targetDescription,
334
            Map<Feature, ? extends DescriptionElementBase> newElementsMap, Class<? extends DescriptionElementBase> debClass) {
335

    
336
        Set<DescriptionElementBase> elementsToRemove = new HashSet<>(
337
                targetDescription.getElements().stream()
338
                    .filter(el->el.isInstanceOf(debClass))
339
                    .collect(Collectors.toSet()));
340

    
341
        //for each character in "characters of new elements"
342
        for (Feature characterNew : newElementsMap.keySet()) {
343

    
344
            //if elements for this character exist in old data, remember any of them to keep (in clean data there should be only max. 1
345
            DescriptionElementBase elementToStay = null;
346
            for (DescriptionElementBase existingDeb : elementsToRemove) {
347
                if(existingDeb.getFeature().equals(characterNew)){
348
                    elementToStay = existingDeb;
349
                    elementsToRemove.remove(existingDeb);
350
                    break;
351
                }
352
            }
353

    
354
            //if there is no element for this character in old data, add the new element for this character to the target description (otherwise reuse old element)
355
            if (elementToStay == null){
356
                targetDescription.addElement(newElementsMap.get(characterNew));
357
            }else{
358
                mergeDescriptionElement(elementToStay, newElementsMap.get(characterNew));
359
            }
360
        }
361

    
362
        //remove all elements not needed anymore
363
        for(DescriptionElementBase elementToRemove : elementsToRemove){
364
            targetDescription.removeElement(elementToRemove);
365
        }
366
    }
367

    
368
    private void mergeDescriptionElement(DescriptionElementBase targetElement,
369
            DescriptionElementBase newElement) {
370

    
371
        targetElement = CdmBase.deproxy(targetElement);
372
        newElement = CdmBase.deproxy(newElement);
373
        if (targetElement instanceof CategoricalData){
374
            mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
375
        }else if (targetElement.isInstanceOf(QuantitativeData.class)){
376
            mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
377
        }else{
378
            throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
379
        }
380
    }
381

    
382
    private void mergeDescriptionElement(CategoricalData elementToStay,
383
            CategoricalData newElement) {
384
        List<StateData> oldData = new ArrayList<>(elementToStay.getStateData());
385
        List<StateData> newData = new ArrayList<>(newElement.getStateData());
386
        for (StateData newStateData : newData){
387
            State state = newStateData.getState();
388
            StateData oldStateData = firstByState(state, oldData);
389
            if (oldStateData != null){
390
                //for now only state and count is used for aggregation, below code needs to be adapted if this changes
391
                oldStateData.setCount(newStateData.getCount());
392
                oldData.remove(oldStateData);
393
            }else{
394
                elementToStay.addStateData(newStateData);
395
            }
396
        }
397
        for (StateData stateDataToRemove : oldData){
398
            elementToStay.removeStateData(stateDataToRemove);
399
        }
400
    }
401

    
402
    private StateData firstByState(State state, List<StateData> oldData) {
403
        if (state == null){
404
            return null;
405
        }
406
        for (StateData sd : oldData){
407
            if (state.equals(sd.getState())){
408
                return sd;
409
            }
410
        }
411
        return null;
412
    }
413

    
414
    private void mergeDescriptionElement(QuantitativeData elementToStay,
415
            QuantitativeData newElement) {
416
        Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
417
        Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
418
        for (StatisticalMeasurementValue newValue : newValues){
419
            StatisticalMeasure type = newValue.getType();
420
            StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
421
            if (oldValue != null){
422
                //for now only state and count is used for aggregation, below code needs to be adapted if this changes
423
                oldValue.setValue(newValue.getValue());
424
                oldValues.remove(oldValue);
425
            }else{
426
                elementToStay.addStatisticalValue(newValue);
427
            }
428
        }
429
        for (StatisticalMeasurementValue valueToRemove : oldValues){
430
            elementToStay.removeStatisticalValue(valueToRemove);
431
        }
432
    }
433

    
434
    private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
435
        if (type == null){
436
            return null;
437
        }
438
        for (StatisticalMeasurementValue value : oldValues){
439
            if (type.equals(value.getType())){
440
                return value;
441
            }
442
        }
443
        return null;
444
    }
445

    
446
    @Override
447
    protected void initTransaction() {
448
        dataSet = getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
449
    }
450

    
451
    @Override
452
    protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
453
        super.removeDescriptionIfEmpty(description, resultHolder);
454
        if (description.getElements().isEmpty()){
455
            dataSet.removeDescription(description);
456
        }
457
    }
458

    
459
    @Override
460
    protected void aggregateToParentTaxon(TaxonNode taxonNode,
461
            ResultHolder resultHolder,
462
            Set<TaxonDescription> excludedDescriptions) {
463
        StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
464
        Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
465
        addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
466
    }
467

    
468
    @Override
469
    protected void aggregateWithinSingleTaxon(Taxon taxon,
470
            ResultHolder resultHolder,
471
            Set<TaxonDescription> excludedDescriptions) {
472
        StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
473

    
474
        //specimen descriptions
475
        Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
476
        addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
477

    
478
        //"literature" descriptions
479
        if (getConfig().isIncludeLiterature()){
480
            Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
481
            addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
482
        }
483

    
484
        //"default" descriptions
485
        //TODO add default descriptions
486
        //xxx
487

    
488
    }
489

    
490
    private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
491
            Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
492
            AggregationMode aggregationMode) {
493

    
494
        boolean descriptionWasUsed = false;
495
        for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
496
            for (DescriptionElementBase deb: desc.getElements()){
497
                if (hasCharacterData(deb)){
498
                    if (deb.isInstanceOf(CategoricalData.class)){
499
                        addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
500
                        descriptionWasUsed = true;
501
                    }else if (deb.isInstanceOf(QuantitativeData.class)){
502
                        addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
503
                        descriptionWasUsed = true;
504
                    }
505
                }
506
            }
507

    
508
            //sources
509
            AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
510
            if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
511
                IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
512
                desc = CdmBase.deproxy(desc);
513

    
514
                switch (sourceMode){
515
                    case DESCRIPTION:
516
                        DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
517
                        source.setCdmSource(clonedDesc);
518
                        break;
519
                    case TAXON:
520
                        if (desc instanceof TaxonDescription){
521
                            Taxon taxon = ((TaxonDescription) desc).getTaxon();
522
                            source.setCdmSource(taxon);
523
                        }else {
524
                            throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
525
                        }
526
                        break;
527
                    case NONE:
528
                        source = null;
529
                        break;
530
                    case ALL: //not yet supported
531
                        throw new AggregationException("Source mode not yet supported: " + sourceMode);
532
                    case ALL_SAMEVALUE: //makes no sense
533
                        throw new AggregationException("Illegal source mode: " + sourceMode);
534
                    default:
535
                        throw new AggregationException("Source mode not supported: " + sourceMode);
536
                }
537
                if (source != null){
538
                    descriptiveResultHolder.sources.add(source);
539
                }
540
            }
541
        }
542
    }
543

    
544
    private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
545
        QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
546
        if(aggregatedQuantitativeData==null){
547
            // no QuantitativeData with this feature in aggregation
548
            aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
549
        }
550
        else{
551
            aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
552
        }
553
        if (aggregatedQuantitativeData != null){
554
            resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
555
        }
556
    }
557

    
558
    private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
559
        CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
560
        if(aggregatedCategoricalData==null){
561
            // no CategoricalData with this feature in aggregation
562
            aggregatedCategoricalData = cd.clone();
563
            // set count to 1 if not set
564
            aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
565
            resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
566
        }
567
        else{
568
            // split all StateData into those where the state already exists and those where it doesn't
569
            List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
570
            List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
571
            List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
572

    
573
            for (StateData sd : sdWithNoExistingStateInAggregation) {
574
                StateData clone = sd.clone();
575
                // set count to 1 if not set
576
                if(clone.getCount()==null){
577
                    clone.incrementCount();
578
                }
579
                aggregatedCategoricalData.addStateData(clone);
580
            }
581

    
582
            for (StateData sdExist : sdWithExistingStateInAggregation) {
583
                List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
584
                        .filter(sd->hasSameState(sdExist, sd))
585
                        .collect(Collectors.toList());
586
                for (StateData stateData : aggregatedSameStateData) {
587
                    if(sdExist.getCount()==null){
588
                        stateData.incrementCount();
589
                    }
590
                    else{
591
                        stateData.setCount(stateData.getCount()+sdExist.getCount());
592
                    }
593
                }
594
            }
595
        }
596
    }
597

    
598
    @Override
599
    protected StructuredDescriptionResultHolder createResultHolder() {
600
        return new StructuredDescriptionResultHolder();
601
    }
602

    
603
    private class StructuredDescriptionResultHolder extends ResultHolder{
604
        private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
605
        private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
606
        private Set<IdentifiableSource> sources = new HashSet<>();
607
        @Override
608
        public String toString() {
609
            return "SDResultHolder [categoricals=" + categoricalMap.size()
610
                + ", quantitatives=" + quantitativeMap.size()
611
                + ", sources=" + sources.size()
612
                + ", descriptionsToDelete=" + this.descriptionsToDelete.size()
613
                + "]";
614
        }
615
    }
616

    
617
    private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
618
        Set<TaxonDescription> result = new HashSet<>();
619
        List<TaxonNode> childNodes = taxonNode.getChildNodes();
620
        for (TaxonNode childNode : childNodes) {
621
            Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
622
            result.addAll(childDescriptions.stream()
623
                .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
624
                .filter(desc->dataSet.getDescriptions().contains(desc))
625
                .collect(Collectors.toSet()));
626
        }
627
        return result;
628
    }
629

    
630
    /**
631
     * Computes all specimen attached to the given taxon within the given dataSet.
632
     * For these secimen it returns all attache
633
     * */
634
    private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
635
        Set<SpecimenDescription> result = new HashSet<>();
636
        //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
637
        for (TaxonDescription taxonDesc: taxon.getDescriptions()){
638
            for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
639
                if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
640
                    IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
641
                    SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
642
                    Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
643
                    for(SpecimenDescription specimenDescription : descriptions){
644
                        if(dataSet.getDescriptions().contains(specimenDescription) &&
645
                                specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
646
                            result.add(specimenDescription);
647
                        }
648
                    }
649
                }
650
            }
651
        }
652
        return result;
653
    }
654

    
655
    private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
656
        Set<TaxonDescription> result = new HashSet<>();
657
        //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
658
        for(TaxonDescription taxonDescription : taxon.getDescriptions()){
659
            if(dataSet.getDescriptions().contains(taxonDescription)
660
                    && taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
661
                    && taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
662
                result.add(taxonDescription);
663
            }
664
        }
665
        return result;
666
    }
667

    
668
    /**
669
     * Evaluates statistics for exact values collection and handles missing min and max values
670
     */
671
    private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
672
        QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
673
        aggQD.setUnit(sourceQd.getUnit());
674
        Set<BigDecimal> exactValues = sourceQd.getExactValues();
675
        if(!exactValues.isEmpty()){
676
            // qd is not already aggregated
677
            Comparator<BigDecimal> comp = Comparator.naturalOrder();
678
            int exactValueSampleSize = exactValues.size();
679
            BigDecimal exactValueMin = exactValues.stream().min(comp).get();
680
            BigDecimal exactValueMax = exactValues.stream().max(comp).get();
681
            BigDecimal exactValueAvg = BigDecimalUtil.average(exactValues);
682
            //TODO also check for typical boundary data
683
            if(sourceQd.getMin() == null && sourceQd.getMax() == null){
684
                aggQD.setSampleSize(new BigDecimal(exactValueSampleSize), null);
685
                aggQD.setAverage(exactValueAvg, null);
686
            }
687
            aggQD.setMinimum(sourceQd.getMin() == null ? exactValueMin: sourceQd.getMin().min(exactValueMin), null);
688
            aggQD.setMaximum(sourceQd.getMax() == null ? exactValueMax: sourceQd.getMax().max(exactValueMax), null);
689
        }
690
        else{
691
            // qd has only min, max, ... but no exact values
692
            aggQD = sourceQd.clone();
693
            aggQD = handleMissingValues(aggQD);
694
        }
695
        return aggQD;
696
    }
697

    
698
    private QuantitativeData handleMissingValues(QuantitativeData qd) {
699
        //min max
700
        qd = handleMissingMinOrMax(qd);
701
        //average
702
        if (qd != null && qd.getAverage() == null){
703
            BigDecimal n = qd.getSampleSize();
704
            if(n != null && !n.equals(0f)){
705
                BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n);
706
                qd.setAverage(average, null);
707
            }
708
        }
709
        return qd;
710
    }
711

    
712
    private QuantitativeData handleMissingMinOrMax(QuantitativeData qd) {
713
        return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
714
    }
715

    
716
    public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
717
            MissingMaximumMode missingMaxMode) {
718
        if(aggQD.getMin() == null && aggQD.getMax() != null){
719
            if (missingMinMode == MissingMinimumMode.MinToZero) {
720
                aggQD.setMinimum(BigDecimal.valueOf(0f), null);
721
            }else if (missingMinMode == MissingMinimumMode.MinToMax){
722
                aggQD.setMinimum(aggQD.getMax(), null);
723
            }else if (missingMinMode == MissingMinimumMode.SkipRecord){
724
                return null;
725
            }
726
        }
727
        if(aggQD.getMax() == null && aggQD.getMin() != null){
728
            if (missingMaxMode == MissingMaximumMode.MaxToMin){
729
                aggQD.setMaximum(aggQD.getMin(), null);
730
            }else if (missingMaxMode == MissingMaximumMode.SkipRecord){
731
                return null;
732
            }
733
        }
734
        return aggQD;
735
    }
736

    
737
    private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
738

    
739
        newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
740

    
741
        BigDecimal min = null;
742
        BigDecimal max = null;
743
        BigDecimal average = null;
744
        BigDecimal sampleSize = null;
745
        newQd = handleMissingValues(newQd);
746
        if (newQd == null){
747
            return aggQd;
748
        }
749
        min = aggQd.getMin().min(newQd.getMin());
750
        max = aggQd.getMax().max(newQd.getMax());
751
        if (newQd.getSampleSize() != null && aggQd.getSampleSize() != null){
752
            sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
753
        }
754
        if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
755
            BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
756
            BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
757
            BigDecimal totalSum = aggTotalSum.add(newTotalSum);
758
            average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros();  //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
759
        }
760
        aggQd.setMinimum(min, null);
761
        aggQd.setMaximum(max, null);
762
        aggQd.setSampleSize(sampleSize, null);
763
        aggQd.setAverage(average, null);
764
        return aggQd;
765
    }
766

    
767
    private static List<BigDecimal> getExactValues(QuantitativeData qd) {
768
        List<BigDecimal> exactValues = qd.getStatisticalValues().stream()
769
                .filter(value->value.getType().equals(StatisticalMeasure.EXACT_VALUE()))
770
                .map(exact->exact.getValue())
771
                .collect(Collectors.toList());
772
        return exactValues;
773
    }
774

    
775
    private static boolean hasSameState(StateData sd1, StateData sd2) {
776
        if (sd2.getState() == null || sd1.getState() == null){
777
            return false;
778
        }else{
779
            return sd2.getState().getUuid().equals(sd1.getState().getUuid());
780
        }
781
    }
782
}
(11-11/12)