Project

General

Profile

Download (37.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2019 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.api.service.description;
10

    
11
import java.math.BigDecimal;
12
import java.math.MathContext;
13
import java.util.ArrayList;
14
import java.util.Comparator;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Optional;
20
import java.util.Set;
21
import java.util.stream.Collectors;
22

    
23
import eu.etaxonomy.cdm.common.BigDecimalUtil;
24
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
25
import eu.etaxonomy.cdm.model.common.CdmBase;
26
import eu.etaxonomy.cdm.model.common.ICdmBase;
27
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
28
import eu.etaxonomy.cdm.model.description.CategoricalData;
29
import eu.etaxonomy.cdm.model.description.DescriptionBase;
30
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
31
import eu.etaxonomy.cdm.model.description.DescriptionType;
32
import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
33
import eu.etaxonomy.cdm.model.description.Feature;
34
import eu.etaxonomy.cdm.model.description.IDescribable;
35
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
36
import eu.etaxonomy.cdm.model.description.QuantitativeData;
37
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
38
import eu.etaxonomy.cdm.model.description.State;
39
import eu.etaxonomy.cdm.model.description.StateData;
40
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
41
import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
42
import eu.etaxonomy.cdm.model.description.TaxonDescription;
43
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
44
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
45
import eu.etaxonomy.cdm.model.taxon.Taxon;
46
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
47

    
48
/**
49
 * Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
50
 * <br>
51
 * For all {@link SpecimenDescription}s belonging to this data set, new
52
 * aggregated {@link TaxonDescription}s are created for every taxon the
53
 * specimens are directly associated with.<BR>
54
 * Also lower rank taxon descriptions are aggregated to upper rank taxa.
55
 *
56
 * @author a.mueller
57
 * @author p.plitzner
58
 * @since 03.11.2019
59
 */
60
public class StructuredDescriptionAggregation
61
        extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
62

    
63
    private DescriptiveDataSet dataSet;
64

    
65
    @Override
66
    protected String pluralDataType(){
67
        return "structured descriptive data";
68
    }
69

    
70
    @Override
71
    protected void preAggregate(IProgressMonitor monitor) {
72
        monitor.subTask("preAccumulate - nothing to do");
73

    
74
        // take start time for performance testing
75
        double start = System.currentTimeMillis();
76

    
77
        getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
78

    
79
        double end1 = System.currentTimeMillis();
80
        logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
81
    }
82

    
83
    @Override
84
    protected void verifyConfiguration(IProgressMonitor monitor){
85
        if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
86
            .contains(getConfig().getToParentSourceMode())){
87
            throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
88
        }
89
        if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
90
                .contains(getConfig().getWithinTaxonSourceMode())){
91
                throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
92
        }
93
    }
94

    
95
    private boolean hasCharacterData(DescriptionElementBase element) {
96
        return hasCategoricalData(element) || hasQuantitativeData(element);
97
    }
98

    
99
    private boolean hasQuantitativeData(DescriptionElementBase element) {
100
        if(element instanceof QuantitativeData
101
                && !((QuantitativeData) element).getStatisticalValues().isEmpty()){
102
            QuantitativeData quantitativeData = (QuantitativeData)element;
103
            return !getExactValues(quantitativeData).isEmpty()
104
                    || quantitativeData.getMin()!=null
105
                    || quantitativeData.getMax()!=null;
106
        }
107
        return false;
108
    }
109

    
110
    private boolean hasCategoricalData(DescriptionElementBase element) {
111
        return element instanceof CategoricalData && !((CategoricalData) element).getStatesOnly().isEmpty();
112
    }
113

    
114
    @Override
115
    protected void setDescriptionTitle(TaxonDescription description, Taxon taxon) {
116
        String title = taxon.getName() != null? taxon.getName().getTitleCache() : taxon.getTitleCache();
117
        description.setTitleCache("Aggregated description for " + title, true);
118
        return;
119
    }
120

    
121
    @Override
122
    protected TaxonDescription createNewDescription(Taxon taxon) {
123
        String title = taxon.getTitleCache();
124
        if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
125
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
126
        description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
127
        setDescriptionTitle(description, taxon);
128
        return description;
129
    }
130

    
131
    @Override
132
    protected boolean hasDescriptionType(TaxonDescription description) {
133
        return dataSet.getDescriptions().contains(description) && description.isAggregatedStructuredDescription();
134
    }
135

    
136
    @Override
137
    protected List<String> descriptionInitStrategy() {
138
        return new ArrayList<>();
139
    }
140

    
141
    @Override
142
    protected void addAggregationResultToDescription(TaxonDescription targetDescription,
143
            ResultHolder resultHolder) {
144

    
145
        StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
146
        mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
147
        mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
148
        addAggregationSources(targetDescription, structuredResultHolder);
149

    
150
        if(!targetDescription.getElements().isEmpty()){
151
            dataSet.addDescription(targetDescription);
152
        }else{
153
            dataSet.removeDescription(targetDescription);
154
        }
155
    }
156

    
157
    private <T extends DescriptionBase<?>> void addAggregationSources(TaxonDescription targetDescription,
158
                StructuredDescriptionResultHolder structuredResultHolder) {
159

    
160
        //Remove sources from description
161
        Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
162
                .filter(source->source.getType().equals(OriginalSourceType.Aggregation))
163
                .collect(Collectors.toSet());
164

    
165
        Set<IdentifiableSource> newSources = structuredResultHolder.sources;
166
        for (IdentifiableSource newSource : newSources) {
167
            IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
168
            if (mergeSourceCandidate == null){
169
                addNewSource(targetDescription, newSource);
170
            }else{
171
                mergeSource(mergeSourceCandidate, newSource);
172
                sourcesToRemove.remove(mergeSourceCandidate);
173
            }
174
        }
175

    
176
        //remove remaining sources-to-be-removed
177
        for (IdentifiableSource sourceToRemove : sourcesToRemove) {
178
            targetDescription.removeSource(sourceToRemove);
179
            ICdmBase target = CdmBase.deproxy(sourceToRemove.getCdmSource());
180
            if (target != null){
181
                sourceToRemove.setCdmSource(null); //workaround for missing orphan removal #9801
182
                if (target instanceof DescriptionBase){
183
                    @SuppressWarnings("unchecked")
184
                    T descriptionToDelete = (T)target;
185
                    if (descriptionToDelete.isCloneForSource()){
186
                        //TODO maybe this is not really needed as it is later done anyway with .deltedDescription
187
                        //but currently this still leads to an re-saved by cascade exception
188
                        ((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
189
                        structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
190
                    }
191
                }else if (target.isInstanceOf(Taxon.class)){
192
                    //nothing to do for now
193
                } else {
194
                    throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
195
                }
196
            }
197
        }
198
    }
199

    
200
    private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
201
            IdentifiableSource newSource) {
202

    
203
        //add source
204
        targetDescription.addSource(newSource);
205
        //if it is a description add it to the described entity (specimen, taxon)
206
        ICdmBase target = newSource.getCdmSource();
207
        if (target != null){
208
            if (target.isInstanceOf(DescriptionBase.class)){
209
                @SuppressWarnings("unchecked")
210
                T description = (T)CdmBase.deproxy(target);
211
                ((IDescribable<T>)description.describedEntity()).addDescription(description);
212
            }
213
        }
214
    }
215

    
216
    //mergeablity has been checked before
217
    private <T extends DescriptionBase<?>> void mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {
218

    
219
        ICdmBase newTarget = newSource.getCdmSource();
220
        if (newTarget != null){
221
            newTarget = CdmBase.deproxy(newTarget);
222
            if (newTarget instanceof DescriptionBase){
223
                @SuppressWarnings("unchecked")
224
                T newTargetDesc = (T)newTarget;
225
                @SuppressWarnings("unchecked")
226
                T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
227
                mergeSourceDescription(existingTargetDesc, newTargetDesc);
228
                ((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
229
                if (!existingTargetDesc.equals(newTargetDesc)){
230
                    ((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
231
                }
232
            }else if (newTarget instanceof Taxon){
233
                //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
234
            }else{
235
                throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
236
            }
237
        }else{
238
            throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
239
        }
240
    }
241

    
242
    private <T extends DescriptionBase<?>> void mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {
243

    
244
        Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
245
        Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());
246

    
247
        for (DescriptionElementBase newElement : newElements){
248
            DescriptionElementBase newElementClone = newElement.clone();
249
            Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
250
                    .filter(e->e.getFeature()!= null
251
                        && e.getFeature().equals(newElementClone.getFeature()))
252
                    .findFirst();
253
            if (matchingElement.isPresent()){
254
                mergeDescriptionElement(matchingElement.get(), newElementClone);
255
                elementsToRemove.remove(matchingElement.get());
256
            }else{
257
                existingSourceDescription.addElement(newElementClone);
258
            }
259
        }
260
        addSourceDescriptionToDescribedEntity(newSourceDescription);
261
        existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);
262

    
263
        for (DescriptionElementBase debToRemove : elementsToRemove){
264
            existingSourceDescription.removeElement(debToRemove);
265
        }
266

    
267
    }
268

    
269
    @SuppressWarnings("unchecked")
270
    private <T extends DescriptionBase<?>> void addSourceDescriptionToDescribedEntity(T sourceDescription) {
271
        ((IDescribable<T>)sourceDescription.describedEntity()).addDescription(sourceDescription);
272
    }
273
    @SuppressWarnings("unchecked")
274
    private <T extends DescriptionBase<?>> void removeSourceDescriptionFromDescribedEntity(T sourceDescription) {
275
        ((IDescribable<T>)sourceDescription.describedEntity()).removeDescription(sourceDescription);
276
    }
277

    
278
    private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
279
        for (IdentifiableSource existingSource : targetDescription.getSources()){
280
            boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
281
            if (isCandidate){
282
                return existingSource;
283
            }
284
        }
285
        return null;
286
    }
287

    
288
    private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
289
        if (newSource.getCdmSource()!= null){
290
            if (existingSource.getCdmSource() == null){
291
                return false;
292
            }else {
293
                ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
294
                ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
295
                if (!newTarget.getClass().equals(existingTarget.getClass())){
296
                    return false;
297
                }else{
298
                    if (newTarget instanceof SpecimenDescription){
299
                        SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
300
                        SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
301
                        //for now reuse is possible if both are descriptions for the same specimen
302
                        return newSob != null && newSob.equals(existingSob);
303
                    }else if (newTarget instanceof TaxonDescription){
304
                        Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
305
                        Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
306
                        //for now reuse is possible if both are descriptions for the same taxon
307
                        return newTaxon != null && newTaxon.equals(existingTaxon);
308
                    }else if (newTarget instanceof Taxon){
309
                        return newTarget.equals(existingTarget);
310
                    }else{
311
                        throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
312
                    }
313
                }
314
            }
315
        }
316

    
317
        return false;
318
    }
319

    
320
    private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
321
        if (!getConfig().isCloneAggregatedSourceDescriptions() && newSourceDescription.isAggregatedStructuredDescription()){
322
            return newSourceDescription;
323
        }
324
        @SuppressWarnings("unchecked")
325
        T clonedDescription = (T)newSourceDescription.clone();
326
//        clonedDescription.removeSources();
327
        clonedDescription.removeDescriptiveDataSet(dataSet);
328
        clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
329
        clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
330
        return clonedDescription;
331
    }
332

    
333
    private <S extends DescriptionElementBase> void mergeDescriptionElements(TaxonDescription targetDescription,
334
            Map<Feature, ? extends DescriptionElementBase> newElementsMap, Class<? extends DescriptionElementBase> debClass) {
335

    
336
        Set<DescriptionElementBase> elementsToRemove = new HashSet<>(
337
                targetDescription.getElements().stream()
338
                    .filter(el->el.isInstanceOf(debClass))
339
                    .collect(Collectors.toSet()));
340

    
341
        //for each character in "characters of new elements"
342
        for (Feature characterNew : newElementsMap.keySet()) {
343

    
344
            //if elements for this character exist in old data, remember any of them to keep (in clean data there should be only max. 1
345
            DescriptionElementBase elementToStay = null;
346
            for (DescriptionElementBase existingDeb : elementsToRemove) {
347
                if(existingDeb.getFeature().equals(characterNew)){
348
                    elementToStay = existingDeb;
349
                    elementsToRemove.remove(existingDeb);
350
                    break;
351
                }
352
            }
353

    
354
            //if there is no element for this character in old data, add the new element for this character to the target description (otherwise reuse old element)
355
            if (elementToStay == null){
356
                targetDescription.addElement(newElementsMap.get(characterNew));
357
            }else{
358
                mergeDescriptionElement(elementToStay, newElementsMap.get(characterNew));
359
            }
360
        }
361

    
362
        //remove all elements not needed anymore
363
        for(DescriptionElementBase elementToRemove : elementsToRemove){
364
            targetDescription.removeElement(elementToRemove);
365
        }
366
    }
367

    
368
    private void mergeDescriptionElement(DescriptionElementBase targetElement,
369
            DescriptionElementBase newElement) {
370

    
371
        targetElement = CdmBase.deproxy(targetElement);
372
        newElement = CdmBase.deproxy(newElement);
373
        if (targetElement instanceof CategoricalData){
374
            mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
375
        }else if (targetElement.isInstanceOf(QuantitativeData.class)){
376
            mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
377
        }else{
378
            throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
379
        }
380
    }
381

    
382
    private void mergeDescriptionElement(CategoricalData elementToStay,
383
            CategoricalData newElement) {
384
        List<StateData> oldData = new ArrayList<>(elementToStay.getStateData());
385
        List<StateData> newData = new ArrayList<>(newElement.getStateData());
386
        for (StateData newStateData : newData){
387
            State state = newStateData.getState();
388
            StateData oldStateData = firstByState(state, oldData);
389
            if (oldStateData != null){
390
                //for now only state and count is used for aggregation, below code needs to be adapted if this changes
391
                oldStateData.setCount(newStateData.getCount());
392
                oldData.remove(oldStateData);
393
            }else{
394
                elementToStay.addStateData(newStateData);
395
            }
396
        }
397
        for (StateData stateDataToRemove : oldData){
398
            elementToStay.removeStateData(stateDataToRemove);
399
        }
400
    }
401

    
402
    private StateData firstByState(State state, List<StateData> oldData) {
403
        if (state == null){
404
            return null;
405
        }
406
        for (StateData sd : oldData){
407
            if (state.equals(sd.getState())){
408
                return sd;
409
            }
410
        }
411
        return null;
412
    }
413

    
414
    private void mergeDescriptionElement(QuantitativeData elementToStay,
415
            QuantitativeData newElement) {
416
        Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
417
        Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
418
        for (StatisticalMeasurementValue newValue : newValues){
419
            StatisticalMeasure type = newValue.getType();
420
            StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
421
            if (oldValue != null){
422
                //for now only state and count is used for aggregation, below code needs to be adapted if this changes
423
                oldValue.setValue(newValue.getValue());
424
                oldValues.remove(oldValue);
425
            }else{
426
                elementToStay.addStatisticalValue(newValue);
427
            }
428
        }
429
        for (StatisticalMeasurementValue valueToRemove : oldValues){
430
            elementToStay.removeStatisticalValue(valueToRemove);
431
        }
432
    }
433

    
434
    private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
435
        if (type == null){
436
            return null;
437
        }
438
        for (StatisticalMeasurementValue value : oldValues){
439
            if (type.equals(value.getType())){
440
                return value;
441
            }
442
        }
443
        return null;
444
    }
445

    
446
    @Override
447
    protected void initTransaction() {
448
        dataSet = getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
449
    }
450

    
451
    @Override
452
    protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
453
        super.removeDescriptionIfEmpty(description, resultHolder);
454
        if (description.getElements().isEmpty()){
455
            dataSet.removeDescription(description);
456
        }
457
    }
458

    
459
    @Override
460
    protected void aggregateToParentTaxon(TaxonNode taxonNode,
461
            ResultHolder resultHolder,
462
            Set<TaxonDescription> excludedDescriptions) {
463
        StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
464
        Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
465
        addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
466
    }
467

    
468
    @Override
469
    protected void aggregateWithinSingleTaxon(Taxon taxon,
470
            ResultHolder resultHolder,
471
            Set<TaxonDescription> excludedDescriptions) {
472
        StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
473

    
474
        //specimen descriptions
475
        Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
476
        addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
477

    
478
        //"literature" descriptions
479
        if (getConfig().isIncludeLiterature()){
480
            Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
481
            addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
482
        }
483

    
484
        //"default" descriptions
485
        //TODO add default descriptions
486
        //xxx
487

    
488
    }
489

    
490
    private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
491
            Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
492
            AggregationMode aggregationMode) {
493

    
494
        boolean descriptionWasUsed = false;
495
        for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
496
            for (DescriptionElementBase deb: desc.getElements()){
497
                if (hasCharacterData(deb)){
498
                    if (deb.isInstanceOf(CategoricalData.class)){
499
                        addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
500
                        descriptionWasUsed = true;
501
                    }else if (deb.isInstanceOf(QuantitativeData.class)){
502
                        addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
503
                        descriptionWasUsed = true;
504
                    }
505
                }
506
            }
507

    
508
            //sources
509
            AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
510
            if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
511
                IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
512
                desc = CdmBase.deproxy(desc);
513

    
514
                switch (sourceMode){
515
                    case DESCRIPTION:
516
                        DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
517
                        source.setCdmSource(clonedDesc);
518
                        break;
519
                    case TAXON:
520
                        if (desc instanceof TaxonDescription){
521
                            Taxon taxon = ((TaxonDescription) desc).getTaxon();
522
                            source.setCdmSource(taxon);
523
                        }else {
524
                            throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
525
                        }
526
                        break;
527
                    case NONE:
528
                        source = null;
529
                        break;
530
                    case ALL: //not yet supported
531
                        throw new AggregationException("Source mode not yet supported: " + sourceMode);
532
                    case ALL_SAMEVALUE: //makes no sense
533
                        throw new AggregationException("Illegal source mode: " + sourceMode);
534
                    default:
535
                        throw new AggregationException("Source mode not supported: " + sourceMode);
536
                }
537
                if (source != null){
538
                    descriptiveResultHolder.sources.add(source);
539
                }
540
            }
541
        }
542
    }
543

    
544
    private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
545
        QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
546
        if(aggregatedQuantitativeData==null){
547
            // no QuantitativeData with this feature in aggregation
548
            aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
549
        }
550
        else{
551
            aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
552
        }
553
        if (aggregatedQuantitativeData != null){
554
            resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
555
        }
556
    }
557

    
558
    private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
559
        CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
560
        if(aggregatedCategoricalData == null){
561
            // no CategoricalData with this feature in aggregation
562
            aggregatedCategoricalData = cd.clone();
563
            // set count to 1 if not set
564
            if (!aggregatedCategoricalData.getStatesOnly().isEmpty()){
565
                aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
566
                resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
567
            }
568
        }
569
        else{
570
            // split all StateData into those where the state already exists and those where it doesn't
571
            List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
572
            List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
573
            List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
574

    
575
            for (StateData sd : sdWithNoExistingStateInAggregation) {
576
                StateData clone = sd.clone();
577
                // set count to 1 if not set
578
                if(clone.getCount()==null){
579
                    clone.incrementCount();
580
                }
581
                aggregatedCategoricalData.addStateData(clone);
582
            }
583

    
584
            for (StateData sdExist : sdWithExistingStateInAggregation) {
585
                List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
586
                        .filter(sd->hasSameState(sdExist, sd))
587
                        .collect(Collectors.toList());
588
                for (StateData stateData : aggregatedSameStateData) {
589
                    if(sdExist.getCount()==null){
590
                        stateData.incrementCount();
591
                    }
592
                    else{
593
                        stateData.setCount(stateData.getCount()+sdExist.getCount());
594
                    }
595
                }
596
            }
597
        }
598
    }
599

    
600
    @Override
601
    protected StructuredDescriptionResultHolder createResultHolder() {
602
        return new StructuredDescriptionResultHolder();
603
    }
604

    
605
    private class StructuredDescriptionResultHolder extends ResultHolder{
606
        private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
607
        private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
608
        private Set<IdentifiableSource> sources = new HashSet<>();
609
        @Override
610
        public String toString() {
611
            return "SDResultHolder [categoricals=" + categoricalMap.size()
612
                + ", quantitatives=" + quantitativeMap.size()
613
                + ", sources=" + sources.size()
614
                + ", descriptionsToDelete=" + this.descriptionsToDelete.size()
615
                + "]";
616
        }
617
    }
618

    
619
    private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
620
        Set<TaxonDescription> result = new HashSet<>();
621
        List<TaxonNode> childNodes = taxonNode.getChildNodes();
622
        for (TaxonNode childNode : childNodes) {
623
            Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
624
            result.addAll(childDescriptions.stream()
625
                .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
626
                .filter(desc->dataSet.getDescriptions().contains(desc))
627
                .collect(Collectors.toSet()));
628
        }
629
        return result;
630
    }
631

    
632
    /**
633
     * Computes all specimen attached to the given taxon within the given dataSet.
634
     * For these secimen it returns all attache
635
     * */
636
    private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
637
        Set<SpecimenDescription> result = new HashSet<>();
638
        //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
639
        for (TaxonDescription taxonDesc: taxon.getDescriptions()){
640
            for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
641
                if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
642
                    IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
643
                    SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
644
                    Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
645
                    for(SpecimenDescription specimenDescription : descriptions){
646
                        if(dataSet.getDescriptions().contains(specimenDescription) &&
647
                                specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
648
                            result.add(specimenDescription);
649
                        }
650
                    }
651
                }
652
            }
653
        }
654
        return result;
655
    }
656

    
657
    private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
658
        Set<TaxonDescription> result = new HashSet<>();
659
        //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
660
        for(TaxonDescription taxonDescription : taxon.getDescriptions()){
661
            if(dataSet.getDescriptions().contains(taxonDescription)
662
                    && taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
663
                    && taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
664
                result.add(taxonDescription);
665
            }
666
        }
667
        return result;
668
    }
669

    
670
    /**
671
     * Evaluates statistics for exact values collection and handles missing min and max values
672
     */
673
    private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
674
        QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
675
        aggQD.setUnit(sourceQd.getUnit());
676
        Set<BigDecimal> exactValues = sourceQd.getExactValues();
677
        if(!exactValues.isEmpty()){
678
            // qd is not already aggregated
679
            Comparator<BigDecimal> comp = Comparator.naturalOrder();
680
            int exactValueSampleSize = exactValues.size();
681
            BigDecimal exactValueMin = exactValues.stream().min(comp).get();
682
            BigDecimal exactValueMax = exactValues.stream().max(comp).get();
683
            BigDecimal exactValueAvg = BigDecimalUtil.average(exactValues);
684
            //TODO also check for typical boundary data
685
            if(sourceQd.getMin() == null && sourceQd.getMax() == null){
686
                aggQD.setSampleSize(new BigDecimal(exactValueSampleSize), null);
687
                aggQD.setAverage(exactValueAvg, null);
688
            }
689
            aggQD.setMinimum(sourceQd.getMin() == null ? exactValueMin: sourceQd.getMin().min(exactValueMin), null);
690
            aggQD.setMaximum(sourceQd.getMax() == null ? exactValueMax: sourceQd.getMax().max(exactValueMax), null);
691
        }
692
        else{
693
            // qd has only min, max, ... but no exact values
694
            aggQD = sourceQd.clone();
695
            aggQD = handleMissingValues(aggQD);
696
        }
697
        return aggQD;
698
    }
699

    
700
    private QuantitativeData handleMissingValues(QuantitativeData qd) {
701
        //min max
702
        qd = handleMissingMinOrMax(qd);
703
        //average
704
        if (qd != null && qd.getAverage() == null){
705
            BigDecimal n = qd.getSampleSize();
706
            if(n != null && !n.equals(0f)){
707
                BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n);
708
                qd.setAverage(average, null);
709
            }
710
        }
711
        return qd;
712
    }
713

    
714
    private QuantitativeData handleMissingMinOrMax(QuantitativeData qd) {
715
        return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
716
    }
717

    
718
    public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
719
            MissingMaximumMode missingMaxMode) {
720
        if(aggQD.getMin() == null && aggQD.getMax() != null){
721
            if (missingMinMode == MissingMinimumMode.MinToZero) {
722
                aggQD.setMinimum(BigDecimal.valueOf(0f), null);
723
            }else if (missingMinMode == MissingMinimumMode.MinToMax){
724
                aggQD.setMinimum(aggQD.getMax(), null);
725
            }else if (missingMinMode == MissingMinimumMode.SkipRecord){
726
                return null;
727
            }
728
        }
729
        if(aggQD.getMax() == null && aggQD.getMin() != null){
730
            if (missingMaxMode == MissingMaximumMode.MaxToMin){
731
                aggQD.setMaximum(aggQD.getMin(), null);
732
            }else if (missingMaxMode == MissingMaximumMode.SkipRecord){
733
                return null;
734
            }
735
        }
736
        return aggQD;
737
    }
738

    
739
    private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
740

    
741
        newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
742

    
743
        BigDecimal min = null;
744
        BigDecimal max = null;
745
        BigDecimal average = null;
746
        BigDecimal sampleSize = null;
747
        newQd = handleMissingValues(newQd);
748
        if (newQd == null){
749
            return aggQd;
750
        }
751
        min = aggQd.getMin().min(newQd.getMin());
752
        max = aggQd.getMax().max(newQd.getMax());
753
        if (newQd.getSampleSize() != null && aggQd.getSampleSize() != null){
754
            sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
755
        }
756
        if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
757
            BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
758
            BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
759
            BigDecimal totalSum = aggTotalSum.add(newTotalSum);
760
            average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros();  //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
761
        }
762
        aggQd.setMinimum(min, null);
763
        aggQd.setMaximum(max, null);
764
        aggQd.setSampleSize(sampleSize, null);
765
        aggQd.setAverage(average, null);
766
        return aggQd;
767
    }
768

    
769
    private static List<BigDecimal> getExactValues(QuantitativeData qd) {
770
        List<BigDecimal> exactValues = qd.getStatisticalValues().stream()
771
                .filter(value->value.getType().equals(StatisticalMeasure.EXACT_VALUE()))
772
                .map(exact->exact.getValue())
773
                .collect(Collectors.toList());
774
        return exactValues;
775
    }
776

    
777
    private static boolean hasSameState(StateData sd1, StateData sd2) {
778
        if (sd2.getState() == null || sd1.getState() == null){
779
            return false;
780
        }else{
781
            return sd2.getState().getUuid().equals(sd1.getState().getUuid());
782
        }
783
    }
784
}
(11-11/12)