Project

General

Profile

Download (36.9 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2019 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.api.service.description;
10

    
11
import java.math.BigDecimal;
12
import java.math.MathContext;
13
import java.util.ArrayList;
14
import java.util.Comparator;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Optional;
20
import java.util.Set;
21
import java.util.stream.Collectors;
22

    
23
import eu.etaxonomy.cdm.common.BigDecimalUtil;
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
26
import eu.etaxonomy.cdm.model.common.CdmBase;
27
import eu.etaxonomy.cdm.model.common.ICdmBase;
28
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
29
import eu.etaxonomy.cdm.model.description.CategoricalData;
30
import eu.etaxonomy.cdm.model.description.DescriptionBase;
31
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
32
import eu.etaxonomy.cdm.model.description.DescriptionType;
33
import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
34
import eu.etaxonomy.cdm.model.description.Feature;
35
import eu.etaxonomy.cdm.model.description.IDescribable;
36
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
37
import eu.etaxonomy.cdm.model.description.QuantitativeData;
38
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
39
import eu.etaxonomy.cdm.model.description.State;
40
import eu.etaxonomy.cdm.model.description.StateData;
41
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
42
import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
43
import eu.etaxonomy.cdm.model.description.TaxonDescription;
44
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
45
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
46
import eu.etaxonomy.cdm.model.taxon.Taxon;
47
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
48

    
49
/**
50
 * Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
51
 * <br>
52
 * For all {@link SpecimenDescription}s belonging to this data set new
53
 * aggregated {@link TaxonDescription}s are created for every taxon the
54
 * specimens are directly associated with.<BR>
55
 * Also lower rank taxon descriptions are aggregated to upper rank taxa.
56
 *
57
 * @author a.mueller
58
 * @author p.plitzner
59
 * @since 03.11.2019
60
 */
61
public class StructuredDescriptionAggregation
62
        extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
63

    
64
    private DescriptiveDataSet dataSet;
65

    
66
    @Override
67
    protected String pluralDataType(){
68
        return "structured descriptive data";
69
    }
70

    
71
    @Override
72
    protected void preAggregate(IProgressMonitor monitor) {
73
        monitor.subTask("preAccumulate - nothing to do");
74

    
75
        // take start time for performance testing
76
        double start = System.currentTimeMillis();
77

    
78
        getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
79

    
80
        double end1 = System.currentTimeMillis();
81
        logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
82
    }
83

    
84
    @Override
85
    protected void verifyConfiguration(IProgressMonitor monitor){
86
        if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
87
            .contains(getConfig().getToParentSourceMode())){
88
            throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
89
        }
90
        if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
91
                .contains(getConfig().getWithinTaxonSourceMode())){
92
                throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
93
        }
94
    }
95

    
96
    private boolean hasCharacterData(DescriptionElementBase element) {
97
        return hasCategoricalData(element) || hasQuantitativeData(element);
98
    }
99

    
100
    private boolean hasQuantitativeData(DescriptionElementBase element) {
101
        if(element instanceof QuantitativeData
102
                && !((QuantitativeData) element).getStatisticalValues().isEmpty()){
103
            QuantitativeData quantitativeData = (QuantitativeData)element;
104
            return !getExactValues(quantitativeData).isEmpty()
105
                    || quantitativeData.getMin()!=null
106
                    || quantitativeData.getMax()!=null;
107
        }
108
        return false;
109
    }
110

    
111
    private boolean hasCategoricalData(DescriptionElementBase element) {
112
        return element instanceof CategoricalData && !((CategoricalData) element).getStatesOnly().isEmpty();
113
    }
114

    
115
    @Override
116
    protected void setDescriptionTitle(TaxonDescription description, Taxon taxon) {
117
        String title = taxon.getName() != null? taxon.getName().getTitleCache() : taxon.getTitleCache();
118
        description.setTitleCache("Aggregated description for " + title, true);
119
        return;
120
    }
121

    
122
    @Override
123
    protected TaxonDescription createNewDescription(Taxon taxon) {
124
        String title = taxon.getTitleCache();
125
        if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
126
        TaxonDescription description = TaxonDescription.NewInstance(taxon);
127
        description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
128
        setDescriptionTitle(description, taxon);
129
        return description;
130
    }
131

    
132
    @Override
133
    protected boolean hasDescriptionType(TaxonDescription description) {
134
        return dataSet.getDescriptions().contains(description) && description.isAggregatedStructuredDescription();
135
    }
136

    
137
    @Override
138
    protected List<String> descriptionInitStrategy() {
139
        return new ArrayList<>();
140
    }
141

    
142
    @Override
143
    protected boolean mergeAggregationResultIntoTargetDescription(TaxonDescription targetDescription,
144
            ResultHolder resultHolder) {
145

    
146
        StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
147
        boolean updated = mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
148
        updated |= mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
149
        updated |= mergeDescriptionSources(targetDescription, structuredResultHolder);
150

    
151
        if(!targetDescription.getElements().isEmpty()){
152
            dataSet.addDescription(targetDescription);
153
        }else{
154
            dataSet.removeDescription(targetDescription);
155
        }
156
        return updated;
157
    }
158

    
159
    @Override
160
    protected boolean isRelevantDescriptionElement(DescriptionElementBase deb){
161
        return deb.isInstanceOf(CategoricalData.class) || deb.isInstanceOf(QuantitativeData.class);
162
    }
163

    
164
    private <T extends DescriptionBase<?>> boolean mergeDescriptionSources(TaxonDescription targetDescription,
165
                StructuredDescriptionResultHolder structuredResultHolder) {
166

    
167
        boolean updated = false;
168
        //Remove sources from description
169
        Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
170
                .filter(source->source.getType().equals(OriginalSourceType.Aggregation))
171
                .collect(Collectors.toSet());
172

    
173
        Set<IdentifiableSource> newSources = structuredResultHolder.sources;
174
        for (IdentifiableSource newSource : newSources) {
175
            IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
176
            if (mergeSourceCandidate == null){
177
                addNewSource(targetDescription, newSource);
178
                updated = true;
179
            }else{
180
                updated |= mergeSource(mergeSourceCandidate, newSource);
181
                sourcesToRemove.remove(mergeSourceCandidate);
182
            }
183
        }
184

    
185
        //remove remaining sources-to-be-removed
186
        for (IdentifiableSource sourceToRemove : sourcesToRemove) {
187
            targetDescription.removeSource(sourceToRemove);
188
            updated |= sourceToRemove.isPersited();
189
            ICdmBase target = CdmBase.deproxy(sourceToRemove.getCdmSource());
190
            if (target != null){
191
                sourceToRemove.setCdmSource(null); //workaround for missing orphan removal #9801
192
                if (target instanceof DescriptionBase){
193
                    @SuppressWarnings("unchecked")
194
                    T descriptionToDelete = (T)target;
195
                    if (descriptionToDelete.isCloneForSource()){
196
                        //TODO maybe this is not really needed as it is later done anyway with .deltedDescription
197
                        //but currently this still leads to a re-saved by cascade exception
198
                        ((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
199
                        structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
200
                    }
201
                } else if (target.isInstanceOf(Taxon.class)){
202
                    //nothing to do for now
203
                } else {
204
                    throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
205
                }
206
            }
207
        }
208
        return updated;
209
    }
210

    
211
    private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
212
            IdentifiableSource newSource) {
213

    
214
        //add source
215
        targetDescription.addSource(newSource);
216
        //if it is a description add it to the described entity (specimen, taxon)
217
        ICdmBase target = newSource.getCdmSource();
218
        if (target != null){
219
            if (target.isInstanceOf(DescriptionBase.class)){
220
                @SuppressWarnings("unchecked")
221
                T description = (T)CdmBase.deproxy(target);
222
                ((IDescribable<T>)description.describedEntity()).addDescription(description);
223
            }
224
        }
225
    }
226

    
227
    //mergeablity has been checked before
228
    private <T extends DescriptionBase<?>> boolean mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {
229

    
230
        boolean updated = false;
231
        ICdmBase newTarget = newSource.getCdmSource();
232
        if (newTarget != null){
233
            newTarget = CdmBase.deproxy(newTarget);
234
            if (newTarget instanceof DescriptionBase){
235
                @SuppressWarnings("unchecked")
236
                T newTargetDesc = (T)newTarget;
237
                @SuppressWarnings("unchecked")
238
                T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
239
                updated |= mergeSourceDescription(existingTargetDesc, newTargetDesc);
240
                ((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
241
                if (!existingTargetDesc.equals(newTargetDesc)){
242
                    ((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
243
                }
244
            }else if (newTarget instanceof Taxon){
245
                //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
246
            }else{
247
                throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
248
            }
249
        }else{
250
            throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
251
        }
252
        return updated;
253
    }
254

    
255
    private <T extends DescriptionBase<?>> boolean mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {
256

    
257
        boolean updated = false;
258
        Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
259
        Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());
260

    
261
        for (DescriptionElementBase newElement : newElements){
262
            DescriptionElementBase newElementClone = newElement.clone();
263
            Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
264
                    .filter(e->e.getFeature()!= null
265
                        && e.getFeature().equals(newElementClone.getFeature()))
266
                    .findFirst();
267
            if (matchingElement.isPresent()){
268
                updated |= mergeDescriptionElement(matchingElement.get(), newElementClone);
269
                elementsToRemove.remove(matchingElement.get());
270
            }else{
271
                existingSourceDescription.addElement(newElementClone);
272
                updated = true;
273
            }
274
        }
275
        updated |= addSourceDescriptionToDescribedEntity(newSourceDescription);
276
        existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);
277

    
278
        for (DescriptionElementBase debToRemove : elementsToRemove){
279
            existingSourceDescription.removeElement(debToRemove);
280
            updated |= debToRemove.isPersited();
281
        }
282
        return updated;
283
    }
284

    
285
    @SuppressWarnings("unchecked")
286
    private <T extends DescriptionBase<?>> boolean addSourceDescriptionToDescribedEntity(T sourceDescription) {
287
        boolean updated = false;
288
        IDescribable<T> describedEntity = ((IDescribable<T>)sourceDescription.describedEntity());
289
        if (describedEntity.getDescriptions().contains(sourceDescription)){
290
            describedEntity.addDescription(sourceDescription);
291
            updated = true;
292
        }
293
        return updated;
294
    }
295

    
296
    private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
297
        for (IdentifiableSource existingSource : targetDescription.getSources()){
298
            boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
299
            if (isCandidate){
300
                return existingSource;
301
            }
302
        }
303
        return null;
304
    }
305

    
306
    private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
307
        if (newSource.getCdmSource()!= null){
308
            if (existingSource.getCdmSource() == null){
309
                return false;
310
            }else {
311
                ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
312
                ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
313
                if (!newTarget.getClass().equals(existingTarget.getClass())){
314
                    return false;
315
                }else{
316
                    if (newTarget instanceof SpecimenDescription){
317
                        SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
318
                        SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
319
                        //for now reuse is possible if both are descriptions for the same specimen
320
                        return newSob != null && newSob.equals(existingSob);
321
                    }else if (newTarget instanceof TaxonDescription){
322
                        Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
323
                        Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
324
                        //for now reuse is possible if both are descriptions for the same taxon
325
                        return newTaxon != null && newTaxon.equals(existingTaxon);
326
                    }else if (newTarget instanceof Taxon){
327
                        return newTarget.equals(existingTarget);
328
                    }else{
329
                        throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
330
                    }
331
                }
332
            }
333
        }
334

    
335
        return false;
336
    }
337

    
338
    private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
339
        if (!getConfig().isCloneAggregatedSourceDescriptions() && newSourceDescription.isAggregatedStructuredDescription()){
340
            return newSourceDescription;
341
        }
342
        @SuppressWarnings("unchecked")
343
        T clonedDescription = (T)newSourceDescription.clone();
344
//        clonedDescription.removeSources();
345
        clonedDescription.removeDescriptiveDataSet(dataSet);
346
        clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
347
        clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
348
        return clonedDescription;
349
    }
350

    
351
    @Override
352
    protected <S extends DescriptionElementBase> boolean mergeDescriptionElement(S targetElement,
353
            S newElement) {
354

    
355
        boolean updated = false;
356
        targetElement = CdmBase.deproxy(targetElement);
357
        newElement = CdmBase.deproxy(newElement);
358
        if (targetElement instanceof CategoricalData){
359
            updated |= mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
360
        }else if (targetElement.isInstanceOf(QuantitativeData.class)){
361
            updated |= mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
362
        }else{
363
            throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
364
        }
365
        return updated;
366
    }
367

    
368
    private boolean mergeDescriptionElement(CategoricalData elementToStay,
369
            CategoricalData newElement) {
370

    
371
        boolean updated = false;
372
        List<StateData> dataToRemove = new ArrayList<>(elementToStay.getStateData());
373
        List<StateData> newData = new ArrayList<>(newElement.getStateData());
374
        for (StateData newStateData : newData){
375
            State state = newStateData.getState();
376
            StateData oldStateData = firstByState(state, dataToRemove);
377
            if (oldStateData != null){
378
                //for now only state and count is used for aggregation, below code needs to be adapted if this changes
379
                if (!CdmUtils.nullSafeEqual(oldStateData.getCount(), newStateData.getCount())){
380
                    oldStateData.setCount(newStateData.getCount());
381
//                    getResult().addUpdatedObject(oldStateData);
382
                    updated = true;
383
                }
384
                dataToRemove.remove(oldStateData);
385
            }else{
386
                elementToStay.addStateData(newStateData);
387
                updated = true;
388
            }
389
        }
390
        for (StateData stateDataToRemove : dataToRemove){
391
            elementToStay.removeStateData(stateDataToRemove);
392
            updated |= stateDataToRemove.isPersited();
393
        }
394
        return updated;
395
    }
396

    
397
    private StateData firstByState(State state, List<StateData> oldData) {
398
        if (state == null){
399
            return null;
400
        }
401
        for (StateData sd : oldData){
402
            if (state.equals(sd.getState())){
403
                return sd;
404
            }
405
        }
406
        return null;
407
    }
408

    
409
    private boolean mergeDescriptionElement(QuantitativeData elementToStay,
410
            QuantitativeData newElement) {
411

    
412
        boolean updated = false;
413

    
414
        Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
415
        Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
416
        for (StatisticalMeasurementValue newValue : newValues){
417
            StatisticalMeasure type = newValue.getType();
418
            StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
419
            if (oldValue != null){
420
                //for now only state and count is used for aggregation, below code needs to be adapted if this changes
421
                if (!CdmUtils.nullSafeEqual(oldValue.getValue(), newValue.getValue())){
422
                    oldValue.setValue(newValue.getValue());
423
                    updated = true;
424
                }
425
                oldValues.remove(oldValue);
426
            }else{
427
                elementToStay.addStatisticalValue(newValue);
428
                updated = true;
429
            }
430
        }
431
        for (StatisticalMeasurementValue valueToRemove : oldValues){
432
            elementToStay.removeStatisticalValue(valueToRemove);
433
            updated |= valueToRemove.isPersited();
434
        }
435
        return updated;
436
    }
437

    
438
    private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
439
        if (type == null){
440
            return null;
441
        }
442
        for (StatisticalMeasurementValue value : oldValues){
443
            if (type.equals(value.getType())){
444
                return value;
445
            }
446
        }
447
        return null;
448
    }
449

    
450
    @Override
451
    protected void initTransaction() {
452
        dataSet = getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
453
    }
454

    
455
    @Override
456
    protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
457
        super.removeDescriptionIfEmpty(description, resultHolder);
458
        if (description.getElements().isEmpty()){
459
            dataSet.removeDescription(description);
460
        }
461
    }
462

    
463
    @Override
464
    protected void aggregateToParentTaxon(TaxonNode taxonNode,
465
            ResultHolder resultHolder,
466
            Set<TaxonDescription> excludedDescriptions) {
467
        StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
468
        Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
469
        addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
470
    }
471

    
472
    @Override
473
    protected void aggregateWithinSingleTaxon(Taxon taxon,
474
            ResultHolder resultHolder,
475
            Set<TaxonDescription> excludedDescriptions) {
476
        StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
477

    
478
        //specimen descriptions
479
        Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
480
        addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
481

    
482
        //"literature" descriptions
483
        if (getConfig().isIncludeLiterature()){
484
            Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
485
            addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
486
        }
487

    
488
        //"default" descriptions
489
        //TODO add default descriptions
490
        //xxx
491

    
492
    }
493

    
494
    private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
495
            Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
496
            AggregationMode aggregationMode) {
497

    
498
        boolean descriptionWasUsed = false;
499
        for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
500
            for (DescriptionElementBase deb: desc.getElements()){
501
                if (hasCharacterData(deb)){
502
                    if (deb.isInstanceOf(CategoricalData.class)){
503
                        addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
504
                        descriptionWasUsed = true;
505
                    }else if (deb.isInstanceOf(QuantitativeData.class)){
506
                        addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
507
                        descriptionWasUsed = true;
508
                    }
509
                }
510
            }
511

    
512
            //sources
513
            AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
514
            if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
515
                IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
516
                desc = CdmBase.deproxy(desc);
517

    
518
                switch (sourceMode){
519
                    case DESCRIPTION:
520
                        DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
521
                        source.setCdmSource(clonedDesc);
522
                        break;
523
                    case TAXON:
524
                        if (desc instanceof TaxonDescription){
525
                            Taxon taxon = ((TaxonDescription) desc).getTaxon();
526
                            source.setCdmSource(taxon);
527
                        }else {
528
                            throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
529
                        }
530
                        break;
531
                    case NONE:
532
                        source = null;
533
                        break;
534
                    case ALL: //not yet supported
535
                        throw new AggregationException("Source mode not yet supported: " + sourceMode);
536
                    case ALL_SAMEVALUE: //makes no sense
537
                        throw new AggregationException("Illegal source mode: " + sourceMode);
538
                    default:
539
                        throw new AggregationException("Source mode not supported: " + sourceMode);
540
                }
541
                if (source != null){
542
                    descriptiveResultHolder.sources.add(source);
543
                }
544
            }
545
        }
546
    }
547

    
548
    private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
549
        QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
550
        if(aggregatedQuantitativeData==null){
551
            // no QuantitativeData with this feature in aggregation
552
            aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
553
        }
554
        else{
555
            aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
556
        }
557
        if (aggregatedQuantitativeData != null){
558
            resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
559
        }
560
    }
561

    
562
    private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
563
        CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
564
        if(aggregatedCategoricalData == null){
565
            // no CategoricalData with this feature in aggregation
566
            aggregatedCategoricalData = cd.clone();
567
            // set count to 1 if not set
568
            if (!aggregatedCategoricalData.getStatesOnly().isEmpty()){
569
                aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
570
                resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
571
            }
572
        }
573
        else{
574
            // split all StateData into those where the state already exists and those where it doesn't
575
            List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
576
            List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
577
            List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
578

    
579
            for (StateData sd : sdWithNoExistingStateInAggregation) {
580
                StateData clone = sd.clone();
581
                // set count to 1 if not set
582
                if(clone.getCount()==null){
583
                    clone.incrementCount();
584
                }
585
                aggregatedCategoricalData.addStateData(clone);
586
            }
587

    
588
            for (StateData sdExist : sdWithExistingStateInAggregation) {
589
                List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
590
                        .filter(sd->hasSameState(sdExist, sd))
591
                        .collect(Collectors.toList());
592
                for (StateData stateData : aggregatedSameStateData) {
593
                    if(sdExist.getCount()==null){
594
                        stateData.incrementCount();
595
                    }
596
                    else{
597
                        stateData.setCount(stateData.getCount()+sdExist.getCount());
598
                    }
599
                }
600
            }
601
        }
602
    }
603

    
604
    @Override
605
    protected StructuredDescriptionResultHolder createResultHolder() {
606
        return new StructuredDescriptionResultHolder();
607
    }
608

    
609
    private class StructuredDescriptionResultHolder extends ResultHolder{
610
        private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
611
        private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
612
        private Set<IdentifiableSource> sources = new HashSet<>();
613
        @Override
614
        public String toString() {
615
            return "SDResultHolder [categoricals=" + categoricalMap.size()
616
                + ", quantitatives=" + quantitativeMap.size()
617
                + ", sources=" + sources.size()
618
                + ", descriptionsToDelete=" + this.descriptionsToDelete.size()
619
                + "]";
620
        }
621
    }
622

    
623
    private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
624
        Set<TaxonDescription> result = new HashSet<>();
625
        List<TaxonNode> childNodes = taxonNode.getChildNodes();
626
        for (TaxonNode childNode : childNodes) {
627
            Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
628
            result.addAll(childDescriptions.stream()
629
                .filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
630
                .filter(desc->dataSet.getDescriptions().contains(desc))
631
                .collect(Collectors.toSet()));
632
        }
633
        return result;
634
    }
635

    
636
    /**
637
     * Computes all specimens attached to the given taxon within the given dataSet.
638
     * For these secimens it returns all attache
639
     * */
640
    private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
641
        Set<SpecimenDescription> result = new HashSet<>();
642
        //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
643
        for (TaxonDescription taxonDesc: taxon.getDescriptions()){
644
            for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
645
                if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
646
                    IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
647
                    SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
648
                    Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
649
                    for(SpecimenDescription specimenDescription : descriptions){
650
                        if(dataSet.getDescriptions().contains(specimenDescription) &&
651
                                specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
652
                            result.add(specimenDescription);
653
                        }
654
                    }
655
                }
656
            }
657
        }
658
        return result;
659
    }
660

    
661
    private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
662
        Set<TaxonDescription> result = new HashSet<>();
663
        //TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
664
        for(TaxonDescription taxonDescription : taxon.getDescriptions()){
665
            if(dataSet.getDescriptions().contains(taxonDescription)
666
                    && taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
667
                    && taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
668
                result.add(taxonDescription);
669
            }
670
        }
671
        return result;
672
    }
673

    
674
    /**
675
     * Evaluates statistics for exact values collection and handles missing min and max values
676
     */
677
    private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
678
        QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
679
        aggQD.setUnit(sourceQd.getUnit());
680
        Set<BigDecimal> exactValues = sourceQd.getExactValues();
681
        if(!exactValues.isEmpty()){
682
            // qd is not already aggregated
683
            Comparator<BigDecimal> comp = Comparator.naturalOrder();
684
            int exactValueSampleSize = exactValues.size();
685
            BigDecimal exactValueMin = exactValues.stream().min(comp).get();
686
            BigDecimal exactValueMax = exactValues.stream().max(comp).get();
687
            BigDecimal exactValueAvg = BigDecimalUtil.average(exactValues);
688
            //TODO also check for typical boundary data
689
            if(sourceQd.getMin() == null && sourceQd.getMax() == null){
690
                aggQD.setSampleSize(new BigDecimal(exactValueSampleSize), null);
691
                aggQD.setAverage(exactValueAvg, null);
692
            }
693
            aggQD.setMinimum(sourceQd.getMin() == null ? exactValueMin: sourceQd.getMin().min(exactValueMin), null);
694
            aggQD.setMaximum(sourceQd.getMax() == null ? exactValueMax: sourceQd.getMax().max(exactValueMax), null);
695
        }
696
        else{
697
            // qd has only min, max, ... but no exact values
698
            aggQD = sourceQd.clone();
699
            aggQD = handleMissingValues(aggQD);
700
        }
701
        return aggQD;
702
    }
703

    
704
    private QuantitativeData handleMissingValues(QuantitativeData qd) {
705
        //min max
706
        qd = handleMissingMinOrMax(qd);
707
        //average
708
        if (qd != null && qd.getAverage() == null){
709
            BigDecimal n = qd.getSampleSize();
710
            if(n != null && !n.equals(0f)){
711
                BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n);
712
                qd.setAverage(average, null);
713
            }
714
        }
715
        return qd;
716
    }
717

    
718
    private QuantitativeData handleMissingMinOrMax(QuantitativeData qd) {
719
        return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
720
    }
721

    
722
    public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
723
            MissingMaximumMode missingMaxMode) {
724
        if(aggQD.getMin() == null && aggQD.getMax() != null){
725
            if (missingMinMode == MissingMinimumMode.MinToZero) {
726
                aggQD.setMinimum(BigDecimal.valueOf(0f), null);
727
            }else if (missingMinMode == MissingMinimumMode.MinToMax){
728
                aggQD.setMinimum(aggQD.getMax(), null);
729
            }else if (missingMinMode == MissingMinimumMode.SkipRecord){
730
                return null;
731
            }
732
        }
733
        if(aggQD.getMax() == null && aggQD.getMin() != null){
734
            if (missingMaxMode == MissingMaximumMode.MaxToMin){
735
                aggQD.setMaximum(aggQD.getMin(), null);
736
            }else if (missingMaxMode == MissingMaximumMode.SkipRecord){
737
                return null;
738
            }
739
        }
740
        return aggQD;
741
    }
742

    
743
    private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
744

    
745
        newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
746

    
747
        BigDecimal min = null;
748
        BigDecimal max = null;
749
        BigDecimal average = null;
750
        BigDecimal sampleSize = null;
751
        newQd = handleMissingValues(newQd);
752
        if (newQd == null){
753
            return aggQd;
754
        }
755
        min = aggQd.getMin().min(newQd.getMin());
756
        max = aggQd.getMax().max(newQd.getMax());
757
        if (newQd.getSampleSize() != null && aggQd.getSampleSize() != null){
758
            sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
759
        }
760
        if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
761
            BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
762
            BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
763
            BigDecimal totalSum = aggTotalSum.add(newTotalSum);
764
            average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros();  //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
765
        }
766
        aggQd.setMinimum(min, null);
767
        aggQd.setMaximum(max, null);
768
        aggQd.setSampleSize(sampleSize, null);
769
        aggQd.setAverage(average, null);
770
        return aggQd;
771
    }
772

    
773
    private static List<BigDecimal> getExactValues(QuantitativeData qd) {
774
        List<BigDecimal> exactValues = qd.getStatisticalValues().stream()
775
                .filter(value->value.getType().equals(StatisticalMeasure.EXACT_VALUE()))
776
                .map(exact->exact.getValue())
777
                .collect(Collectors.toList());
778
        return exactValues;
779
    }
780

    
781
    private static boolean hasSameState(StateData sd1, StateData sd2) {
782
        if (sd2.getState() == null || sd1.getState() == null){
783
            return false;
784
        }else{
785
            return sd2.getState().getUuid().equals(sd1.getState().getUuid());
786
        }
787
    }
788
}
(11-11/12)