1
|
/**
|
2
|
* Copyright (C) 2019 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.api.service.description;
|
10
|
|
11
|
import java.math.BigDecimal;
|
12
|
import java.math.MathContext;
|
13
|
import java.util.ArrayList;
|
14
|
import java.util.Comparator;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Optional;
|
20
|
import java.util.Set;
|
21
|
import java.util.stream.Collectors;
|
22
|
|
23
|
import eu.etaxonomy.cdm.common.BigDecimalUtil;
|
24
|
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
|
25
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
26
|
import eu.etaxonomy.cdm.model.common.ICdmBase;
|
27
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
28
|
import eu.etaxonomy.cdm.model.description.CategoricalData;
|
29
|
import eu.etaxonomy.cdm.model.description.DescriptionBase;
|
30
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
31
|
import eu.etaxonomy.cdm.model.description.DescriptionType;
|
32
|
import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
|
33
|
import eu.etaxonomy.cdm.model.description.Feature;
|
34
|
import eu.etaxonomy.cdm.model.description.IDescribable;
|
35
|
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
|
36
|
import eu.etaxonomy.cdm.model.description.QuantitativeData;
|
37
|
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
|
38
|
import eu.etaxonomy.cdm.model.description.State;
|
39
|
import eu.etaxonomy.cdm.model.description.StateData;
|
40
|
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
|
41
|
import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
|
42
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
43
|
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
|
44
|
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
|
45
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
46
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
47
|
|
48
|
/**
 * Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
 * <br>
 * For all {@link SpecimenDescription}s belonging to this data set, new
 * aggregated {@link TaxonDescription}s are created for every taxon the
 * specimens are directly associated with.<br>
 * Also, lower rank taxon descriptions are aggregated to upper rank taxa.
 *
 * @author a.mueller
 * @author p.plitzner
 * @since 03.11.2019
 */
|
60
|
public class StructuredDescriptionAggregation
|
61
|
extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
|
62
|
|
63
|
private DescriptiveDataSet dataSet;
|
64
|
|
65
|
@Override
|
66
|
protected String pluralDataType(){
|
67
|
return "structured descriptive data";
|
68
|
}
|
69
|
|
70
|
@Override
|
71
|
protected void preAggregate(IProgressMonitor monitor) {
|
72
|
monitor.subTask("preAccumulate - nothing to do");
|
73
|
|
74
|
// take start time for performance testing
|
75
|
double start = System.currentTimeMillis();
|
76
|
|
77
|
getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
|
78
|
|
79
|
double end1 = System.currentTimeMillis();
|
80
|
logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
|
81
|
}
|
82
|
|
83
|
@Override
|
84
|
protected void verifyConfiguration(IProgressMonitor monitor){
|
85
|
if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
|
86
|
.contains(getConfig().getToParentSourceMode())){
|
87
|
throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
|
88
|
}
|
89
|
if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
|
90
|
.contains(getConfig().getWithinTaxonSourceMode())){
|
91
|
throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
|
92
|
}
|
93
|
}
|
94
|
|
95
|
private boolean hasCharacterData(DescriptionElementBase element) {
|
96
|
return hasCategoricalData(element) || hasQuantitativeData(element);
|
97
|
}
|
98
|
|
99
|
private boolean hasQuantitativeData(DescriptionElementBase element) {
|
100
|
if(element instanceof QuantitativeData
|
101
|
&& !((QuantitativeData) element).getStatisticalValues().isEmpty()){
|
102
|
QuantitativeData quantitativeData = (QuantitativeData)element;
|
103
|
return !getExactValues(quantitativeData).isEmpty()
|
104
|
|| quantitativeData.getMin()!=null
|
105
|
|| quantitativeData.getMax()!=null;
|
106
|
}
|
107
|
return false;
|
108
|
}
|
109
|
|
110
|
private boolean hasCategoricalData(DescriptionElementBase element) {
|
111
|
return element instanceof CategoricalData && !((CategoricalData) element).getStatesOnly().isEmpty();
|
112
|
}
|
113
|
|
114
|
@Override
|
115
|
protected void setDescriptionTitle(TaxonDescription description, Taxon taxon) {
|
116
|
String title = taxon.getName() != null? taxon.getName().getTitleCache() : taxon.getTitleCache();
|
117
|
description.setTitleCache("Aggregated description for " + title, true);
|
118
|
return;
|
119
|
}
|
120
|
|
121
|
@Override
|
122
|
protected TaxonDescription createNewDescription(Taxon taxon) {
|
123
|
String title = taxon.getTitleCache();
|
124
|
if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
|
125
|
TaxonDescription description = TaxonDescription.NewInstance(taxon);
|
126
|
description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
|
127
|
setDescriptionTitle(description, taxon);
|
128
|
return description;
|
129
|
}
|
130
|
|
131
|
@Override
|
132
|
protected boolean hasDescriptionType(TaxonDescription description) {
|
133
|
return dataSet.getDescriptions().contains(description) && description.isAggregatedStructuredDescription();
|
134
|
}
|
135
|
|
136
|
@Override
|
137
|
protected List<String> descriptionInitStrategy() {
|
138
|
return new ArrayList<>();
|
139
|
}
|
140
|
|
141
|
@Override
|
142
|
protected void addAggregationResultToDescription(TaxonDescription targetDescription,
|
143
|
ResultHolder resultHolder) {
|
144
|
|
145
|
StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
|
146
|
mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
|
147
|
mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
|
148
|
addAggregationSources(targetDescription, structuredResultHolder);
|
149
|
|
150
|
if(!targetDescription.getElements().isEmpty()){
|
151
|
dataSet.addDescription(targetDescription);
|
152
|
}else{
|
153
|
dataSet.removeDescription(targetDescription);
|
154
|
}
|
155
|
}
|
156
|
|
157
|
private <T extends DescriptionBase<?>> void addAggregationSources(TaxonDescription targetDescription,
|
158
|
StructuredDescriptionResultHolder structuredResultHolder) {
|
159
|
|
160
|
//Remove sources from description
|
161
|
Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
|
162
|
.filter(source->source.getType().equals(OriginalSourceType.Aggregation))
|
163
|
.collect(Collectors.toSet());
|
164
|
|
165
|
Set<IdentifiableSource> newSources = structuredResultHolder.sources;
|
166
|
for (IdentifiableSource newSource : newSources) {
|
167
|
IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
|
168
|
if (mergeSourceCandidate == null){
|
169
|
addNewSource(targetDescription, newSource);
|
170
|
}else{
|
171
|
mergeSource(mergeSourceCandidate, newSource);
|
172
|
sourcesToRemove.remove(mergeSourceCandidate);
|
173
|
}
|
174
|
}
|
175
|
|
176
|
//remove remaining sources-to-be-removed
|
177
|
for (IdentifiableSource sourceToRemove : sourcesToRemove) {
|
178
|
targetDescription.removeSource(sourceToRemove);
|
179
|
ICdmBase target = CdmBase.deproxy(sourceToRemove.getCdmSource());
|
180
|
if (target != null){
|
181
|
sourceToRemove.setCdmSource(null); //workaround for missing orphan removal #9801
|
182
|
if (target instanceof DescriptionBase){
|
183
|
@SuppressWarnings("unchecked")
|
184
|
T descriptionToDelete = (T)target;
|
185
|
if (descriptionToDelete.isCloneForSource()){
|
186
|
//TODO maybe this is not really needed as it is later done anyway with .deltedDescription
|
187
|
//but currently this still leads to an re-saved by cascade exception
|
188
|
((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
|
189
|
structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
|
190
|
}
|
191
|
}else if (target.isInstanceOf(Taxon.class)){
|
192
|
//nothing to do for now
|
193
|
} else {
|
194
|
throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
|
195
|
}
|
196
|
}
|
197
|
}
|
198
|
}
|
199
|
|
200
|
private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
|
201
|
IdentifiableSource newSource) {
|
202
|
|
203
|
//add source
|
204
|
targetDescription.addSource(newSource);
|
205
|
//if it is a description add it to the described entity (specimen, taxon)
|
206
|
ICdmBase target = newSource.getCdmSource();
|
207
|
if (target != null){
|
208
|
if (target.isInstanceOf(DescriptionBase.class)){
|
209
|
@SuppressWarnings("unchecked")
|
210
|
T description = (T)CdmBase.deproxy(target);
|
211
|
((IDescribable<T>)description.describedEntity()).addDescription(description);
|
212
|
}
|
213
|
}
|
214
|
}
|
215
|
|
216
|
//mergeablity has been checked before
|
217
|
private <T extends DescriptionBase<?>> void mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {
|
218
|
|
219
|
ICdmBase newTarget = newSource.getCdmSource();
|
220
|
if (newTarget != null){
|
221
|
newTarget = CdmBase.deproxy(newTarget);
|
222
|
if (newTarget instanceof DescriptionBase){
|
223
|
@SuppressWarnings("unchecked")
|
224
|
T newTargetDesc = (T)newTarget;
|
225
|
@SuppressWarnings("unchecked")
|
226
|
T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
|
227
|
mergeSourceDescription(existingTargetDesc, newTargetDesc);
|
228
|
((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
|
229
|
if (!existingTargetDesc.equals(newTargetDesc)){
|
230
|
((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
|
231
|
}
|
232
|
}else if (newTarget instanceof Taxon){
|
233
|
//nothing to do for now (we do not support reuse of sources linking to different taxa yet)
|
234
|
}else{
|
235
|
throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
|
236
|
}
|
237
|
}else{
|
238
|
throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
|
239
|
}
|
240
|
}
|
241
|
|
242
|
private <T extends DescriptionBase<?>> void mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {
|
243
|
|
244
|
Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
|
245
|
Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());
|
246
|
|
247
|
for (DescriptionElementBase newElement : newElements){
|
248
|
DescriptionElementBase newElementClone = newElement.clone();
|
249
|
Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
|
250
|
.filter(e->e.getFeature()!= null
|
251
|
&& e.getFeature().equals(newElementClone.getFeature()))
|
252
|
.findFirst();
|
253
|
if (matchingElement.isPresent()){
|
254
|
mergeDescriptionElement(matchingElement.get(), newElementClone);
|
255
|
elementsToRemove.remove(matchingElement.get());
|
256
|
}else{
|
257
|
existingSourceDescription.addElement(newElementClone);
|
258
|
}
|
259
|
}
|
260
|
addSourceDescriptionToDescribedEntity(newSourceDescription);
|
261
|
existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);
|
262
|
|
263
|
for (DescriptionElementBase debToRemove : elementsToRemove){
|
264
|
existingSourceDescription.removeElement(debToRemove);
|
265
|
}
|
266
|
|
267
|
}
|
268
|
|
269
|
@SuppressWarnings("unchecked")
|
270
|
private <T extends DescriptionBase<?>> void addSourceDescriptionToDescribedEntity(T sourceDescription) {
|
271
|
((IDescribable<T>)sourceDescription.describedEntity()).addDescription(sourceDescription);
|
272
|
}
|
273
|
@SuppressWarnings("unchecked")
|
274
|
private <T extends DescriptionBase<?>> void removeSourceDescriptionFromDescribedEntity(T sourceDescription) {
|
275
|
((IDescribable<T>)sourceDescription.describedEntity()).removeDescription(sourceDescription);
|
276
|
}
|
277
|
|
278
|
private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
|
279
|
for (IdentifiableSource existingSource : targetDescription.getSources()){
|
280
|
boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
|
281
|
if (isCandidate){
|
282
|
return existingSource;
|
283
|
}
|
284
|
}
|
285
|
return null;
|
286
|
}
|
287
|
|
288
|
private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
|
289
|
if (newSource.getCdmSource()!= null){
|
290
|
if (existingSource.getCdmSource() == null){
|
291
|
return false;
|
292
|
}else {
|
293
|
ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
|
294
|
ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
|
295
|
if (!newTarget.getClass().equals(existingTarget.getClass())){
|
296
|
return false;
|
297
|
}else{
|
298
|
if (newTarget instanceof SpecimenDescription){
|
299
|
SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
|
300
|
SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
|
301
|
//for now reuse is possible if both are descriptions for the same specimen
|
302
|
return newSob != null && newSob.equals(existingSob);
|
303
|
}else if (newTarget instanceof TaxonDescription){
|
304
|
Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
|
305
|
Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
|
306
|
//for now reuse is possible if both are descriptions for the same taxon
|
307
|
return newTaxon != null && newTaxon.equals(existingTaxon);
|
308
|
}else if (newTarget instanceof Taxon){
|
309
|
return newTarget.equals(existingTarget);
|
310
|
}else{
|
311
|
throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
|
312
|
}
|
313
|
}
|
314
|
}
|
315
|
}
|
316
|
|
317
|
return false;
|
318
|
}
|
319
|
|
320
|
private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
|
321
|
if (!getConfig().isCloneAggregatedSourceDescriptions() && newSourceDescription.isAggregatedStructuredDescription()){
|
322
|
return newSourceDescription;
|
323
|
}
|
324
|
@SuppressWarnings("unchecked")
|
325
|
T clonedDescription = (T)newSourceDescription.clone();
|
326
|
// clonedDescription.removeSources();
|
327
|
clonedDescription.removeDescriptiveDataSet(dataSet);
|
328
|
clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
|
329
|
clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
|
330
|
return clonedDescription;
|
331
|
}
|
332
|
|
333
|
private <S extends DescriptionElementBase> void mergeDescriptionElements(TaxonDescription targetDescription,
|
334
|
Map<Feature, ? extends DescriptionElementBase> newElementsMap, Class<? extends DescriptionElementBase> debClass) {
|
335
|
|
336
|
Set<DescriptionElementBase> elementsToRemove = new HashSet<>(
|
337
|
targetDescription.getElements().stream()
|
338
|
.filter(el->el.isInstanceOf(debClass))
|
339
|
.collect(Collectors.toSet()));
|
340
|
|
341
|
//for each character in "characters of new elements"
|
342
|
for (Feature characterNew : newElementsMap.keySet()) {
|
343
|
|
344
|
//if elements for this character exist in old data, remember any of them to keep (in clean data there should be only max. 1
|
345
|
DescriptionElementBase elementToStay = null;
|
346
|
for (DescriptionElementBase existingDeb : elementsToRemove) {
|
347
|
if(existingDeb.getFeature().equals(characterNew)){
|
348
|
elementToStay = existingDeb;
|
349
|
elementsToRemove.remove(existingDeb);
|
350
|
break;
|
351
|
}
|
352
|
}
|
353
|
|
354
|
//if there is no element for this character in old data, add the new element for this character to the target description (otherwise reuse old element)
|
355
|
if (elementToStay == null){
|
356
|
targetDescription.addElement(newElementsMap.get(characterNew));
|
357
|
}else{
|
358
|
mergeDescriptionElement(elementToStay, newElementsMap.get(characterNew));
|
359
|
}
|
360
|
}
|
361
|
|
362
|
//remove all elements not needed anymore
|
363
|
for(DescriptionElementBase elementToRemove : elementsToRemove){
|
364
|
targetDescription.removeElement(elementToRemove);
|
365
|
}
|
366
|
}
|
367
|
|
368
|
private void mergeDescriptionElement(DescriptionElementBase targetElement,
|
369
|
DescriptionElementBase newElement) {
|
370
|
|
371
|
targetElement = CdmBase.deproxy(targetElement);
|
372
|
newElement = CdmBase.deproxy(newElement);
|
373
|
if (targetElement instanceof CategoricalData){
|
374
|
mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
|
375
|
}else if (targetElement.isInstanceOf(QuantitativeData.class)){
|
376
|
mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
|
377
|
}else{
|
378
|
throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
|
379
|
}
|
380
|
}
|
381
|
|
382
|
private void mergeDescriptionElement(CategoricalData elementToStay,
|
383
|
CategoricalData newElement) {
|
384
|
List<StateData> oldData = new ArrayList<>(elementToStay.getStateData());
|
385
|
List<StateData> newData = new ArrayList<>(newElement.getStateData());
|
386
|
for (StateData newStateData : newData){
|
387
|
State state = newStateData.getState();
|
388
|
StateData oldStateData = firstByState(state, oldData);
|
389
|
if (oldStateData != null){
|
390
|
//for now only state and count is used for aggregation, below code needs to be adapted if this changes
|
391
|
oldStateData.setCount(newStateData.getCount());
|
392
|
oldData.remove(oldStateData);
|
393
|
}else{
|
394
|
elementToStay.addStateData(newStateData);
|
395
|
}
|
396
|
}
|
397
|
for (StateData stateDataToRemove : oldData){
|
398
|
elementToStay.removeStateData(stateDataToRemove);
|
399
|
}
|
400
|
}
|
401
|
|
402
|
private StateData firstByState(State state, List<StateData> oldData) {
|
403
|
if (state == null){
|
404
|
return null;
|
405
|
}
|
406
|
for (StateData sd : oldData){
|
407
|
if (state.equals(sd.getState())){
|
408
|
return sd;
|
409
|
}
|
410
|
}
|
411
|
return null;
|
412
|
}
|
413
|
|
414
|
private void mergeDescriptionElement(QuantitativeData elementToStay,
|
415
|
QuantitativeData newElement) {
|
416
|
Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
|
417
|
Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
|
418
|
for (StatisticalMeasurementValue newValue : newValues){
|
419
|
StatisticalMeasure type = newValue.getType();
|
420
|
StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
|
421
|
if (oldValue != null){
|
422
|
//for now only state and count is used for aggregation, below code needs to be adapted if this changes
|
423
|
oldValue.setValue(newValue.getValue());
|
424
|
oldValues.remove(oldValue);
|
425
|
}else{
|
426
|
elementToStay.addStatisticalValue(newValue);
|
427
|
}
|
428
|
}
|
429
|
for (StatisticalMeasurementValue valueToRemove : oldValues){
|
430
|
elementToStay.removeStatisticalValue(valueToRemove);
|
431
|
}
|
432
|
}
|
433
|
|
434
|
private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
|
435
|
if (type == null){
|
436
|
return null;
|
437
|
}
|
438
|
for (StatisticalMeasurementValue value : oldValues){
|
439
|
if (type.equals(value.getType())){
|
440
|
return value;
|
441
|
}
|
442
|
}
|
443
|
return null;
|
444
|
}
|
445
|
|
446
|
@Override
|
447
|
protected void initTransaction() {
|
448
|
dataSet = getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
|
449
|
}
|
450
|
|
451
|
@Override
|
452
|
protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
|
453
|
super.removeDescriptionIfEmpty(description, resultHolder);
|
454
|
if (description.getElements().isEmpty()){
|
455
|
dataSet.removeDescription(description);
|
456
|
}
|
457
|
}
|
458
|
|
459
|
@Override
|
460
|
protected void aggregateToParentTaxon(TaxonNode taxonNode,
|
461
|
ResultHolder resultHolder,
|
462
|
Set<TaxonDescription> excludedDescriptions) {
|
463
|
StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
|
464
|
Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
|
465
|
addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
|
466
|
}
|
467
|
|
468
|
@Override
|
469
|
protected void aggregateWithinSingleTaxon(Taxon taxon,
|
470
|
ResultHolder resultHolder,
|
471
|
Set<TaxonDescription> excludedDescriptions) {
|
472
|
StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
|
473
|
|
474
|
//specimen descriptions
|
475
|
Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
|
476
|
addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
|
477
|
|
478
|
//"literature" descriptions
|
479
|
if (getConfig().isIncludeLiterature()){
|
480
|
Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
|
481
|
addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
|
482
|
}
|
483
|
|
484
|
//"default" descriptions
|
485
|
//TODO add default descriptions
|
486
|
//xxx
|
487
|
|
488
|
}
|
489
|
|
490
|
private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
|
491
|
Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
|
492
|
AggregationMode aggregationMode) {
|
493
|
|
494
|
boolean descriptionWasUsed = false;
|
495
|
for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
|
496
|
for (DescriptionElementBase deb: desc.getElements()){
|
497
|
if (hasCharacterData(deb)){
|
498
|
if (deb.isInstanceOf(CategoricalData.class)){
|
499
|
addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
|
500
|
descriptionWasUsed = true;
|
501
|
}else if (deb.isInstanceOf(QuantitativeData.class)){
|
502
|
addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
|
503
|
descriptionWasUsed = true;
|
504
|
}
|
505
|
}
|
506
|
}
|
507
|
|
508
|
//sources
|
509
|
AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
|
510
|
if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
|
511
|
IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
|
512
|
desc = CdmBase.deproxy(desc);
|
513
|
|
514
|
switch (sourceMode){
|
515
|
case DESCRIPTION:
|
516
|
DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
|
517
|
source.setCdmSource(clonedDesc);
|
518
|
break;
|
519
|
case TAXON:
|
520
|
if (desc instanceof TaxonDescription){
|
521
|
Taxon taxon = ((TaxonDescription) desc).getTaxon();
|
522
|
source.setCdmSource(taxon);
|
523
|
}else {
|
524
|
throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
|
525
|
}
|
526
|
break;
|
527
|
case NONE:
|
528
|
source = null;
|
529
|
break;
|
530
|
case ALL: //not yet supported
|
531
|
throw new AggregationException("Source mode not yet supported: " + sourceMode);
|
532
|
case ALL_SAMEVALUE: //makes no sense
|
533
|
throw new AggregationException("Illegal source mode: " + sourceMode);
|
534
|
default:
|
535
|
throw new AggregationException("Source mode not supported: " + sourceMode);
|
536
|
}
|
537
|
if (source != null){
|
538
|
descriptiveResultHolder.sources.add(source);
|
539
|
}
|
540
|
}
|
541
|
}
|
542
|
}
|
543
|
|
544
|
private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
|
545
|
QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
|
546
|
if(aggregatedQuantitativeData==null){
|
547
|
// no QuantitativeData with this feature in aggregation
|
548
|
aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
|
549
|
}
|
550
|
else{
|
551
|
aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
|
552
|
}
|
553
|
if (aggregatedQuantitativeData != null){
|
554
|
resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
|
555
|
}
|
556
|
}
|
557
|
|
558
|
private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
|
559
|
CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
|
560
|
if(aggregatedCategoricalData==null){
|
561
|
// no CategoricalData with this feature in aggregation
|
562
|
aggregatedCategoricalData = cd.clone();
|
563
|
// set count to 1 if not set
|
564
|
aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
|
565
|
resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
|
566
|
}
|
567
|
else{
|
568
|
// split all StateData into those where the state already exists and those where it doesn't
|
569
|
List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
|
570
|
List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
|
571
|
List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());
|
572
|
|
573
|
for (StateData sd : sdWithNoExistingStateInAggregation) {
|
574
|
StateData clone = sd.clone();
|
575
|
// set count to 1 if not set
|
576
|
if(clone.getCount()==null){
|
577
|
clone.incrementCount();
|
578
|
}
|
579
|
aggregatedCategoricalData.addStateData(clone);
|
580
|
}
|
581
|
|
582
|
for (StateData sdExist : sdWithExistingStateInAggregation) {
|
583
|
List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
|
584
|
.filter(sd->hasSameState(sdExist, sd))
|
585
|
.collect(Collectors.toList());
|
586
|
for (StateData stateData : aggregatedSameStateData) {
|
587
|
if(sdExist.getCount()==null){
|
588
|
stateData.incrementCount();
|
589
|
}
|
590
|
else{
|
591
|
stateData.setCount(stateData.getCount()+sdExist.getCount());
|
592
|
}
|
593
|
}
|
594
|
}
|
595
|
}
|
596
|
}
|
597
|
|
598
|
@Override
|
599
|
protected StructuredDescriptionResultHolder createResultHolder() {
|
600
|
return new StructuredDescriptionResultHolder();
|
601
|
}
|
602
|
|
603
|
private class StructuredDescriptionResultHolder extends ResultHolder{
|
604
|
private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
|
605
|
private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
|
606
|
private Set<IdentifiableSource> sources = new HashSet<>();
|
607
|
@Override
|
608
|
public String toString() {
|
609
|
return "SDResultHolder [categoricals=" + categoricalMap.size()
|
610
|
+ ", quantitatives=" + quantitativeMap.size()
|
611
|
+ ", sources=" + sources.size()
|
612
|
+ ", descriptionsToDelete=" + this.descriptionsToDelete.size()
|
613
|
+ "]";
|
614
|
}
|
615
|
}
|
616
|
|
617
|
private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
|
618
|
Set<TaxonDescription> result = new HashSet<>();
|
619
|
List<TaxonNode> childNodes = taxonNode.getChildNodes();
|
620
|
for (TaxonNode childNode : childNodes) {
|
621
|
Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
|
622
|
result.addAll(childDescriptions.stream()
|
623
|
.filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
|
624
|
.filter(desc->dataSet.getDescriptions().contains(desc))
|
625
|
.collect(Collectors.toSet()));
|
626
|
}
|
627
|
return result;
|
628
|
}
|
629
|
|
630
|
/**
|
631
|
* Computes all specimen attached to the given taxon within the given dataSet.
|
632
|
* For these secimen it returns all attache
|
633
|
* */
|
634
|
private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
|
635
|
Set<SpecimenDescription> result = new HashSet<>();
|
636
|
//TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
|
637
|
for (TaxonDescription taxonDesc: taxon.getDescriptions()){
|
638
|
for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
|
639
|
if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
|
640
|
IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
|
641
|
SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
|
642
|
Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
|
643
|
for(SpecimenDescription specimenDescription : descriptions){
|
644
|
if(dataSet.getDescriptions().contains(specimenDescription) &&
|
645
|
specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
|
646
|
result.add(specimenDescription);
|
647
|
}
|
648
|
}
|
649
|
}
|
650
|
}
|
651
|
}
|
652
|
return result;
|
653
|
}
|
654
|
|
655
|
private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
|
656
|
Set<TaxonDescription> result = new HashSet<>();
|
657
|
//TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
|
658
|
for(TaxonDescription taxonDescription : taxon.getDescriptions()){
|
659
|
if(dataSet.getDescriptions().contains(taxonDescription)
|
660
|
&& taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
|
661
|
&& taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
|
662
|
result.add(taxonDescription);
|
663
|
}
|
664
|
}
|
665
|
return result;
|
666
|
}
|
667
|
|
668
|
/**
|
669
|
* Evaluates statistics for exact values collection and handles missing min and max values
|
670
|
*/
|
671
|
private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
|
672
|
QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
|
673
|
aggQD.setUnit(sourceQd.getUnit());
|
674
|
Set<BigDecimal> exactValues = sourceQd.getExactValues();
|
675
|
if(!exactValues.isEmpty()){
|
676
|
// qd is not already aggregated
|
677
|
Comparator<BigDecimal> comp = Comparator.naturalOrder();
|
678
|
int exactValueSampleSize = exactValues.size();
|
679
|
BigDecimal exactValueMin = exactValues.stream().min(comp).get();
|
680
|
BigDecimal exactValueMax = exactValues.stream().max(comp).get();
|
681
|
BigDecimal exactValueAvg = BigDecimalUtil.average(exactValues);
|
682
|
//TODO also check for typical boundary data
|
683
|
if(sourceQd.getMin() == null && sourceQd.getMax() == null){
|
684
|
aggQD.setSampleSize(new BigDecimal(exactValueSampleSize), null);
|
685
|
aggQD.setAverage(exactValueAvg, null);
|
686
|
}
|
687
|
aggQD.setMinimum(sourceQd.getMin() == null ? exactValueMin: sourceQd.getMin().min(exactValueMin), null);
|
688
|
aggQD.setMaximum(sourceQd.getMax() == null ? exactValueMax: sourceQd.getMax().max(exactValueMax), null);
|
689
|
}
|
690
|
else{
|
691
|
// qd has only min, max, ... but no exact values
|
692
|
aggQD = sourceQd.clone();
|
693
|
aggQD = handleMissingValues(aggQD);
|
694
|
}
|
695
|
return aggQD;
|
696
|
}
|
697
|
|
698
|
private QuantitativeData handleMissingValues(QuantitativeData qd) {
|
699
|
//min max
|
700
|
qd = handleMissingMinOrMax(qd);
|
701
|
//average
|
702
|
if (qd != null && qd.getAverage() == null){
|
703
|
BigDecimal n = qd.getSampleSize();
|
704
|
if(n != null && !n.equals(0f)){
|
705
|
BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n);
|
706
|
qd.setAverage(average, null);
|
707
|
}
|
708
|
}
|
709
|
return qd;
|
710
|
}
|
711
|
|
712
|
private QuantitativeData handleMissingMinOrMax(QuantitativeData qd) {
|
713
|
return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
|
714
|
}
|
715
|
|
716
|
public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
|
717
|
MissingMaximumMode missingMaxMode) {
|
718
|
if(aggQD.getMin() == null && aggQD.getMax() != null){
|
719
|
if (missingMinMode == MissingMinimumMode.MinToZero) {
|
720
|
aggQD.setMinimum(BigDecimal.valueOf(0f), null);
|
721
|
}else if (missingMinMode == MissingMinimumMode.MinToMax){
|
722
|
aggQD.setMinimum(aggQD.getMax(), null);
|
723
|
}else if (missingMinMode == MissingMinimumMode.SkipRecord){
|
724
|
return null;
|
725
|
}
|
726
|
}
|
727
|
if(aggQD.getMax() == null && aggQD.getMin() != null){
|
728
|
if (missingMaxMode == MissingMaximumMode.MaxToMin){
|
729
|
aggQD.setMaximum(aggQD.getMin(), null);
|
730
|
}else if (missingMaxMode == MissingMaximumMode.SkipRecord){
|
731
|
return null;
|
732
|
}
|
733
|
}
|
734
|
return aggQD;
|
735
|
}
|
736
|
|
737
|
private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
|
738
|
|
739
|
newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
|
740
|
|
741
|
BigDecimal min = null;
|
742
|
BigDecimal max = null;
|
743
|
BigDecimal average = null;
|
744
|
BigDecimal sampleSize = null;
|
745
|
newQd = handleMissingValues(newQd);
|
746
|
if (newQd == null){
|
747
|
return aggQd;
|
748
|
}
|
749
|
min = aggQd.getMin().min(newQd.getMin());
|
750
|
max = aggQd.getMax().max(newQd.getMax());
|
751
|
if (newQd.getSampleSize() != null && aggQd.getSampleSize() != null){
|
752
|
sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
|
753
|
}
|
754
|
if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
|
755
|
BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
|
756
|
BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
|
757
|
BigDecimal totalSum = aggTotalSum.add(newTotalSum);
|
758
|
average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros(); //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
|
759
|
}
|
760
|
aggQd.setMinimum(min, null);
|
761
|
aggQd.setMaximum(max, null);
|
762
|
aggQd.setSampleSize(sampleSize, null);
|
763
|
aggQd.setAverage(average, null);
|
764
|
return aggQd;
|
765
|
}
|
766
|
|
767
|
private static List<BigDecimal> getExactValues(QuantitativeData qd) {
|
768
|
List<BigDecimal> exactValues = qd.getStatisticalValues().stream()
|
769
|
.filter(value->value.getType().equals(StatisticalMeasure.EXACT_VALUE()))
|
770
|
.map(exact->exact.getValue())
|
771
|
.collect(Collectors.toList());
|
772
|
return exactValues;
|
773
|
}
|
774
|
|
775
|
private static boolean hasSameState(StateData sd1, StateData sd2) {
|
776
|
if (sd2.getState() == null || sd1.getState() == null){
|
777
|
return false;
|
778
|
}else{
|
779
|
return sd2.getState().getUuid().equals(sd1.getState().getUuid());
|
780
|
}
|
781
|
}
|
782
|
}
|