1
|
/**
|
2
|
* Copyright (C) 2019 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.api.service.description;
|
10
|
|
11
|
import java.math.BigDecimal;
|
12
|
import java.math.MathContext;
|
13
|
import java.util.ArrayList;
|
14
|
import java.util.Comparator;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Optional;
|
20
|
import java.util.Set;
|
21
|
import java.util.stream.Collectors;
|
22
|
|
23
|
import eu.etaxonomy.cdm.common.BigDecimalUtil;
|
24
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
25
|
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
|
26
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
27
|
import eu.etaxonomy.cdm.model.common.ICdmBase;
|
28
|
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
|
29
|
import eu.etaxonomy.cdm.model.description.CategoricalData;
|
30
|
import eu.etaxonomy.cdm.model.description.DescriptionBase;
|
31
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
32
|
import eu.etaxonomy.cdm.model.description.DescriptionType;
|
33
|
import eu.etaxonomy.cdm.model.description.DescriptiveDataSet;
|
34
|
import eu.etaxonomy.cdm.model.description.Feature;
|
35
|
import eu.etaxonomy.cdm.model.description.IDescribable;
|
36
|
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
|
37
|
import eu.etaxonomy.cdm.model.description.QuantitativeData;
|
38
|
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
|
39
|
import eu.etaxonomy.cdm.model.description.State;
|
40
|
import eu.etaxonomy.cdm.model.description.StateData;
|
41
|
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
|
42
|
import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
|
43
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
44
|
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
|
45
|
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
|
46
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
47
|
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
|
48
|
|
49
|
/**
|
50
|
* Aggregates the character data for a given {@link DescriptiveDataSet}.<br>
|
51
|
* <br>
|
52
|
 * For all {@link SpecimenDescription}s belonging to this data set new
 * aggregated {@link TaxonDescription}s are created for every taxon the
 * specimens are directly associated with.<BR>
|
55
|
* Also lower rank taxon descriptions are aggregated to upper rank taxa.
|
56
|
*
|
57
|
* @author a.mueller
|
58
|
* @author p.plitzner
|
59
|
* @since 03.11.2019
|
60
|
*/
|
61
|
public class StructuredDescriptionAggregation
|
62
|
extends DescriptionAggregationBase<StructuredDescriptionAggregation, StructuredDescriptionAggregationConfiguration>{
|
63
|
|
64
|
    //the descriptive data set being aggregated; (re-)loaded per transaction in initTransaction()
    private DescriptiveDataSet dataSet;
|
65
|
|
66
|
@Override
|
67
|
protected String pluralDataType(){
|
68
|
return "structured descriptive data";
|
69
|
}
|
70
|
|
71
|
@Override
|
72
|
protected void preAggregate(IProgressMonitor monitor) {
|
73
|
monitor.subTask("preAccumulate - nothing to do");
|
74
|
|
75
|
// take start time for performance testing
|
76
|
double start = System.currentTimeMillis();
|
77
|
|
78
|
getResult().setCdmEntity(getDescriptiveDatasetService().load(getConfig().getDatasetUuid()));
|
79
|
|
80
|
double end1 = System.currentTimeMillis();
|
81
|
logger.info("Time elapsed for pre-accumulate() : " + (end1 - start) / (1000) + "s");
|
82
|
}
|
83
|
|
84
|
@Override
|
85
|
protected void verifyConfiguration(IProgressMonitor monitor){
|
86
|
if (!AggregationSourceMode.list(AggregationMode.ToParent, AggregationType.StructuredDescription)
|
87
|
.contains(getConfig().getToParentSourceMode())){
|
88
|
throw new AggregationException("Unsupported source mode for to-parent aggregation: " + getConfig().getToParentSourceMode());
|
89
|
}
|
90
|
if (!AggregationSourceMode.list(AggregationMode.WithinTaxon, AggregationType.StructuredDescription)
|
91
|
.contains(getConfig().getWithinTaxonSourceMode())){
|
92
|
throw new AggregationException("Unsupported source mode for within-taxon aggregation: " + getConfig().getWithinTaxonSourceMode());
|
93
|
}
|
94
|
}
|
95
|
|
96
|
private boolean hasCharacterData(DescriptionElementBase element) {
|
97
|
return hasCategoricalData(element) || hasQuantitativeData(element);
|
98
|
}
|
99
|
|
100
|
private boolean hasQuantitativeData(DescriptionElementBase element) {
|
101
|
if(element instanceof QuantitativeData
|
102
|
&& !((QuantitativeData) element).getStatisticalValues().isEmpty()){
|
103
|
QuantitativeData quantitativeData = (QuantitativeData)element;
|
104
|
return !getExactValues(quantitativeData).isEmpty()
|
105
|
|| quantitativeData.getMin()!=null
|
106
|
|| quantitativeData.getMax()!=null;
|
107
|
}
|
108
|
return false;
|
109
|
}
|
110
|
|
111
|
private boolean hasCategoricalData(DescriptionElementBase element) {
|
112
|
return element instanceof CategoricalData && !((CategoricalData) element).getStatesOnly().isEmpty();
|
113
|
}
|
114
|
|
115
|
@Override
|
116
|
protected void setDescriptionTitle(TaxonDescription description, Taxon taxon) {
|
117
|
String title = taxon.getName() != null? taxon.getName().getTitleCache() : taxon.getTitleCache();
|
118
|
description.setTitleCache("Aggregated description for " + title, true);
|
119
|
return;
|
120
|
}
|
121
|
|
122
|
@Override
|
123
|
protected TaxonDescription createNewDescription(Taxon taxon) {
|
124
|
String title = taxon.getTitleCache();
|
125
|
if (logger.isDebugEnabled()){logger.debug("creating new description for " + title);}
|
126
|
TaxonDescription description = TaxonDescription.NewInstance(taxon);
|
127
|
description.addType(DescriptionType.AGGREGATED_STRUC_DESC);
|
128
|
setDescriptionTitle(description, taxon);
|
129
|
return description;
|
130
|
}
|
131
|
|
132
|
@Override
|
133
|
protected boolean hasDescriptionType(TaxonDescription description) {
|
134
|
return dataSet.getDescriptions().contains(description) && description.isAggregatedStructuredDescription();
|
135
|
}
|
136
|
|
137
|
@Override
|
138
|
protected List<String> descriptionInitStrategy() {
|
139
|
return new ArrayList<>();
|
140
|
}
|
141
|
|
142
|
@Override
|
143
|
protected boolean mergeAggregationResultIntoTargetDescription(TaxonDescription targetDescription,
|
144
|
ResultHolder resultHolder) {
|
145
|
|
146
|
StructuredDescriptionResultHolder structuredResultHolder = (StructuredDescriptionResultHolder)resultHolder;
|
147
|
boolean updated = mergeDescriptionElements(targetDescription, structuredResultHolder.categoricalMap, CategoricalData.class);
|
148
|
updated |= mergeDescriptionElements(targetDescription, structuredResultHolder.quantitativeMap, QuantitativeData.class);
|
149
|
updated |= mergeDescriptionSources(targetDescription, structuredResultHolder);
|
150
|
|
151
|
if(!targetDescription.getElements().isEmpty()){
|
152
|
dataSet.addDescription(targetDescription);
|
153
|
}else{
|
154
|
dataSet.removeDescription(targetDescription);
|
155
|
}
|
156
|
return updated;
|
157
|
}
|
158
|
|
159
|
@Override
|
160
|
protected boolean isRelevantDescriptionElement(DescriptionElementBase deb){
|
161
|
return deb.isInstanceOf(CategoricalData.class) || deb.isInstanceOf(QuantitativeData.class);
|
162
|
}
|
163
|
|
164
|
    /**
     * Synchronizes the aggregation sources of the target description with the
     * newly computed sources in the result holder: reusable existing sources
     * are merged in place, new ones are added, and obsolete aggregation
     * sources (including their cloned source descriptions) are removed.
     *
     * @return <code>true</code> if any source was added, changed or a
     *         persisted source was removed
     */
    private <T extends DescriptionBase<?>> boolean mergeDescriptionSources(TaxonDescription targetDescription,
            StructuredDescriptionResultHolder structuredResultHolder) {

        boolean updated = false;
        //Remove sources from description
        //start with all existing aggregation sources as removal candidates
        Set<IdentifiableSource> sourcesToRemove = targetDescription.getSources().stream()
                .filter(source->source.getType().equals(OriginalSourceType.Aggregation))
                .collect(Collectors.toSet());

        Set<IdentifiableSource> newSources = structuredResultHolder.sources;
        for (IdentifiableSource newSource : newSources) {
            IdentifiableSource mergeSourceCandidate = findSourceCandidate(targetDescription, newSource);
            if (mergeSourceCandidate == null){
                //no reusable existing source -> attach the new one
                addNewSource(targetDescription, newSource);
                updated = true;
            }else{
                //reuse the existing source; it must no longer be removed
                updated |= mergeSource(mergeSourceCandidate, newSource);
                sourcesToRemove.remove(mergeSourceCandidate);
            }
        }

        //remove remaining sources-to-be-removed
        for (IdentifiableSource sourceToRemove : sourcesToRemove) {
            targetDescription.removeSource(sourceToRemove);
            //only count as update if the removed source was already persisted
            updated |= sourceToRemove.isPersited();
            ICdmBase target = CdmBase.deproxy(sourceToRemove.getCdmSource());
            if (target != null){
                sourceToRemove.setCdmSource(null); //workaround for missing orphan removal #9801
                if (target instanceof DescriptionBase){
                    @SuppressWarnings("unchecked")
                    T descriptionToDelete = (T)target;
                    if (descriptionToDelete.isCloneForSource()){
                        //TODO maybe this is not really needed as it is later done anyway with .deletedDescription
                        //but currently this still leads to a re-saved by cascade exception
                        ((IDescribable<T>)descriptionToDelete.describedEntity()).removeDescription(descriptionToDelete);
                        structuredResultHolder.descriptionsToDelete.add(descriptionToDelete);
                    }
                } else if (target.isInstanceOf(Taxon.class)){
                    //nothing to do for now
                } else {
                    throw new AggregationException("CdmLink target type not yet supported: " + target.getClass().getSimpleName());
                }
            }
        }
        return updated;
    }
|
210
|
|
211
|
private <T extends DescriptionBase<?>> void addNewSource(TaxonDescription targetDescription,
|
212
|
IdentifiableSource newSource) {
|
213
|
|
214
|
//add source
|
215
|
targetDescription.addSource(newSource);
|
216
|
//if it is a description add it to the described entity (specimen, taxon)
|
217
|
ICdmBase target = newSource.getCdmSource();
|
218
|
if (target != null){
|
219
|
if (target.isInstanceOf(DescriptionBase.class)){
|
220
|
@SuppressWarnings("unchecked")
|
221
|
T description = (T)CdmBase.deproxy(target);
|
222
|
((IDescribable<T>)description.describedEntity()).addDescription(description);
|
223
|
}
|
224
|
}
|
225
|
}
|
226
|
|
227
|
//mergeablity has been checked before
|
228
|
    /**
     * Merges the data behind {@code newSource} into the already existing
     * {@code mergeCandidate} source; mergeability has been checked by the
     * caller via {@link #isCandidateForSourceReuse(IdentifiableSource, IdentifiableSource)}.
     *
     * @return <code>true</code> if the existing source's data changed
     * @throws AggregationException if the new source does not link to a
     *         description or taxon instance
     */
    private <T extends DescriptionBase<?>> boolean mergeSource(IdentifiableSource mergeCandidate, IdentifiableSource newSource) {

        boolean updated = false;
        ICdmBase newTarget = newSource.getCdmSource();
        if (newTarget != null){
            newTarget = CdmBase.deproxy(newTarget);
            if (newTarget instanceof DescriptionBase){
                @SuppressWarnings("unchecked")
                T newTargetDesc = (T)newTarget;
                @SuppressWarnings("unchecked")
                T existingTargetDesc = CdmBase.deproxy((T)mergeCandidate.getCdmSource());
                //merge the new source description's elements into the existing one
                updated |= mergeSourceDescription(existingTargetDesc, newTargetDesc);
                ((IDescribable<T>)existingTargetDesc.describedEntity()).addDescription(existingTargetDesc);
                //detach the new description unless it is the very same object
                if (!existingTargetDesc.equals(newTargetDesc)){
                    ((IDescribable<T>)newTargetDesc.describedEntity()).removeDescription(newTargetDesc);
                }
            }else if (newTarget instanceof Taxon){
                //nothing to do for now (we do not support reuse of sources linking to different taxa yet)
            }else{
                throw new AggregationException("Sources not linking to a description or a taxon instance currently not yet supported.");
            }
        }else{
            throw new AggregationException("Sources not linking to another CdmBase instance currently not yet supported.");
        }
        return updated;
    }
|
254
|
|
255
|
    /**
     * Merges the elements of {@code newSourceDescription} into
     * {@code existingSourceDescription}: elements are matched by feature,
     * matched elements are merged, unmatched new elements are added (as
     * clones) and elements no longer present are removed. The title cache
     * is taken over from the new description.
     *
     * @return <code>true</code> if the existing description changed
     */
    private <T extends DescriptionBase<?>> boolean mergeSourceDescription(T existingSourceDescription, T newSourceDescription) {

        boolean updated = false;
        //all existing elements start as removal candidates
        Set<DescriptionElementBase> elementsToRemove = new HashSet<>(existingSourceDescription.getElements());
        Set<DescriptionElementBase> newElements = new HashSet<>(newSourceDescription.getElements());

        for (DescriptionElementBase newElement : newElements){
            DescriptionElementBase newElementClone = newElement.clone();
            //find an existing element for the same feature
            Optional<DescriptionElementBase> matchingElement = elementsToRemove.stream()
                    .filter(e->e.getFeature()!= null
                        && e.getFeature().equals(newElementClone.getFeature()))
                    .findFirst();
            if (matchingElement.isPresent()){
                updated |= mergeDescriptionElement(matchingElement.get(), newElementClone);
                elementsToRemove.remove(matchingElement.get());
            }else{
                existingSourceDescription.addElement(newElementClone);
                updated = true;
            }
        }
        updated |= addSourceDescriptionToDescribedEntity(newSourceDescription);
        //take over the (possibly changed) title from the new description
        existingSourceDescription.setTitleCache(newSourceDescription.getTitleCache(), true);

        for (DescriptionElementBase debToRemove : elementsToRemove){
            existingSourceDescription.removeElement(debToRemove);
            //only count as update if the removed element was already persisted
            updated |= debToRemove.isPersited();
        }
        return updated;
    }
|
284
|
|
285
|
@SuppressWarnings("unchecked")
|
286
|
private <T extends DescriptionBase<?>> boolean addSourceDescriptionToDescribedEntity(T sourceDescription) {
|
287
|
boolean updated = false;
|
288
|
IDescribable<T> describedEntity = ((IDescribable<T>)sourceDescription.describedEntity());
|
289
|
if (describedEntity.getDescriptions().contains(sourceDescription)){
|
290
|
describedEntity.addDescription(sourceDescription);
|
291
|
updated = true;
|
292
|
}
|
293
|
return updated;
|
294
|
}
|
295
|
|
296
|
private IdentifiableSource findSourceCandidate(TaxonDescription targetDescription, IdentifiableSource newSource) {
|
297
|
for (IdentifiableSource existingSource : targetDescription.getSources()){
|
298
|
boolean isCandidate = isCandidateForSourceReuse(existingSource, newSource);
|
299
|
if (isCandidate){
|
300
|
return existingSource;
|
301
|
}
|
302
|
}
|
303
|
return null;
|
304
|
}
|
305
|
|
306
|
    /**
     * Decides whether {@code existingSource} may be reused for
     * {@code newSource}. Reuse requires both sources to link to targets of the
     * same class; specimen descriptions must describe the same specimen,
     * taxon descriptions the same taxon, and taxa must be equal.
     *
     * @throws AggregationException for unsupported target classes
     */
    private boolean isCandidateForSourceReuse(IdentifiableSource existingSource, IdentifiableSource newSource) {
        if (newSource.getCdmSource()!= null){
            if (existingSource.getCdmSource() == null){
                return false;
            }else {
                ICdmBase newTarget = CdmBase.deproxy(newSource.getCdmSource());
                ICdmBase existingTarget = CdmBase.deproxy((CdmBase)existingSource.getCdmSource());
                if (!newTarget.getClass().equals(existingTarget.getClass())){
                    //different target classes can never be merged
                    return false;
                }else{
                    if (newTarget instanceof SpecimenDescription){
                        SpecimenOrObservationBase<?> newSob = ((SpecimenDescription)newTarget).getDescribedSpecimenOrObservation();
                        SpecimenOrObservationBase<?> existingSob = ((SpecimenDescription)existingTarget).getDescribedSpecimenOrObservation();
                        //for now reuse is possible if both are descriptions for the same specimen
                        return newSob != null && newSob.equals(existingSob);
                    }else if (newTarget instanceof TaxonDescription){
                        Taxon newTaxon = ((TaxonDescription)newTarget).getTaxon();
                        Taxon existingTaxon = ((TaxonDescription)existingTarget).getTaxon();
                        //for now reuse is possible if both are descriptions for the same taxon
                        return newTaxon != null && newTaxon.equals(existingTaxon);
                    }else if (newTarget instanceof Taxon){
                        return newTarget.equals(existingTarget);
                    }else{
                        throw new AggregationException("Other classes then SpecimenDescription and TaxonDescription are not yet supported. But was: " + newTarget.getClass());
                    }
                }
            }
        }

        //sources without a CDM target are never reused
        return false;
    }
|
337
|
|
338
|
private <T extends DescriptionBase<?>> T cloneNewSourceDescription(T newSourceDescription) {
|
339
|
if (!getConfig().isCloneAggregatedSourceDescriptions() && newSourceDescription.isAggregatedStructuredDescription()){
|
340
|
return newSourceDescription;
|
341
|
}
|
342
|
@SuppressWarnings("unchecked")
|
343
|
T clonedDescription = (T)newSourceDescription.clone();
|
344
|
// clonedDescription.removeSources();
|
345
|
clonedDescription.removeDescriptiveDataSet(dataSet);
|
346
|
clonedDescription.getTypes().add(DescriptionType.CLONE_FOR_SOURCE);
|
347
|
clonedDescription.setTitleCache("Clone: " + clonedDescription.getTitleCache(), true);
|
348
|
return clonedDescription;
|
349
|
}
|
350
|
|
351
|
@Override
|
352
|
protected <S extends DescriptionElementBase> boolean mergeDescriptionElement(S targetElement,
|
353
|
S newElement) {
|
354
|
|
355
|
boolean updated = false;
|
356
|
targetElement = CdmBase.deproxy(targetElement);
|
357
|
newElement = CdmBase.deproxy(newElement);
|
358
|
if (targetElement instanceof CategoricalData){
|
359
|
updated |= mergeDescriptionElement((CategoricalData)targetElement, (CategoricalData)newElement);
|
360
|
}else if (targetElement.isInstanceOf(QuantitativeData.class)){
|
361
|
updated |= mergeDescriptionElement((QuantitativeData)targetElement, (QuantitativeData)newElement);
|
362
|
}else{
|
363
|
throw new AggregationException("Class not supported: " + targetElement.getClass().getName());
|
364
|
}
|
365
|
return updated;
|
366
|
}
|
367
|
|
368
|
private boolean mergeDescriptionElement(CategoricalData elementToStay,
|
369
|
CategoricalData newElement) {
|
370
|
|
371
|
boolean updated = false;
|
372
|
List<StateData> dataToRemove = new ArrayList<>(elementToStay.getStateData());
|
373
|
List<StateData> newData = new ArrayList<>(newElement.getStateData());
|
374
|
for (StateData newStateData : newData){
|
375
|
State state = newStateData.getState();
|
376
|
StateData oldStateData = firstByState(state, dataToRemove);
|
377
|
if (oldStateData != null){
|
378
|
//for now only state and count is used for aggregation, below code needs to be adapted if this changes
|
379
|
if (!CdmUtils.nullSafeEqual(oldStateData.getCount(), newStateData.getCount())){
|
380
|
oldStateData.setCount(newStateData.getCount());
|
381
|
// getResult().addUpdatedObject(oldStateData);
|
382
|
updated = true;
|
383
|
}
|
384
|
dataToRemove.remove(oldStateData);
|
385
|
}else{
|
386
|
elementToStay.addStateData(newStateData);
|
387
|
updated = true;
|
388
|
}
|
389
|
}
|
390
|
for (StateData stateDataToRemove : dataToRemove){
|
391
|
elementToStay.removeStateData(stateDataToRemove);
|
392
|
updated |= stateDataToRemove.isPersited();
|
393
|
}
|
394
|
return updated;
|
395
|
}
|
396
|
|
397
|
private StateData firstByState(State state, List<StateData> oldData) {
|
398
|
if (state == null){
|
399
|
return null;
|
400
|
}
|
401
|
for (StateData sd : oldData){
|
402
|
if (state.equals(sd.getState())){
|
403
|
return sd;
|
404
|
}
|
405
|
}
|
406
|
return null;
|
407
|
}
|
408
|
|
409
|
private boolean mergeDescriptionElement(QuantitativeData elementToStay,
|
410
|
QuantitativeData newElement) {
|
411
|
|
412
|
boolean updated = false;
|
413
|
|
414
|
Set<StatisticalMeasurementValue> oldValues = new HashSet<>(elementToStay.getStatisticalValues());
|
415
|
Set<StatisticalMeasurementValue> newValues = new HashSet<>(newElement.getStatisticalValues());
|
416
|
for (StatisticalMeasurementValue newValue : newValues){
|
417
|
StatisticalMeasure type = newValue.getType();
|
418
|
StatisticalMeasurementValue oldValue = firstValueByType(type, oldValues);
|
419
|
if (oldValue != null){
|
420
|
//for now only state and count is used for aggregation, below code needs to be adapted if this changes
|
421
|
if (!CdmUtils.nullSafeEqual(oldValue.getValue(), newValue.getValue())){
|
422
|
oldValue.setValue(newValue.getValue());
|
423
|
updated = true;
|
424
|
}
|
425
|
oldValues.remove(oldValue);
|
426
|
}else{
|
427
|
elementToStay.addStatisticalValue(newValue);
|
428
|
updated = true;
|
429
|
}
|
430
|
}
|
431
|
for (StatisticalMeasurementValue valueToRemove : oldValues){
|
432
|
elementToStay.removeStatisticalValue(valueToRemove);
|
433
|
updated |= valueToRemove.isPersited();
|
434
|
}
|
435
|
return updated;
|
436
|
}
|
437
|
|
438
|
private StatisticalMeasurementValue firstValueByType(StatisticalMeasure type, Set<StatisticalMeasurementValue> oldValues) {
|
439
|
if (type == null){
|
440
|
return null;
|
441
|
}
|
442
|
for (StatisticalMeasurementValue value : oldValues){
|
443
|
if (type.equals(value.getType())){
|
444
|
return value;
|
445
|
}
|
446
|
}
|
447
|
return null;
|
448
|
}
|
449
|
|
450
|
@Override
|
451
|
protected void initTransaction() {
|
452
|
dataSet = getDescriptiveDatasetService().load(getConfig().getDatasetUuid());
|
453
|
}
|
454
|
|
455
|
@Override
|
456
|
protected void removeDescriptionIfEmpty(TaxonDescription description, ResultHolder resultHolder) {
|
457
|
super.removeDescriptionIfEmpty(description, resultHolder);
|
458
|
if (description.getElements().isEmpty()){
|
459
|
dataSet.removeDescription(description);
|
460
|
}
|
461
|
}
|
462
|
|
463
|
@Override
|
464
|
protected void aggregateToParentTaxon(TaxonNode taxonNode,
|
465
|
ResultHolder resultHolder,
|
466
|
Set<TaxonDescription> excludedDescriptions) {
|
467
|
StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
|
468
|
Set<TaxonDescription> childDescriptions = getChildTaxonDescriptions(taxonNode, dataSet);
|
469
|
addDescriptionToResultHolder(descriptiveResultHolder, childDescriptions, AggregationMode.ToParent);
|
470
|
}
|
471
|
|
472
|
@Override
|
473
|
protected void aggregateWithinSingleTaxon(Taxon taxon,
|
474
|
ResultHolder resultHolder,
|
475
|
Set<TaxonDescription> excludedDescriptions) {
|
476
|
StructuredDescriptionResultHolder descriptiveResultHolder = (StructuredDescriptionResultHolder)resultHolder;
|
477
|
|
478
|
//specimen descriptions
|
479
|
Set<SpecimenDescription> specimenDescriptions = getSpecimenDescriptions(taxon, dataSet);
|
480
|
addDescriptionToResultHolder(descriptiveResultHolder, specimenDescriptions, AggregationMode.WithinTaxon);
|
481
|
|
482
|
//"literature" descriptions
|
483
|
if (getConfig().isIncludeLiterature()){
|
484
|
Set<TaxonDescription> literatureDescriptions = getLiteratureDescriptions(taxon, dataSet);
|
485
|
addDescriptionToResultHolder(descriptiveResultHolder, literatureDescriptions, AggregationMode.WithinTaxon);
|
486
|
}
|
487
|
|
488
|
//"default" descriptions
|
489
|
//TODO add default descriptions
|
490
|
//xxx
|
491
|
|
492
|
}
|
493
|
|
494
|
    /**
     * Accumulates the character data (categorical and quantitative) of the
     * given descriptions into the result holder and, depending on the source
     * mode, records an aggregation source per contributing description.
     *
     * @param aggregationMode used to look up the configured source mode
     * @throws AggregationException for unsupported or illegal source modes
     */
    private void addDescriptionToResultHolder(StructuredDescriptionResultHolder descriptiveResultHolder,
            Set<? extends DescriptionBase<?>> specimenLiteraturOrDefaultDescriptions,
            AggregationMode aggregationMode) {

        //NOTE(review): descriptionWasUsed is never reset inside the loop, so once
        //any description contributed data, sources are also created for later
        //descriptions that contributed nothing — confirm whether this is intended.
        boolean descriptionWasUsed = false;
        for (DescriptionBase<?> desc: specimenLiteraturOrDefaultDescriptions){
            for (DescriptionElementBase deb: desc.getElements()){
                if (hasCharacterData(deb)){
                    if (deb.isInstanceOf(CategoricalData.class)){
                        addToCategorical(CdmBase.deproxy(deb, CategoricalData.class), descriptiveResultHolder);
                        descriptionWasUsed = true;
                    }else if (deb.isInstanceOf(QuantitativeData.class)){
                        addToQuantitativData(CdmBase.deproxy(deb, QuantitativeData.class), descriptiveResultHolder);
                        descriptionWasUsed = true;
                    }
                }
            }

            //sources
            AggregationSourceMode sourceMode = getConfig().getSourceMode(aggregationMode);
            if(descriptionWasUsed && sourceMode != AggregationSourceMode.NONE){
                IdentifiableSource source = IdentifiableSource.NewAggregationSourceInstance();
                desc = CdmBase.deproxy(desc);

                switch (sourceMode){
                    case DESCRIPTION:
                        //link the source to a (possibly cloned) copy of the description
                        DescriptionBase<?> clonedDesc = cloneNewSourceDescription(desc);
                        source.setCdmSource(clonedDesc);
                        break;
                    case TAXON:
                        if (desc instanceof TaxonDescription){
                            Taxon taxon = ((TaxonDescription) desc).getTaxon();
                            source.setCdmSource(taxon);
                        }else {
                            throw new AggregationException("Description type not yet supported for aggregation source mode TAXON: " + desc.getClass().getSimpleName() );
                        }
                        break;
                    case NONE:
                        source = null;
                        break;
                    case ALL: //not yet supported
                        throw new AggregationException("Source mode not yet supported: " + sourceMode);
                    case ALL_SAMEVALUE: //makes no sense
                        throw new AggregationException("Illegal source mode: " + sourceMode);
                    default:
                        throw new AggregationException("Source mode not supported: " + sourceMode);
                }
                if (source != null){
                    descriptiveResultHolder.sources.add(source);
                }
            }
        }
    }
|
547
|
|
548
|
private void addToQuantitativData(QuantitativeData qd, StructuredDescriptionResultHolder resultHolder) {
|
549
|
QuantitativeData aggregatedQuantitativeData = resultHolder.quantitativeMap.get(qd.getFeature());
|
550
|
if(aggregatedQuantitativeData==null){
|
551
|
// no QuantitativeData with this feature in aggregation
|
552
|
aggregatedQuantitativeData = aggregateWithinQuantitativeData(qd);
|
553
|
}
|
554
|
else{
|
555
|
aggregatedQuantitativeData = addToExistingQuantitativeData(aggregatedQuantitativeData, qd);
|
556
|
}
|
557
|
if (aggregatedQuantitativeData != null){
|
558
|
resultHolder.quantitativeMap.put(qd.getFeature(), aggregatedQuantitativeData);
|
559
|
}
|
560
|
}
|
561
|
|
562
|
    /**
     * Folds the given categorical data into the per-feature aggregation map
     * of the result holder. State counts are summed; state data without a
     * count are treated as count 1.
     */
    private void addToCategorical(CategoricalData cd, StructuredDescriptionResultHolder resultHolder) {
        CategoricalData aggregatedCategoricalData = resultHolder.categoricalMap.get(cd.getFeature());
        if(aggregatedCategoricalData == null){
            // no CategoricalData with this feature in aggregation
            aggregatedCategoricalData = cd.clone();
            // set count to 1 if not set
            if (!aggregatedCategoricalData.getStatesOnly().isEmpty()){
                aggregatedCategoricalData.getStateData().stream().filter(sd->sd.getCount()==null).forEach(sd->sd.incrementCount());
                resultHolder.categoricalMap.put(aggregatedCategoricalData.getFeature(), aggregatedCategoricalData);
            }
        }
        else{
            // split all StateData into those where the state already exists and those where it doesn't
            List<State> statesOnly = aggregatedCategoricalData.getStatesOnly();
            List<StateData> sdWithExistingStateInAggregation = cd.getStateData().stream().filter(sd->statesOnly.contains(sd.getState())).collect(Collectors.toList());
            List<StateData> sdWithNoExistingStateInAggregation = cd.getStateData().stream().filter(sd->!statesOnly.contains(sd.getState())).collect(Collectors.toList());

            //new states are added as clones so the source element stays untouched
            for (StateData sd : sdWithNoExistingStateInAggregation) {
                StateData clone = sd.clone();
                // set count to 1 if not set
                if(clone.getCount()==null){
                    clone.incrementCount();
                }
                aggregatedCategoricalData.addStateData(clone);
            }

            //existing states: add the incoming count (or 1 if none) to the aggregated count
            for (StateData sdExist : sdWithExistingStateInAggregation) {
                List<StateData> aggregatedSameStateData = aggregatedCategoricalData.getStateData().stream()
                        .filter(sd->hasSameState(sdExist, sd))
                        .collect(Collectors.toList());
                for (StateData stateData : aggregatedSameStateData) {
                    if(sdExist.getCount()==null){
                        stateData.incrementCount();
                    }
                    else{
                        stateData.setCount(stateData.getCount()+sdExist.getCount());
                    }
                }
            }
        }
    }
|
603
|
|
604
|
@Override
|
605
|
protected StructuredDescriptionResultHolder createResultHolder() {
|
606
|
return new StructuredDescriptionResultHolder();
|
607
|
}
|
608
|
|
609
|
private class StructuredDescriptionResultHolder extends ResultHolder{
|
610
|
private Map<Feature, CategoricalData> categoricalMap = new HashMap<>();
|
611
|
private Map<Feature, QuantitativeData> quantitativeMap = new HashMap<>();
|
612
|
private Set<IdentifiableSource> sources = new HashSet<>();
|
613
|
@Override
|
614
|
public String toString() {
|
615
|
return "SDResultHolder [categoricals=" + categoricalMap.size()
|
616
|
+ ", quantitatives=" + quantitativeMap.size()
|
617
|
+ ", sources=" + sources.size()
|
618
|
+ ", descriptionsToDelete=" + this.descriptionsToDelete.size()
|
619
|
+ "]";
|
620
|
}
|
621
|
}
|
622
|
|
623
|
private Set<TaxonDescription> getChildTaxonDescriptions(TaxonNode taxonNode, DescriptiveDataSet dataSet) {
|
624
|
Set<TaxonDescription> result = new HashSet<>();
|
625
|
List<TaxonNode> childNodes = taxonNode.getChildNodes();
|
626
|
for (TaxonNode childNode : childNodes) {
|
627
|
Set<TaxonDescription> childDescriptions = childNode.getTaxon().getDescriptions();
|
628
|
result.addAll(childDescriptions.stream()
|
629
|
.filter(desc->desc.getTypes().contains(DescriptionType.AGGREGATED_STRUC_DESC))
|
630
|
.filter(desc->dataSet.getDescriptions().contains(desc))
|
631
|
.collect(Collectors.toSet()));
|
632
|
}
|
633
|
return result;
|
634
|
}
|
635
|
|
636
|
    /**
     * Computes all specimens attached to the given taxon within the given dataSet.
     * For these specimens it returns all attached specimen descriptions that
     * belong to the dataSet and are not clones for sources.
     */
|
640
|
private Set<SpecimenDescription> getSpecimenDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
|
641
|
Set<SpecimenDescription> result = new HashSet<>();
|
642
|
//TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
|
643
|
for (TaxonDescription taxonDesc: taxon.getDescriptions()){
|
644
|
for (DescriptionElementBase taxonDeb : taxonDesc.getElements()){
|
645
|
if (taxonDeb.isInstanceOf(IndividualsAssociation.class)){
|
646
|
IndividualsAssociation indAss = CdmBase.deproxy(taxonDeb, IndividualsAssociation.class);
|
647
|
SpecimenOrObservationBase<?> specimen = indAss.getAssociatedSpecimenOrObservation();
|
648
|
Set<SpecimenDescription> descriptions = specimen.getSpecimenDescriptions();
|
649
|
for(SpecimenDescription specimenDescription : descriptions){
|
650
|
if(dataSet.getDescriptions().contains(specimenDescription) &&
|
651
|
specimenDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE))){
|
652
|
result.add(specimenDescription);
|
653
|
}
|
654
|
}
|
655
|
}
|
656
|
}
|
657
|
}
|
658
|
return result;
|
659
|
}
|
660
|
|
661
|
private Set<TaxonDescription> getLiteratureDescriptions(Taxon taxon, DescriptiveDataSet dataSet) {
|
662
|
Set<TaxonDescription> result = new HashSet<>();
|
663
|
//TODO performance: use DTO service to retrieve specimen descriptions without initializing all taxon descriptions
|
664
|
for(TaxonDescription taxonDescription : taxon.getDescriptions()){
|
665
|
if(dataSet.getDescriptions().contains(taxonDescription)
|
666
|
&& taxonDescription.getTypes().stream().anyMatch(type->type.equals(DescriptionType.SECONDARY_DATA))
|
667
|
&& taxonDescription.getTypes().stream().noneMatch(type->type.equals(DescriptionType.CLONE_FOR_SOURCE)) ){
|
668
|
result.add(taxonDescription);
|
669
|
}
|
670
|
}
|
671
|
return result;
|
672
|
}
|
673
|
|
674
|
/**
|
675
|
* Evaluates statistics for exact values collection and handles missing min and max values
|
676
|
*/
|
677
|
    /**
     * Creates the initial aggregation for a quantitative data element.
     * If exact values exist, min/max/average/sample size are derived from
     * them (combined with explicit min/max boundaries if present); otherwise
     * the element is cloned and missing min/max/average values are filled
     * according to the configured missing-value modes.
     */
    private QuantitativeData aggregateWithinQuantitativeData(QuantitativeData sourceQd){
        QuantitativeData aggQD = QuantitativeData.NewInstance(sourceQd.getFeature());
        aggQD.setUnit(sourceQd.getUnit());
        Set<BigDecimal> exactValues = sourceQd.getExactValues();
        if(!exactValues.isEmpty()){
            // qd is not already aggregated
            Comparator<BigDecimal> comp = Comparator.naturalOrder();
            int exactValueSampleSize = exactValues.size();
            //min()/max() are safe here: exactValues is non-empty
            BigDecimal exactValueMin = exactValues.stream().min(comp).get();
            BigDecimal exactValueMax = exactValues.stream().max(comp).get();
            BigDecimal exactValueAvg = BigDecimalUtil.average(exactValues);
            //TODO also check for typical boundary data
            //sample size and average are only taken over when no explicit boundaries exist
            if(sourceQd.getMin() == null && sourceQd.getMax() == null){
                aggQD.setSampleSize(new BigDecimal(exactValueSampleSize), null);
                aggQD.setAverage(exactValueAvg, null);
            }
            aggQD.setMinimum(sourceQd.getMin() == null ? exactValueMin: sourceQd.getMin().min(exactValueMin), null);
            aggQD.setMaximum(sourceQd.getMax() == null ? exactValueMax: sourceQd.getMax().max(exactValueMax), null);
        }
        else{
            // qd has only min, max, ... but no exact values
            aggQD = sourceQd.clone();
            aggQD = handleMissingValues(aggQD);
        }
        return aggQD;
    }
|
703
|
|
704
|
private QuantitativeData handleMissingValues(QuantitativeData qd) {
|
705
|
//min max
|
706
|
qd = handleMissingMinOrMax(qd);
|
707
|
//average
|
708
|
if (qd != null && qd.getAverage() == null){
|
709
|
BigDecimal n = qd.getSampleSize();
|
710
|
if(n != null && !n.equals(0f)){
|
711
|
BigDecimal average = (qd.getMax().add(qd.getMin())).divide(n);
|
712
|
qd.setAverage(average, null);
|
713
|
}
|
714
|
}
|
715
|
return qd;
|
716
|
}
|
717
|
|
718
|
/**
 * Delegates to {@link #handleMissingMinOrMax(QuantitativeData, MissingMinimumMode, MissingMaximumMode)}
 * using the missing-minimum and missing-maximum modes from this aggregation's configuration.
 */
private QuantitativeData handleMissingMinOrMax(QuantitativeData qd) {
    return handleMissingMinOrMax(qd, getConfig().getMissingMinimumMode(), getConfig().getMissingMaximumMode());
}
|
721
|
|
722
|
public static QuantitativeData handleMissingMinOrMax(QuantitativeData aggQD, MissingMinimumMode missingMinMode,
|
723
|
MissingMaximumMode missingMaxMode) {
|
724
|
if(aggQD.getMin() == null && aggQD.getMax() != null){
|
725
|
if (missingMinMode == MissingMinimumMode.MinToZero) {
|
726
|
aggQD.setMinimum(BigDecimal.valueOf(0f), null);
|
727
|
}else if (missingMinMode == MissingMinimumMode.MinToMax){
|
728
|
aggQD.setMinimum(aggQD.getMax(), null);
|
729
|
}else if (missingMinMode == MissingMinimumMode.SkipRecord){
|
730
|
return null;
|
731
|
}
|
732
|
}
|
733
|
if(aggQD.getMax() == null && aggQD.getMin() != null){
|
734
|
if (missingMaxMode == MissingMaximumMode.MaxToMin){
|
735
|
aggQD.setMaximum(aggQD.getMin(), null);
|
736
|
}else if (missingMaxMode == MissingMaximumMode.SkipRecord){
|
737
|
return null;
|
738
|
}
|
739
|
}
|
740
|
return aggQD;
|
741
|
}
|
742
|
|
743
|
private QuantitativeData addToExistingQuantitativeData(QuantitativeData aggQd, QuantitativeData newQd) {
|
744
|
|
745
|
newQd = aggregateWithinQuantitativeData(newQd); //alternatively we could check, if newQd is already basically aggregated, but for this we need a clear definition what the minimum requirements are and how ExactValues and MinMax if existing in parallel should be handled.
|
746
|
|
747
|
BigDecimal min = null;
|
748
|
BigDecimal max = null;
|
749
|
BigDecimal average = null;
|
750
|
BigDecimal sampleSize = null;
|
751
|
newQd = handleMissingValues(newQd);
|
752
|
if (newQd == null){
|
753
|
return aggQd;
|
754
|
}
|
755
|
min = aggQd.getMin().min(newQd.getMin());
|
756
|
max = aggQd.getMax().max(newQd.getMax());
|
757
|
if (newQd.getSampleSize() != null && aggQd.getSampleSize() != null){
|
758
|
sampleSize = newQd.getSampleSize().add(aggQd.getSampleSize());
|
759
|
}
|
760
|
if (sampleSize != null && !sampleSize.equals(0f) && aggQd.getAverage() != null && newQd.getAverage() != null){
|
761
|
BigDecimal aggTotalSum = aggQd.getAverage().multiply(aggQd.getSampleSize(), MathContext.DECIMAL32);
|
762
|
BigDecimal newTotalSum = newQd.getAverage().multiply(newQd.getSampleSize(), MathContext.DECIMAL32);
|
763
|
BigDecimal totalSum = aggTotalSum.add(newTotalSum);
|
764
|
average = totalSum.divide(sampleSize, MathContext.DECIMAL32).stripTrailingZeros(); //to be discussed if we really want to reduce precision here, however, due to the current way to compute average we do not have exact precision anyway
|
765
|
}
|
766
|
aggQd.setMinimum(min, null);
|
767
|
aggQd.setMaximum(max, null);
|
768
|
aggQd.setSampleSize(sampleSize, null);
|
769
|
aggQd.setAverage(average, null);
|
770
|
return aggQd;
|
771
|
}
|
772
|
|
773
|
private static List<BigDecimal> getExactValues(QuantitativeData qd) {
|
774
|
List<BigDecimal> exactValues = qd.getStatisticalValues().stream()
|
775
|
.filter(value->value.getType().equals(StatisticalMeasure.EXACT_VALUE()))
|
776
|
.map(exact->exact.getValue())
|
777
|
.collect(Collectors.toList());
|
778
|
return exactValues;
|
779
|
}
|
780
|
|
781
|
private static boolean hasSameState(StateData sd1, StateData sd2) {
|
782
|
if (sd2.getState() == null || sd1.getState() == null){
|
783
|
return false;
|
784
|
}else{
|
785
|
return sd2.getState().getUuid().equals(sd1.getState().getUuid());
|
786
|
}
|
787
|
}
|
788
|
}
|