Project

General

Profile

Revision ca93e81e

IDca93e81e71547ebb5b736007a7d495a91a1f6392
Parent d1504e5a
Child dd864603

Added by Andreas Müller about 1 year ago

ref #8469 handle all sub-branch taxa on the level they belong to

View differences:

cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/description/QuantitativeData.java
231 231

  
232 232
// ******************************** TRANSIENT METHODS *******************************/
233 233

  
234
    @Transient
235
    public Float getOverallMin(){
236
        float result = Float.MAX_VALUE;
237
        for (StatisticalMeasurementValue value : statisticalValues){
238
            if (withRangeValue(value)){
239
                result = Math.min(result, value.getValue());;
240
            }
241
        }
242
        return (result == Float.MAX_VALUE)? null: result;
243
    }
234 244

  
235
	/**
245
    @Transient
246
    public Float getOverallMax(){
247
        float result = Float.MIN_VALUE;
248
        for (StatisticalMeasurementValue value : statisticalValues){
249
            if (withRangeValue(value)){
250
                result = Math.max(result, value.getValue());;
251
            }
252
        }
253
        return (result == Float.MIN_VALUE)? null: result;
254
    }
255

  
256
    private boolean withRangeValue(StatisticalMeasurementValue value) {
257
        StatisticalMeasure type = value.getType();
258
        if (type != null){
259
            if (type.isAverage()|| type.isMin() || type.isTypicalLowerBoundary()||type.isMax()||type.isTypicalUpperBoundary()||type.isExactValue()){
260
                return true;
261
            }
262
        }
263
        return false;
264
    }
265

  
266
    /**
236 267
	 * Returns the numerical value of the one {@link StatisticalMeasurementValue statistical measurement value}
237 268
	 * with the corresponding {@link StatisticalMeasure statistical measure} "minimum" and
238 269
	 * belonging to <i>this</i> quantitative data. Returns <code>null</code> if no such
......
265 296
		return getSpecificStatisticalValue(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY());
266 297
	}
267 298

  
299
	@Transient
300
    public Float getExactValue(){
301
        return getSpecificStatisticalValue(StatisticalMeasure.EXACT_VALUE());
302
    }
303

  
268 304
	/**
269 305
	 * Returns the numerical value of the one {@link StatisticalMeasurementValue statistical measurement value}
270 306
	 * with the corresponding {@link StatisticalMeasure statistical measure}
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/generate/PolytomousKeyGenerator.java
1 1
package eu.etaxonomy.cdm.strategy.generate;
2 2

  
3 3
import java.util.ArrayList;
4
import java.util.Arrays;
4 5
import java.util.Comparator;
5 6
import java.util.HashMap;
6 7
import java.util.HashSet;
......
152 153
	 * @param featuresLeft List of features that can be used at this point
153 154
	 * @param taxaCovered the taxa left at this point (i.e. that verify the description corresponding to the path leading to this node)
154 155
     * @param featureStatesFilter
155
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
156 156
	 */
157
	private Map<PolytomousKeyNode,Set<KeyTaxon>> buildBranches(PolytomousKeyNode parent, List<Feature> featuresLeft, Set<KeyTaxon> taxaCovered,
157
	private void buildBranches(PolytomousKeyNode parent, List<Feature> featuresLeft, Set<KeyTaxon> taxaCovered,
158 158
	        Map<Feature, Set<State>> featureStatesFilter){
159 159

  
160
	    Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches;
160
	    //handle all branches taxa
161
        Set<KeyTaxon> allBranchesTaxa = getAllBranchesTaxa(featuresLeft, taxaCovered, featureStatesFilter);
162
        if (allBranchesTaxa.size()>0){
163
            if (allBranchesTaxa.size()>1){
164
            //TODO test if this case is handled and displayed correctly
165
                logger.warn(">1 final taxa in inner node");
166
            }
167
            taxaCovered.removeAll(allBranchesTaxa);
168
            if(taxaCovered.size() != 1){
169
                handleLeaf(parent, allBranchesTaxa);
170
            }else{
171
                //if only 1 is left it is better to handle all remaining in sub-branch to make difference clearer
172
                taxaCovered.addAll(allBranchesTaxa);
173
            }
174
        }
161 175

  
176
        //start real branching
162 177
	    if (taxaCovered.size()<=1){
163 178
		    //do nothing
164 179
	        logger.warn("Only 1 or no taxon covered. This should currently only be possible on top level and is not yet handled. ");
165
            //old: taxaCoveredByAllSubBranches = taxaCovered;
166
	        taxaCoveredByAllSubBranches = new HashMap<>();
167
            taxaCoveredByAllSubBranches.put(parent, taxaCovered);
168 180
		}else {
169 181
			// this map stores the thresholds giving the best dichotomy of taxa for the corresponding feature supporting quantitative data
170 182
			Map<Feature,Float> quantitativeFeaturesThresholds = new HashMap<>();
......
178 190
			    /************** either the feature supports quantitative data... **************/
179 191
			    // NB: in this version, "quantitative features" are dealt with in a dichotomous way
180 192
			    if (winnerFeature.isSupportsQuantitativeData()) {
181
			        taxaCoveredByAllSubBranches = handleQuantitativeData(parent, featuresLeft, taxaCovered,
193
			        handleQuantitativeData(parent, featuresLeft, taxaCovered,
182 194
			                quantitativeFeaturesThresholds, winnerFeature, featureStatesFilter);
183 195
			    }
184 196
			    /************** ...or it supports categorical data. **************/
185 197
			    else  if (winnerFeature.isSupportsCategoricalData()) {
186
			        taxaCoveredByAllSubBranches = handleCategorialFeature(parent, featuresLeft, taxaCovered,
198
			        handleCategorialFeature(parent, featuresLeft, taxaCovered,
187 199
			                winnerFeature, featureStatesFilter);
188 200
			    }else{
189 201
	                throw new RuntimeException("Winner feature does not support character data.");
......
191 203
			    // the winner features are put back to the features left once the branch is done
192 204
			    featuresLeft.add(winnerFeature);
193 205
			}else if (featuresLeft.isEmpty()){
194
			    //old: handleLeaf(parent, taxaCovered);
195
			    taxaCoveredByAllSubBranches = new HashMap<>();
196
			    taxaCoveredByAllSubBranches.put(parent, taxaCovered);
206
			    handleLeaf(parent, taxaCovered);
197 207
			}else{
198 208
			    throw new RuntimeException("No winner feature but features left to handle should not happen.");
199 209
			}
200
//			handleTaxaCoveredByAllSubBranches(parent, taxaCoveredByAllSubBranches);
201 210
		}
202
        return taxaCoveredByAllSubBranches;
211
        return;
203 212
	}
204 213

  
214
    private Set<KeyTaxon> getAllBranchesTaxa(List<Feature> featuresLeft, Set<KeyTaxon> taxaCovered,
215
            Map<Feature, Set<State>> featureStatesFilter) {
216

  
217
        Set<KeyTaxon> candidates = new HashSet<>(taxaCovered);
218
        List<Feature> dependendFeatures = new ArrayList<>();
219
        for (Feature feature : featuresLeft){
220
            if(feature.isSupportsCategoricalData()){
221
                Set<State> allStates = getAllStates(feature, taxaCovered, featureStatesFilter.get(feature));
222
                Iterator<KeyTaxon> it = candidates.iterator();
223
                while (it.hasNext()){
224
                    Set<KeyTaxon> taxonSet = new HashSet<>(Arrays.asList(it.next()));
225
                    Set<State> taxonStates = getAllStates(feature, taxonSet, featureStatesFilter.get(feature));
226
                    if(allStates.size() > taxonStates.size()){
227
                        it.remove();
228
                    }
229
                }
230
                if(candidates.isEmpty()){
231
                    break;
232
                }else{
233
                    addDependentFeatures(dependendFeatures, feature, new HashSet<>(), new ArrayList<>(allStates));
234
                }
235
            }else if (feature.isSupportsQuantitativeData()){
236
                Iterator<KeyTaxon> it = candidates.iterator();
237
                while (it.hasNext()){
238
                    float min = Float.MAX_VALUE;
239
                    Float max = Float.MIN_VALUE;
240
                    Set<QuantitativeData> qds = it.next().quantitativeData.get(feature);
241
                    qds = qds == null? new HashSet<>(): qds;
242
                    for (QuantitativeData qd : qds){
243
                        Float qdMin = qd.getOverallMin();
244
                        if(qdMin != null){
245
                            min = Math.min(min, qdMin);
246
                        }
247
                        Float qdMax = qd.getOverallMax();
248
                        if(qdMax != null){
249
                            max = Math.max(max, qdMax);
250
                        }
251
                    }
252
                    boolean staysCandidate = true;
253
                    for(KeyTaxon taxon : taxaCovered){
254
                        Set<QuantitativeData> tqds = taxon.quantitativeData.get(feature);
255
                        tqds = tqds == null? new HashSet<>(): tqds;
256
                        for (QuantitativeData qd : tqds){
257
                            staysCandidate &= qd.getOverallMin() == null || qd.getOverallMin() > min;
258
                            staysCandidate &= qd.getOverallMax() == null || qd.getOverallMax() > max;
259
                        }
260
                        if (!staysCandidate){
261
                            break;
262
                        }
263
                    }
264
                    if (!staysCandidate){
265
                        it.remove();
266
                    }
267
                }
268
            }
269
        }
270
        if(config.isUseDependencies() && !dependendFeatures.isEmpty() && !candidates.isEmpty()){
271
            Set<KeyTaxon> dependetCandidates = getAllBranchesTaxa(dependendFeatures, taxaCovered, featureStatesFilter);
272
            candidates.retainAll(dependetCandidates);
273
        }
274
        if(!candidates.isEmpty()){
275
            logger.warn("Candidates: " + candidates);
276
        }
277
        return candidates;
278
    }
279

  
205 280
    /**
206 281
     * Creates a leaf. It adds the taxa the parent taxon as linked taxa. Handles a
207 282
     * list of multiple taxa and handles "specimen taxa" (not yet fully implemented)
......
229 304
    /**
230 305
     * "categorical features" may present several different states/branches,
231 306
     * each one of these might correspond to one child.
232
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
233 307
     */
234
    private Map<PolytomousKeyNode,Set<KeyTaxon>> handleCategorialFeature(PolytomousKeyNode parent, List<Feature> featuresLeft,
308
    private void handleCategorialFeature(PolytomousKeyNode parent, List<Feature> featuresLeft,
235 309
            Set<KeyTaxon> taxaCovered,
236 310
            Feature winnerFeature,
237 311
            Map<Feature, Set<State>> featureStatesFilter) {
238 312

  
239
        Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches;
240 313
        Map<Set<KeyTaxon>,Boolean> reuseWinner = new HashMap<>();
241 314

  
242 315
        Set<State> states = getAllStates(winnerFeature, taxaCovered, featureStatesFilter.get(winnerFeature));
......
252 325
		        Set<Feature> featuresAdded = new HashSet<>();
253 326
		        addDependentFeatures(featuresLeft, winnerFeature, featuresAdded, stateList);
254 327
		        featuresLeft.remove(winnerFeature);
255
		        taxaCoveredByAllSubBranches = buildBranches(parent, featuresLeft, taxaCovered, featureStatesFilter);
328
		        buildBranches(parent, featuresLeft, taxaCovered, featureStatesFilter);
256 329
		        removeAddedDependendFeatures(featuresLeft, featuresAdded);
257 330
		    }else{
258 331
		        //if only 1 branch is left we can handle this as a leaf, no matter how many taxa are left
259
		        //old: taxaCoveredByAllSubBranches = handleLeaf(parent, taxaCovered);
260
	            taxaCoveredByAllSubBranches = new HashMap<>();
261
	            taxaCoveredByAllSubBranches.put(parent, taxaCovered);
332
		        handleLeaf(parent, taxaCovered);
262 333
		    }
263 334
		}else {
264 335
		    // if the merge option is ON, branches with the same discriminative power will be merged (see Vignes & Lebbes, 1989)
......
267 338
		                taxonStatesMap, featureStatesFilter.get(winnerFeature));
268 339
		    }
269 340
		    List<Set<KeyTaxon>> sortedKeys = sortKeys(taxonStatesMap);
270
		    taxaCoveredByAllSubBranches = new HashMap<>();
271 341
            for (Set<KeyTaxon> newTaxaCovered : sortedKeys){
272 342
		        //handle each branch
273
                Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByBranch = handleCategoricalBranch(parent, featuresLeft,
343
                handleCategoricalBranch(parent, featuresLeft,
274 344
                        taxaCovered, winnerFeature, reuseWinner, taxonStatesMap, newTaxaCovered, featureStatesFilter);
275
                mergeBranchResults(taxaCoveredByAllSubBranches, taxaCoveredByBranch);
276 345
            }
277
            taxaCoveredByAllSubBranches = handleAllBranchesResult(taxaCoveredByAllSubBranches, parent);
278 346
		}
279
		return taxaCoveredByAllSubBranches;
347
		return;
280 348
    }
281 349

  
282 350
    private Map<PolytomousKeyNode, Set<KeyTaxon>> handleAllBranchesResult(
......
404 472
        return taxonStatesMap;
405 473
    }
406 474

  
407
    /**
408
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
409
     */
410
    private Map<PolytomousKeyNode,Set<KeyTaxon>> handleCategoricalBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
475
    private void handleCategoricalBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
411 476
            Set<KeyTaxon> taxaCovered,
412 477
            Feature winnerFeature, Map<Set<KeyTaxon>, Boolean> reuseWinner,
413 478
            Map<Set<KeyTaxon>, List<State>> taxonStatesMap,
414 479
            Set<KeyTaxon> newTaxaCovered,
415 480
            Map<Feature,Set<State>> featureStatesFilter) {
416 481

  
417
        Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches;
418

  
419 482
        //to restore old state
420 483
        Set<State> oldFilterSet = featureStatesFilter.get(winnerFeature);
421 484
        Set<Feature> featuresAdded = new HashSet<>();
......
453 516

  
454 517
        boolean hasChildren = areTheTaxaDiscriminated && (newTaxaCovered.size() > 1);
455 518
        if (hasChildren){
456
            taxaCoveredByAllSubBranches = buildBranches(childNode, featuresLeft, newTaxaCovered, featureStatesFilter);
519
            buildBranches(childNode, featuresLeft, newTaxaCovered, featureStatesFilter);
457 520
        }else{
458
            //old: taxaCoveredByAllSubBranches = handleLeaf(childNode, newTaxaCovered);
459
            taxaCoveredByAllSubBranches = new HashMap<>();
460
            taxaCoveredByAllSubBranches.put(childNode, newTaxaCovered);
521
            handleLeaf(childNode, newTaxaCovered);
461 522
        }
462 523

  
463 524
        //restore old state before returning to parent node
464 525
        removeAddedDependendFeatures(featuresLeft, featuresAdded);
465 526
        featureStatesFilter.put(winnerFeature, oldFilterSet);
466 527

  
467
        return taxaCoveredByAllSubBranches;
528
        return;
468 529
    }
469 530

  
470 531
    private void setStatesFilter(Map<Feature, Set<State>> filter, Feature feature,
......
483 544
        }
484 545
    }
485 546

  
486
    private void addDependentFeatures(List<Feature> featuresLeft, Feature winnerFeature,
547
    private void addDependentFeatures(List<Feature> featuresLeft, Feature baseFeature,
487 548
            Set<Feature> featuresAdded, List<State> listOfStates) {
488 549

  
489
        Set<Feature> newFeatureCandidates = new HashSet<>(featureDependencies.get(winnerFeature));
490
        newFeatureCandidates.remove(null);
491
        for (State state : listOfStates) {
492
            //in-applicable
493
            List<Feature> inapplicableFeatures = getApplicableFeatures(winnerFeature, state, iAifDependencies);
494
            newFeatureCandidates.removeAll(inapplicableFeatures);
495
            //only-applicable
496
            List<Feature> onlyApplicableFeatures = getApplicableFeatures(winnerFeature, state, oAifDependencies);
497
            if (!onlyApplicableFeatures.isEmpty()){
498
                Iterator<Feature> it = newFeatureCandidates.iterator();
499
                while (it.hasNext()){
500
                    Feature featureCandidate = it.next();
501
                    if (!onlyApplicableFeatures.contains(featureCandidate)){
502
                        it.remove();
550
        if(notEmpty(featureDependencies.get(baseFeature))){
551
            Set<Feature> newFeatureCandidates = new HashSet<>(featureDependencies.get(baseFeature));
552
            newFeatureCandidates.remove(null);
553
            for (State state : listOfStates) {
554
                //in-applicable
555
                List<Feature> inapplicableFeatures = getApplicableFeatures(baseFeature, state, iAifDependencies);
556
                newFeatureCandidates.removeAll(inapplicableFeatures);
557
                //only-applicable
558
                List<Feature> onlyApplicableFeatures = getApplicableFeatures(baseFeature, state, oAifDependencies);
559
                if (!onlyApplicableFeatures.isEmpty()){
560
                    Iterator<Feature> it = newFeatureCandidates.iterator();
561
                    while (it.hasNext()){
562
                        Feature featureCandidate = it.next();
563
                        if (!onlyApplicableFeatures.contains(featureCandidate)){
564
                            it.remove();
565
                        }
503 566
                    }
504 567
                }
505 568
            }
569
            featuresLeft.addAll(newFeatureCandidates);
570
            featuresAdded.addAll(newFeatureCandidates);
506 571
        }
507
        featuresLeft.addAll(newFeatureCandidates);
508
        featuresAdded.addAll(newFeatureCandidates);
509 572
    }
510 573

  
511 574
    private List<Feature> getApplicableFeatures(Feature feature, State state,
......
617 680
        return states;
618 681
    }
619 682

  
620
    /**
621
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
622
     */
623
    private Map<PolytomousKeyNode,Set<KeyTaxon>> handleQuantitativeData(PolytomousKeyNode parent, List<Feature> featuresLeft,
683
    private void handleQuantitativeData(PolytomousKeyNode parent, List<Feature> featuresLeft,
624 684
            Set<KeyTaxon> taxaCovered, Map<Feature, Float> quantitativeFeaturesThresholds,
625 685
            Feature winnerFeature, Map<Feature, Set<State>> featureStatesFilter) {
626 686

  
627
        Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches = null;
628

  
629 687
        // first, get the threshold
630 688
        float threshold = quantitativeFeaturesThresholds.get(winnerFeature);
631 689
        //TODO unit does not seem to be used yet
......
635 693
        List<Set<KeyTaxon>> quantitativeStates = determineQuantitativeStates(threshold, winnerFeature, taxaCovered, unit);
636 694
        // thus the list contains two sets of KeyTaxon, the first corresponding to
637 695
        // those before, the second to those after the threshold
638
        taxaCoveredByAllSubBranches = new HashMap<>();
639 696
        for (int i=0; i<2; i++) {
640
            Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByBranch = handleQuantitativeBranch(parent, featuresLeft, taxaCovered, winnerFeature, threshold, unit,
697
            handleQuantitativeBranch(parent, featuresLeft, taxaCovered.size(), winnerFeature, threshold, unit,
641 698
                    quantitativeStates, featureStatesFilter, i);
642
            mergeBranchResults(taxaCoveredByAllSubBranches, taxaCoveredByBranch);
643 699
        }
700
//        taxaCoveredByAllSubBranches = handleAllBranchesResult(taxaCoveredByAllSubBranches, parent);
644 701

  
645
        taxaCoveredByAllSubBranches = handleAllBranchesResult(taxaCoveredByAllSubBranches, parent);
646

  
647
        return taxaCoveredByAllSubBranches;
702
        return;
648 703
    }
649 704

  
650 705
    /**
651 706
     * Creates the branch for a quantitative feature.
652 707
     * TODO if the quantitative feature has dependent features they are not yet handled
653
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
654 708
     */
655
    private Map<PolytomousKeyNode,Set<KeyTaxon>> handleQuantitativeBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
656
            Set<KeyTaxon> taxaCovered, Feature winnerFeature, float threshold, StringBuilder unit,
657
            List<Set<KeyTaxon>> quantitativeStates, Map<Feature, Set<State>> featureStatesFilter, int i) {
709
    private void handleQuantitativeBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
710
            int parentTaxaCoveredSize, Feature winnerFeature, float threshold, StringBuilder unit,
711
            List<Set<KeyTaxon>> quantitativeStates, Map<Feature, Set<State>> featureStatesFilter,
712
            int brunchNum) {
658 713

  
659
        Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches;
660 714
        String sign;
661
        Set<KeyTaxon> newTaxaCovered = quantitativeStates.get(i);
662
        if (i==0){
715
        Set<KeyTaxon> newTaxaCovered = quantitativeStates.get(brunchNum);
716
        if (brunchNum==0){
663 717
        	sign = before; // the first element of the list corresponds to taxa before the threshold
664 718
        } else {
665 719
        	sign = after; // the second to those after
......
671 725
        	childNode.setStatement(statement);
672 726
        	parent.addChild(childNode);
673 727
        	//TODO don't we need to check dependent features, they are not very likely for quantitative features, but still might exist as exception ...
674
        	boolean taxaAreDiscriminatedInThisStep = newTaxaCovered.size() < taxaCovered.size();
728
        	boolean taxaAreDiscriminatedInThisStep = newTaxaCovered.size() < parentTaxaCoveredSize;
675 729
        	boolean childrenExist = taxaAreDiscriminatedInThisStep && (newTaxaCovered.size() > 1);
676 730
        	if (childrenExist){
677
        	    taxaCoveredByAllSubBranches = buildBranches(childNode, featuresLeft, newTaxaCovered, featureStatesFilter);
731
        	    buildBranches(childNode, featuresLeft, newTaxaCovered, featureStatesFilter);
678 732
        	}else{
679
        	    //old: taxaCoveredByAllSubBranches = handleLeaf(childNode, newTaxaCovered);
680
                taxaCoveredByAllSubBranches = new HashMap<>();
681
                taxaCoveredByAllSubBranches.put(childNode, newTaxaCovered);
733
        	    handleLeaf(childNode, newTaxaCovered);
682 734
        	}
683 735
        }else{
684 736
            //TODO do we need to check the 0 case, can this happen at all, shouldn't we throw a warning instead?
685
            taxaCoveredByAllSubBranches = new HashMap<>();
737
            throw new RuntimeException("No taxa left on branch. This should probably not happen.");
686 738
        }
687
        return taxaCoveredByAllSubBranches;
739
        return;
688 740
    }
689 741

  
690 742
    private Feature computeScores(List<Feature> featuresLeft, Set<KeyTaxon> taxaCovered,
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/generate/PolytomousKeyGeneratorConfigurator.java
104 104

  
105 105
    public List<Feature> getFeatures() {
106 106
        List<Feature> result;
107
        if(!useDependencies){
107
        if(!isUseDependencies()){
108 108
            result = dataSet.getDescriptiveSystem().asTermList();
109 109
        }else{
110 110
            result = new ArrayList<>(dataSet.getDescriptiveSystem().independentTerms());

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)