Project

General

Profile

Revision 8e29c4cc

ID8e29c4cc8116ac2be56cea7029faed4ec636988d
Parent 185dfa24
Child f6fda820

Added by Andreas Müller about 2 years ago

ref #8469 introduce KeyTaxon to allow multiple data and multiple descriptions per taxon

View differences:

cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/generate/PolytomousKeyGenerator.java
9 9
import java.util.Map;
10 10
import java.util.Map.Entry;
11 11
import java.util.Set;
12
import java.util.UUID;
12 13

  
13 14
import org.apache.log4j.Logger;
14 15

  
......
23 24
import eu.etaxonomy.cdm.model.description.PolytomousKey;
24 25
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
25 26
import eu.etaxonomy.cdm.model.description.QuantitativeData;
27
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
26 28
import eu.etaxonomy.cdm.model.description.State;
27 29
import eu.etaxonomy.cdm.model.description.StateData;
28 30
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
29 31
import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
30 32
import eu.etaxonomy.cdm.model.description.TaxonDescription;
33
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
34
import eu.etaxonomy.cdm.model.taxon.Taxon;
31 35
import eu.etaxonomy.cdm.model.term.TermNode;
32 36

  
33 37
/**
......
52 56
	private Map<FeatureState,Set<Feature>> oAifDependencies = new HashMap<>(); // map of a set of Features (value) only applicables if a State (key) is present
53 57
	private Map<Feature,Set<Feature>> featureDependencies = new HashMap<>(); // map of all the sets of features (values) which have dependencies with states of other features (keys)
54 58

  
59
	private class KeyTaxon{
60
	    private UUID uuid;
61
	    private Taxon taxon;
62
	    private SpecimenOrObservationBase<?> specimen;
63
	    private Map<Feature,Set<CategoricalData>> categoricalData = new HashMap<>();
64
        private Map<Feature,Set<QuantitativeData>> quantitativeData = new HashMap<>();
65

  
66
        private Set<CategoricalData> getCategoricalData(Feature feature){
67
            return categoricalData.get(feature) == null? new HashSet<>():categoricalData.get(feature);
68
        }
69
        private Set<QuantitativeData> getQuantitativeData(Feature feature){
70
            return quantitativeData.get(feature) == null? new HashSet<>():quantitativeData.get(feature);
71
        }
72

  
73
        private void addDescription(DescriptionBase<?> db) {
74
            for (DescriptionElementBase deb : db.getElements()){
75
                Feature feature = deb.getFeature();
76
                if (deb.isCharacterData()){
77
                    if (deb.isInstanceOf(CategoricalData.class)){
78
                        CategoricalData cd = CdmBase.deproxy(deb, CategoricalData.class);
79
                        if (categoricalData.get(feature)== null){
80
                            categoricalData.put(feature, new HashSet<>());
81
                        }
82
                        categoricalData.get(feature).add(cd);
83
                    }else if (deb.isInstanceOf(QuantitativeData.class)){
84
                        QuantitativeData qd = CdmBase.deproxy(deb, QuantitativeData.class);
85
                        if (quantitativeData.get(feature)== null){
86
                            quantitativeData.put(feature, new HashSet<>());
87
                        }
88
                        quantitativeData.get(feature).add(qd);
89
                    }
90
                }
91
            }
92
        }
93

  
94
        @Override
95
        public String toString() {
96
            return "KeyTaxon [uuid=" + uuid + "]";
97
        }
98
	}
99

  
55 100
// *************************** METHODS ***************************************/
56 101

  
57 102
    /**
......
67 112
        }
68 113
        PolytomousKey polytomousKey = PolytomousKey.NewInstance();
69 114
        PolytomousKeyNode root = polytomousKey.getRoot();
70
        buildBranches(root, config.getFeatures(), (Set)config.getTaxonDescriptions(), true);
115
        Set<KeyTaxon> taxaCovered = makeKeyTaxa((Set)config.getTaxonDescriptions());
116
        buildBranches(root, config.getFeatures(), taxaCovered, true);
71 117
        return polytomousKey;
72 118
    }
73 119

  
74 120
	/**
121
     * @param taxonDescriptions
122
     * @return
123
     */
124
    private Set<KeyTaxon> makeKeyTaxa(Set<DescriptionBase<?>> descriptions) {
125
        Map<UUID,KeyTaxon> taxonMap = new HashMap<>();
126
        for (DescriptionBase<?> db : descriptions){
127
            KeyTaxon taxon = new KeyTaxon();
128
            if (db.isInstanceOf(TaxonDescription.class)){
129
                TaxonDescription td = CdmBase.deproxy(db, TaxonDescription.class);
130
                taxon.uuid = td.getTaxon().getUuid();
131
                taxon.taxon = td.getTaxon();
132
            }else if (db.isInstanceOf(SpecimenDescription.class)){
133
                SpecimenDescription sd = CdmBase.deproxy(db, SpecimenDescription.class);
134
                taxon.uuid = sd.getDescribedSpecimenOrObservation().getUuid();
135
                taxon.specimen = sd.getDescribedSpecimenOrObservation();
136
            }else{
137
                throw new RuntimeException("Unhandled entity type " + db.getClass().getName());
138
            }
139
            if (taxonMap.get(taxon.uuid)!= null){
140
                taxon = taxonMap.get(taxon.uuid);
141
            }else{
142
                taxonMap.put(taxon.uuid, taxon);
143
            }
144
            taxon.addDescription(db);
145
        }
146
        return new HashSet<>(taxonMap.values());
147
    }
148

  
149
    /**
75 150
	 * Recursive function that builds the branches of the identification key
76 151
	 *
77 152
	 * @param parent the node considered
......
80 155
	 * @param taxaDiscriminatedInPreviousStep if in the previous level the taxa discriminated are the same, this boolean is set to true,
81 156
	 *           thus if the taxa, again, are not discriminated at this level the function stops
82 157
	 */
83
	private void buildBranches(PolytomousKeyNode parent, List<Feature> featuresLeft, Set<DescriptionBase<?>> taxaCovered, boolean taxaDiscriminatedInPreviousStep){
158
	private void buildBranches(PolytomousKeyNode parent, List<Feature> featuresLeft, Set<KeyTaxon> taxaCovered, boolean taxaDiscriminatedInPreviousStep){
84 159

  
85 160
		if (taxaCovered.size()<=1){
86 161
		    //do nothing
......
116 191
		}
117 192
	}
118 193

  
119
    private void handleLeaf(PolytomousKeyNode parent, Set<DescriptionBase<?>> taxaCovered) {
194
    private void handleLeaf(PolytomousKeyNode parent, Set<KeyTaxon> taxaCovered) {
120 195
        KeyStatement parentStatement = parent.getStatement();
121
        for(DescriptionBase<?> description: taxaCovered){
122
            description = CdmBase.deproxy(description);
123
            if  (description instanceof TaxonDescription){
124
                parent.setOrAddTaxon(((TaxonDescription)description).getTaxon());
196
        for(KeyTaxon taxon: taxaCovered){
197
            if  (taxon.taxon instanceof Taxon){
198
                parent.setOrAddTaxon(taxon.taxon);
125 199
            }else{
126 200
                //FIXME handle other descriptions like specimen descriptions better
127 201
                if (parentStatement!=null){
128 202
                    String statementString = parentStatement.getLabelText(Language.DEFAULT());
129
                    if (statementString !=null && description != null){
130
                        String label = statementString + " --> " + description.getTitleCache();
203
                    if (statementString !=null && taxon.specimen != null){
204
                        String label = statementString + " --> " + taxon.specimen.getTitleCache();
131 205
                        parentStatement.putLabel(Language.DEFAULT(), label);
132 206
                    }
133 207
                }
......
140 214
     * each one of these might correspond to one child
141 215
     */
142 216
    private void handleCategorialFeature(PolytomousKeyNode parent, List<Feature> featuresLeft,
143
            Set<DescriptionBase<?>> taxaCovered,
217
            Set<KeyTaxon> taxaCovered,
144 218
            Feature winnerFeature, boolean taxaDiscriminatedInPreviousStep) {
145 219

  
146
        Map<Set<DescriptionBase<?>>,Boolean> reuseWinner = new HashMap<>();
220
        Map<Set<KeyTaxon>,Boolean> reuseWinner = new HashMap<>();
147 221

  
148 222
        Set<State> states = getAllStates(winnerFeature, taxaCovered);
149 223
		// a map is created, the key being the set of taxa that present the state(s) stored in the corresponding value
150 224
        // this key represents a single branch in the decision tree
151
		Map<Set<DescriptionBase<?>>, List<State>> taxonStatesMap
225
		Map<Set<KeyTaxon>, List<State>> taxonStatesMap
152 226
		        = determineCategoricalStates(states, winnerFeature, taxaCovered);
153 227

  
154 228
		if (taxonStatesMap.size()<=1){
......
173 247
			taxonStatesMap = handleMerge(taxaCovered, winnerFeature, reuseWinner, taxonStatesMap);
174 248
		}
175 249

  
176
		List<Set<DescriptionBase<?>>> sortedKeys = sortKeys(taxonStatesMap);
250
		List<Set<KeyTaxon>> sortedKeys = sortKeys(taxonStatesMap);
177 251

  
178
		for (Set<DescriptionBase<?>> newTaxaCovered : sortedKeys){
252
		for (Set<KeyTaxon> newTaxaCovered : sortedKeys){
179 253
		    //handle each branch
180 254
		    handleCategoricalBranch(parent, featuresLeft, taxaCovered, winnerFeature,
181 255
                    reuseWinner, taxonStatesMap, newTaxaCovered);
......
183 257

  
184 258
    }
185 259

  
186
    private Map<Set<DescriptionBase<?>>, List<State>> handleMerge(Set<DescriptionBase<?>> taxaCovered,
187
            Feature winnerFeature, Map<Set<DescriptionBase<?>>, Boolean> reuseWinner,
188
            Map<Set<DescriptionBase<?>>, List<State>> taxonStatesMap) {
260
    private Map<Set<KeyTaxon>, List<State>> handleMerge(Set<KeyTaxon> taxaCovered,
261
            Feature winnerFeature, Map<Set<KeyTaxon>, Boolean> reuseWinner,
262
            Map<Set<KeyTaxon>, List<State>> taxonStatesMap) {
189 263

  
190 264
        // creates a map between the different states of the winnerFeature and the sets of states "incompatible" with them
191 265
        Map<State,Set<State>> exclusions = new HashMap<>();
......
205 279
    }
206 280

  
207 281
    private void handleCategoricalBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
208
            Set<DescriptionBase<?>> taxaCovered,
209
            Feature winnerFeature, Map<Set<DescriptionBase<?>>, Boolean> reuseWinner,
210
            Map<Set<DescriptionBase<?>>, List<State>> taxonStatesMap, Set<DescriptionBase<?>> newTaxaCovered) {
282
            Set<KeyTaxon> taxaCovered,
283
            Feature winnerFeature, Map<Set<KeyTaxon>, Boolean> reuseWinner,
284
            Map<Set<KeyTaxon>, List<State>> taxonStatesMap, Set<KeyTaxon> newTaxaCovered) {
211 285

  
212 286
        Set<Feature> featuresAdded = new HashSet<>();
213 287
        boolean areTheTaxaDiscriminated = false;
......
306 380
        return statementLabel.toString();
307 381
    }
308 382

  
309
    private Map<Set<DescriptionBase<?>>, List<State>> renewTaxonStatesMap(Map<Set<DescriptionBase<?>>, List<State>> taxonStatesMap) {
310
        Map<Set<DescriptionBase<?>>, List<State>> result = new HashMap<>();
311
        for (Map.Entry<Set<DescriptionBase<?>>, List<State>> entry : taxonStatesMap.entrySet()){
383
    private Map<Set<KeyTaxon>, List<State>> renewTaxonStatesMap(Map<Set<KeyTaxon>, List<State>> taxonStatesMap) {
384
        Map<Set<KeyTaxon>, List<State>> result = new HashMap<>();
385
        for (Map.Entry<Set<KeyTaxon>, List<State>> entry : taxonStatesMap.entrySet()){
312 386
            result.put(entry.getKey(), entry.getValue());
313 387
        }
314 388
        return result;
315 389
    }
316 390

  
317
    private List<Set<DescriptionBase<?>>> sortKeys(Map<Set<DescriptionBase<?>>, List<State>> taxonStatesMap) {
391
    private List<Set<KeyTaxon>> sortKeys(Map<Set<KeyTaxon>, List<State>> taxonStatesMap) {
318 392
        //for now this is a dummy sorting
319
        List<Map.Entry<Set<DescriptionBase<?>>, List<State>>> sortedEntries = new ArrayList<>();
393
        List<Map.Entry<Set<KeyTaxon>, List<State>>> sortedEntries = new ArrayList<>();
320 394
        sortedEntries.addAll(taxonStatesMap.entrySet());
321 395

  
322 396
        sortedEntries.sort(entryComparator);
323
        List<Set<DescriptionBase<?>>> result = new ArrayList<>();
324
        for (Map.Entry<Set<DescriptionBase<?>>, List<State>> entry : sortedEntries){
397
        List<Set<KeyTaxon>> result = new ArrayList<>();
398
        for (Map.Entry<Set<KeyTaxon>, List<State>> entry : sortedEntries){
325 399
            result.add(entry.getKey());
326 400
        }
327 401
        return result;
......
340 414
        return 0;
341 415
    };
342 416

  
343
    private static final Comparator<? super Entry<Set<DescriptionBase<?>>, List<State>>> entryComparator =  (a,b)-> {
417
    private static final Comparator<? super Entry<Set<KeyTaxon>, List<State>>> entryComparator =  (a,b)-> {
344 418
        if (a.getKey().size()!=b.getKey().size()){
345 419
            //order by number of taxa covered
346 420
            return b.getKey().size() - a.getKey().size();
......
357 431
                    return result;
358 432
                }
359 433
            }
360
            //TODO compare keys (sets of descriptionBase)
434
            //TODO compare keys (sets of KeyTaxon)
361 435
//            for (int i = 0; i < a.getKey().size(); i++){
362 436
//                Object stateA = a.getKey().getUuid;
363 437
//                State stateB = a.getKey().get(i);
......
370 444
        }
371 445
    };
372 446

  
373
    private Set<State> getAllStates(Feature winnerFeature, Set<DescriptionBase<?>> taxaCovered) {
447
    private Set<State> getAllStates(Feature feature, Set<KeyTaxon> taxaCovered) {
448
        //TODO handle modifier
374 449
        Set<State> states = new HashSet<>();
375
        for (DescriptionBase<?> td : taxaCovered){
376
            for (DescriptionElementBase deb : td.getElements()) {
377
                if (deb.getFeature().equals(winnerFeature)) {
378
                    List<StateData> stateDatas = CdmBase.deproxy(deb, CategoricalData.class).getStateData();
379

  
380
                    for (StateData sd : stateDatas){
381
                        states.add(sd.getState());
382
                    }
450
        for (KeyTaxon taxon : taxaCovered){
451
            Set<CategoricalData> cdSet = taxon.getCategoricalData(feature);
452
            for (CategoricalData cd : cdSet){
453
                List<StateData> stateDatas = cd.getStateData();
454
                for (StateData sd : stateDatas){
455
                    states.add(sd.getState());
383 456
                }
384 457
            }
385 458
        }
386 459
        return states;
387 460
    }
388 461

  
389
    private DescriptionElementBase getDescriptionElementForFeature(Feature winnerFeature, DescriptionBase<?> td) {
390
        DescriptionElementBase result = null;
391
        for (DescriptionElementBase deb : td.getElements()) {
392
        	if (deb.getFeature().equals(winnerFeature)) {
393
        	    result = deb;
394
            }
395
        }
396
        return result;
397
    }
398

  
399 462
    private void handleQuantitativeData(PolytomousKeyNode parent, List<Feature> featuresLeft,
400
            Set<DescriptionBase<?>> taxaCovered, Map<Feature, Float> quantitativeFeaturesThresholds,
463
            Set<KeyTaxon> taxaCovered, Map<Feature, Float> quantitativeFeaturesThresholds,
401 464
            Feature winnerFeature, boolean taxaDiscriminatedInPreviousStep) {
402 465

  
403 466
        // first, get the threshold
......
405 468
        StringBuilder unit = new StringBuilder();
406 469
        // then determine which taxa are before and which are after this threshold (dichotomy)
407 470
        //in order to create the children of the parent node
408
        List<Set<DescriptionBase<?>>> quantitativeStates = determineQuantitativeStates(threshold, winnerFeature, taxaCovered, unit);
409
        // thus the list contains two sets of DescriptionBase, the first corresponding to
471
        List<Set<KeyTaxon>> quantitativeStates = determineQuantitativeStates(threshold, winnerFeature, taxaCovered, unit);
472
        // thus the list contains two sets of KeyTaxon, the first corresponding to
410 473
        //those before, the second to those after the threshold
411 474
        for (int i=0; i<2; i++) {
412 475
        	handleQuantitativeBranch(parent, featuresLeft, taxaCovered, winnerFeature, threshold, unit,
......
416 479
    }
417 480

  
418 481
    private void handleQuantitativeBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
419
            Set<DescriptionBase<?>> taxaCovered, Feature winnerFeature, float threshold, StringBuilder unit,
420
            List<Set<DescriptionBase<?>>> quantitativeStates, int i) {
482
            Set<KeyTaxon> taxaCovered, Feature winnerFeature, float threshold, StringBuilder unit,
483
            List<Set<KeyTaxon>> quantitativeStates, int i) {
421 484
        String sign;
422
        Set<DescriptionBase<?>> newTaxaCovered = quantitativeStates.get(i);
485
        Set<KeyTaxon> newTaxaCovered = quantitativeStates.get(i);
423 486
        if (i==0){
424 487
        	sign = before; // the first element of the list corresponds to taxa before the threshold
425 488
        } else {
......
441 504
        }
442 505
    }
443 506

  
444
    private Feature computeScores(List<Feature> featuresLeft, Set<DescriptionBase<?>> taxaCovered,
507
    private Feature computeScores(List<Feature> featuresLeft, Set<KeyTaxon> taxaCovered,
445 508
            Map<Feature, Float> quantitativeFeaturesThresholds) {
446 509
        Map<Feature,Float> scoreMap = featureScores(featuresLeft, taxaCovered, quantitativeFeaturesThresholds);
447 510
        dependenciesScores(scoreMap, featuresLeft, taxaCovered, quantitativeFeaturesThresholds);
......
462 525
	 * "onlyApplicableIf" or "InapplicableIf", the feature it depends can be chosen in order to build a better key.
463 526
	 */
464 527
	private void dependenciesScores(Map<Feature,Float> scoreMap, List<Feature> featuresLeft,
465
	        Set<DescriptionBase<?>> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
528
	        Set<KeyTaxon> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
466 529

  
467 530
	    //TODO maybe we need to do this recursive?
468 531

  
......
512 575
	 * @return <code>true</code>, if all taxa covered by the new branch include all states of the clique.
513 576
	 * <code>false</code> otherwise.
514 577
	 */
515
	private void mergeBranches(List<State> clique, Map<Set<DescriptionBase<?>>, List<State>> taxonStatesMap, Map<Set<DescriptionBase<?>>, Boolean> reuseWinner){
578
	private void mergeBranches(List<State> clique, Map<Set<KeyTaxon>, List<State>> taxonStatesMap, Map<Set<KeyTaxon>, Boolean> reuseWinner){
516 579

  
517 580
	    boolean isExact = true;
518 581
	    if (clique.size()<=1){
519 582
	        return;
520 583
	    }
521
	    Map.Entry<Set<DescriptionBase<?>>,List<State>> firstBranch = null;
522
	    List<Set<DescriptionBase<?>>> tdToDelete = new ArrayList<>();
523
	    for (Map.Entry<Set<DescriptionBase<?>>, List<State>> branch : taxonStatesMap.entrySet()){
584
	    Map.Entry<Set<KeyTaxon>,List<State>> firstBranch = null;
585
	    List<Set<KeyTaxon>> tdToDelete = new ArrayList<>();
586
	    for (Map.Entry<Set<KeyTaxon>, List<State>> branch : taxonStatesMap.entrySet()){
524 587
		    boolean stateFound = false;
525 588
			// looks for one state of the clique in this branch
526 589
			for(State state : clique){
......
549 612
			}
550 613
		}
551 614
		// once this is done, the branches merged to the first one are deleted
552
		for (Set<DescriptionBase<?>> td : tdToDelete){
615
		for (Set<KeyTaxon> td : tdToDelete){
553 616
			taxonStatesMap.remove(td);
554 617
		}
555 618
		if (!isExact && firstBranch != null){
......
612 675
	 * @param taxaCovered the base of taxa considered
613 676
	 * @return
614 677
	 */
615
	private Map<Set<DescriptionBase<?>>,List<State>> determineCategoricalStates(
616
	        Set<State> states, Feature feature, Set<DescriptionBase<?>> taxaCovered){
678
	private Map<Set<KeyTaxon>,List<State>> determineCategoricalStates(
679
	        Set<State> states, Feature feature, Set<KeyTaxon> taxaCovered){
617 680

  
618
	    Map<Set<DescriptionBase<?>>, List<State>> childrenStatesMap = new HashMap<>();
681
	    Map<Set<KeyTaxon>, List<State>> childrenStatesMap = new HashMap<>();
619 682
	    List<State> statesDone = new ArrayList<>(); // the list of states already used
620 683

  
621 684
		for (State state : states){ // for each state
622 685
			statesDone.add(state);
623
			Set<DescriptionBase<?>> newTaxaCovered = taxaByFeatureState(feature, state, taxaCovered); //gets which taxa present this state
686
			Set<KeyTaxon> newTaxaCovered = taxaByFeatureState(feature, state, taxaCovered); //gets which taxa present this state
624 687
			List<State> statesOfTaxa = childrenStatesMap.get(newTaxaCovered);
625 688
			if (statesOfTaxa == null) { // if no states are associated to these taxa, create a new list
626 689
				statesOfTaxa = new ArrayList<>();
......
635 698
	/**
636 699
	 * Returns the list of taxa from previously covered taxa, which have the state featureState for the given feature
637 700
	 */
638
	private Set<DescriptionBase<?>> taxaByFeatureState(Feature feature, State featureState, Set<DescriptionBase<?>> taxaCovered){
639
		Set<DescriptionBase<?>> newCoveredTaxa = new HashSet<>();
640
		for (DescriptionBase<?> td : taxaCovered){
641
			Set<DescriptionElementBase> elements = td.getElements();
642
			for (DescriptionElementBase deb : elements){
643
				if (deb.isInstanceOf(CategoricalData.class)) {
644
					if (deb.getFeature().equals(feature)) {
645
						List<StateData> stateDatas = CdmBase.deproxy(deb, CategoricalData.class).getStateData();
646
						for (StateData sd : stateDatas) {
647
							if (sd.getState().equals(featureState)){
648
								newCoveredTaxa.add(td);
649
							}
650
						}
651
					}
652
				}
701
	private Set<KeyTaxon> taxaByFeatureState(Feature feature, State featureState, Set<KeyTaxon> taxaCovered){
702
		Set<KeyTaxon> newCoveredTaxa = new HashSet<>();
703
		for (KeyTaxon td : taxaCovered){
704
			for (CategoricalData cd : td.getCategoricalData(feature)){
705
			    List<StateData> stateDatas = cd.getStateData();
706
                for (StateData sd : stateDatas) {
707
                    if (sd.getState().equals(featureState)){
708
                        newCoveredTaxa.add(td);
709
                    }
710
                }
653 711
			}
654 712
		}
655 713
		return newCoveredTaxa;
......
659 717
	 * This function returns the feature with the highest score. However, if several features have the same score
660 718
	 * the one which leads to fewer options is chosen (this way, the key is easier to read).
661 719
	 */
662
	private Feature lessStatesWinner(Map<Feature,Float> scores, Set<DescriptionBase<?>> taxaCovered){
720
	private Feature lessStatesWinner(Map<Feature,Float> scores, Set<KeyTaxon> taxaCovered){
663 721
		int nTaxa = taxaCovered.size();
664 722
		if (nTaxa==1) {
665 723
            return null;
......
686 744
			return bestFeatures.get(0);
687 745
		}
688 746
		else { // else choose the one with less states
689
			int lessStates=-1;
747
			int lessStates = -1;
690 748
			int numberOfDifferentStates=-1;
691 749
			for (Feature feature : bestFeatures){
692 750
				if (feature.isSupportsCategoricalData()){
693 751
					Set<State> differentStates = new HashSet<>();
694
					for (DescriptionBase<?> td : taxaCovered){
695
						DescriptionElementBase deb = getDescriptionElementByFeature(td, feature);
696
						if (deb!=null && deb.isInstanceOf(CategoricalData.class)) {
697
						    CategoricalData catdat = (CategoricalData)deb;
698
							List<StateData> stateDatas = catdat.getStateData();
699
							for (StateData sd : stateDatas) {
700
								differentStates.add(sd.getState());
701
							}
752
					for (KeyTaxon taxon : taxaCovered){
753
						Set<CategoricalData> cds = taxon.getCategoricalData(feature);
754
						Set<StateData> allStateData = getStateData(cds);
755
						for (StateData sd : allStateData) {
756
							differentStates.add(sd.getState());
702 757
						}
703 758
					}
704 759
					numberOfDifferentStates=differentStates.size();
......
730 785
	/**
731 786
	 * This function fills the map of features (keys) with their respecting scores (values)
732 787
	 */
733
	private Map<Feature,Float> featureScores(List<Feature> featuresLeft, Set<DescriptionBase<?>> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
788
	private Map<Feature,Float> featureScores(List<Feature> featuresLeft, Set<KeyTaxon> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
734 789
		Map<Feature,Float> scoreMap = new HashMap<>();
735 790
		for (Feature feature : featuresLeft){
736 791
			if (feature.isSupportsCategoricalData()) {
......
746 801
	/**
747 802
	 * Since Quantitative features do not have states, unlike Categorical ones, this function determines which taxa,
748 803
	 * for a given quantitative feature, present either a lower or higher value than a given threshold.
749
	 * It returns two Sets of DescriptionBase, one with the taxa under this threshold (taxaBefore) and another one
804
	 * It returns two Sets of {@link KeyTaxon}, one with the taxa under this threshold (taxaBefore) and another one
750 805
	 * with the taxa over (taxaAfter).
751 806
	 */
752
	private List<Set<DescriptionBase<?>>> determineQuantitativeStates (Float threshold, Feature feature,
753
	        Set<DescriptionBase<?>> taxa, StringBuilder unit){
807
	private List<Set<KeyTaxon>> determineQuantitativeStates (Float threshold, Feature feature,
808
	        Set<KeyTaxon> taxa, StringBuilder unit){
754 809

  
755
	    List<Set<DescriptionBase<?>>> list = new ArrayList<>();
756
		Set<DescriptionBase<?>> taxaBefore = new HashSet<>();
757
		Set<DescriptionBase<?>> taxaAfter = new HashSet<>();
810
	    List<Set<KeyTaxon>> list = new ArrayList<>();
811
		Set<KeyTaxon> taxaBefore = new HashSet<>();
812
		Set<KeyTaxon> taxaAfter = new HashSet<>();
758 813
		list.add(taxaBefore);
759 814
		list.add(taxaAfter);
760
		for (DescriptionBase<?> td : taxa){
761
			Set<DescriptionElementBase> elements = td.getElements();
762
			for (DescriptionElementBase deb : elements){
763
				if (deb.getFeature().equals(feature)) {
764
					if (deb.isInstanceOf(QuantitativeData.class)) {
765
						QuantitativeData qd = (QuantitativeData)deb;
766
						if (unit.toString().equals("") && qd.getUnit()!=null && qd.getUnit().getLabel()!=null){
767
							unit.append(" " + qd.getUnit().getLabel());
768
						}
769
						Set<StatisticalMeasurementValue> values = qd.getStatisticalValues();
770
						for (StatisticalMeasurementValue smv : values){
771
							StatisticalMeasure type = smv.getType();
772
							//TODO DONT FORGET sample size, MEAN etc
773
							if (type.isMax() || type.isTypicalUpperBoundary() || type.isAverage() || type.isExactValue()) {
774
								if (smv.getValue()>threshold){
775
									taxaAfter.add(td);
776
								}
777
							}
778
							if (type.isMin() || type.isTypicalLowerBoundary() || type.isAverage() || type.isExactValue()) {
779
								if (smv.getValue()<=threshold){
780
									taxaBefore.add(td);
781
								}
782
							}
783
						}
784
					}
785
				}
786
			}
815
		for (KeyTaxon td : taxa){
816
		    for (QuantitativeData qd : td.getQuantitativeData(feature)){
817
		        if (unit.toString().equals("") && qd.getUnit()!=null && qd.getUnit().getLabel()!=null){
818
                    unit.append(" " + qd.getUnit().getLabel());
819
                }
820
                Set<StatisticalMeasurementValue> values = qd.getStatisticalValues();
821
                for (StatisticalMeasurementValue smv : values){
822
                    StatisticalMeasure type = smv.getType();
823
                    //TODO DONT FORGET sample size, MEAN etc
824
                    if (type.isMax() || type.isTypicalUpperBoundary() || type.isAverage() || type.isExactValue()) {
825
                        if (smv.getValue()>threshold){
826
                            taxaAfter.add(td);
827
                        }
828
                    }
829
                    if (type.isMin() || type.isTypicalLowerBoundary() || type.isAverage() || type.isExactValue()) {
830
                        if (smv.getValue()<=threshold){
831
                            taxaBefore.add(td);
832
                        }
833
                    }
834
                }
835
		    }
787 836
		}
788 837
		return list;
789 838
	}
......
791 840
	/**
792 841
	 * This function returns the score of a quantitative feature.
793 842
	 */
794
	private float quantitativeFeatureScore(Feature feature, Set<DescriptionBase<?>> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
795
		List<Float> allValues = new ArrayList<>();
796

  
797
		for (DescriptionBase<?> td : coveredTaxa){
798
			Set<DescriptionElementBase> elements = td.getElements();
799
			for (DescriptionElementBase deb : elements){
800
				if (deb.getFeature().equals(feature)) {
801
					if (deb.isInstanceOf(QuantitativeData.class)) {
802
					    QuantitativeData qd = (QuantitativeData)deb;
803
						computeAllValues(allValues, qd);
804
					}
805
				}
806
			}
843
	private float quantitativeFeatureScore(Feature feature, Set<KeyTaxon> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
844

  
845
	    List<Float> allValues = new ArrayList<>();
846
		for (KeyTaxon td : coveredTaxa){
847
		    for (QuantitativeData qd : td.getQuantitativeData(feature)){
848
		        computeAllValues(allValues, qd);
849
		    }
807 850
		}
808 851
		int i,j;
809 852
		float threshold=0;
......
895 938
	 * by comparing each taxon with each other. If the feature
896 939
	 * discriminates a single pair of taxa the score is increased.
897 940
	 */
898
	private float categoricalFeatureScore(Feature feature, Set<DescriptionBase<?>> coveredTaxa){
941
	private float categoricalFeatureScore(Feature feature, Set<KeyTaxon> coveredTaxa){
899 942
		int i,j;
900 943
		float score =0;
901 944
		float power=0;
902
		DescriptionBase<?>[] coveredTaxaArray = coveredTaxa.toArray(new DescriptionBase[coveredTaxa.size()]); // I did not figure a better way to do this
945
		KeyTaxon[] coveredTaxaArray = coveredTaxa.toArray(new KeyTaxon[coveredTaxa.size()]); // I did not figure a better way to do this
903 946
		for (i=0 ; i<coveredTaxaArray.length; i++){
904
			DescriptionElementBase taxon1 = getDescriptionElementByFeature(coveredTaxaArray[i], feature);
947
			Set<CategoricalData> cd1 = coveredTaxaArray[i].getCategoricalData(feature);
905 948
			for (j=i+1 ; j< coveredTaxaArray.length ; j++){
906
				DescriptionElementBase taxon2 = getDescriptionElementByFeature(coveredTaxaArray[j], feature);
907
				power = defaultCategoricalPower(taxon1, taxon2);
949
			    Set<CategoricalData> cd2 = coveredTaxaArray[j].getCategoricalData(feature);
950
				power = defaultCategoricalPower(cd1, cd2);
908 951
				score = score + power;
909 952
			}
910 953
		}
......
946 989
		for (TermNode<Feature> fn : node.getChildNodes()){
947 990
			createDependencies(fn);
948 991
		}
992
		System.out.println(featureDependencies);
949 993
	}
950 994

  
951 995
	/**
952 996
	 * This function fills the exclusions map.
953 997
	 */
954
	private float computeExclusions(Feature feature, Set<DescriptionBase<?>> coveredTaxa, Map<State,Set<State>> exclusions){
998
	private float computeExclusions(Feature feature, Set<KeyTaxon> coveredTaxa, Map<State,Set<State>> exclusions){
955 999
		//unclear what the score is for here
956 1000
		float score =0;
957 1001
		float power=0;
958
		DescriptionBase<?>[] fixedOrderTaxa = coveredTaxa.toArray(new DescriptionBase[coveredTaxa.size()]); // I did not figure a better way to do this
1002
		KeyTaxon[] fixedOrderTaxa = coveredTaxa.toArray(new KeyTaxon[coveredTaxa.size()]); // I did not figure a better way to do this
959 1003
		for (int i=0 ; i<fixedOrderTaxa.length; i++){
960
			DescriptionElementBase deb1 = getDescriptionElementByFeature(fixedOrderTaxa[i], feature);
1004
		    Set<CategoricalData> cd1 = fixedOrderTaxa[i].getCategoricalData(feature);
961 1005

  
962 1006
			for (int j=i+1 ; j< fixedOrderTaxa.length ; j++){
963
				DescriptionElementBase deb2 = getDescriptionElementByFeature(fixedOrderTaxa[j], feature);
1007
				Set<CategoricalData> cd2 = fixedOrderTaxa[j].getCategoricalData(feature);
964 1008

  
965 1009
//				System.out.println(deb1 + "; " +deb2);
966
				power = defaultCategoricalPower(deb1, deb2);
1010
				power = defaultCategoricalPower(cd1, cd2);
967 1011
				score = score + power;
968 1012
				if (power >= 1.0){ // if there is no state in common between deb1 and deb2
969 1013

  
970
					CategoricalData cat1 = (CategoricalData)deb1;
971
					CategoricalData cat2 = (CategoricalData)deb2;
972
					for (StateData statedata1 : cat1.getStateData()){
1014
					for (StateData statedata1 : getStateData(cd1)){
973 1015
						State state1 = statedata1.getState();
974 1016
						if (!exclusions.containsKey(state1)){
975 1017
							exclusions.put(state1, new HashSet<>());
976 1018
						}
977
						for (StateData statedata2 : cat2.getStateData()){
1019
						for (StateData statedata2 : getStateData(cd2)){
978 1020
							State state2 = statedata2.getState();
979 1021
							if (!exclusions.containsKey(state2)){
980 1022
								exclusions.put(state2, new HashSet<>());
......
989 1031
		return score;
990 1032
	}
991 1033

  
992
	/**
993
     * finds the DescriptionElementBase corresponding to the concerned Feature
994
     *
995
     * FIXME: handle multiple occurrences of a feature in the description
996
     */
997
    private DescriptionElementBase getDescriptionElementByFeature(DescriptionBase<?> description, Feature feature) {
998
        for (DescriptionElementBase deb : description.getElements()){
999
            if (deb.getFeature().equals(feature)){
1000
                return deb; // finds the DescriptionElementBase corresponding to the concerned Feature
1001
            }
1034
    private Set<StateData> getStateData(Set<CategoricalData> cds) {
1035
        Set<StateData> result = new HashSet<>();
1036
        for (CategoricalData cd : cds){
1037
            result.addAll(cd.getStateData());
1002 1038
        }
1003
        return null;
1039
        return result;
1004 1040
    }
1005 1041

  
1006 1042
    /**
1007
	 * Returns the score between two DescriptionElementBase. If one of them is null, returns -1.
1008
	 * If they are not of the same type (Categorical) returns 0.
1009
	 */
1010
	private float defaultCategoricalPower(DescriptionElementBase deb1, DescriptionElementBase deb2){
1011
		if (deb1==null || deb2==null) {
1012
			return 0; //TODO what if the two taxa don't have this feature in common ?
1013
		}
1014
		if ((deb1.isInstanceOf(CategoricalData.class))&&(deb2.isInstanceOf(CategoricalData.class))) {
1015
			return defaultCategoricalPower((CategoricalData)deb1, (CategoricalData)deb2);
1016
		} else {
1017
            return 0;
1018
        }
1019
	}
1020

  
1021
	/**
1022 1043
	 * Returns the score of a categorical feature.
1023 1044
	 */
1024
	private float defaultCategoricalPower(CategoricalData cd1, CategoricalData cd2){
1045
	private float defaultCategoricalPower(Set<CategoricalData> cd1, Set<CategoricalData> cd2){
1046
	    if (cd1 == null || cd2 == null ||cd1.isEmpty() || cd2.isEmpty()){
1047
	        return 0;
1048
	    }
1049

  
1025 1050
	    //FIXME see defaultCategoricalPower_old for additional code on dependencies
1026 1051
	    //which has been removed here for now but might be important
1027 1052
        //Now I moved it to #createDependencies. Therefore the below is maybe not needed
1028 1053
	    //anymore but superfluous.
1029 1054
	    //But the implementation at createDependencies is not fully correct yet
1030 1055
	    //so I keep it here for now.
1031
	    if (!featureDependencies.containsKey(cd1.getFeature())){
1032
            featureDependencies.put(cd1.getFeature(), new HashSet<>());
1033
        }
1034
        for (State state : getStates(cd1)){
1035
            if (iAifDependencies.get(state)!=null) {
1036
                featureDependencies.get(cd1.getFeature()).addAll(iAifDependencies.get(state));
1037
            }
1038
            if (oAifDependencies.get(state)!=null) {
1039
                featureDependencies.get(cd1.getFeature()).addAll(oAifDependencies.get(state));
1040
            }
1041
        }
1056

  
1057
	    for (CategoricalData cd : cd1){
1058
	        if (!featureDependencies.containsKey(cd.getFeature())){
1059
	            featureDependencies.put(cd.getFeature(), new HashSet<>());
1060
	        }
1061
	        for (State state : getStates(cd)){
1062
	            if (iAifDependencies.get(state)!=null) {
1063
	                featureDependencies.get(cd.getFeature()).addAll(iAifDependencies.get(state));
1064
	            }
1065
	            if (oAifDependencies.get(state)!=null) {
1066
	                featureDependencies.get(cd.getFeature()).addAll(oAifDependencies.get(state));
1067
	            }
1068
	        }
1069
	    }
1042 1070

  
1043 1071
	    //get all states of both categorical data
1044 1072
        Set<State> states = getStates(cd1, cd2);
1045 1073
        if (states.size() == 0){
1046 1074
            return 0;
1047
        }else if (cd1.getStateData().isEmpty() || cd2.getStateData().isEmpty()){
1048
            return 0;
1049 1075
        }
1050 1076

  
1051 1077
	    int nDiscriminative = 0;
......
1061 1087
	    return result;
1062 1088
	}
1063 1089

  
1064
    private boolean hasState(State state, CategoricalData cd) {
1090
    private boolean hasState(State state, Set<CategoricalData> cds) {
1065 1091
        boolean result = false;
1066
        for (StateData stateData:cd.getStateData()){
1067
            result |= state.equals(stateData.getState());
1092
        for (CategoricalData cd : cds){
1093
            for (StateData stateData:cd.getStateData()){
1094
                result |= state.equals(stateData.getState());
1095
            }
1068 1096
        }
1069 1097
        return result;
1070 1098
    }
1071 1099

  
1072
    private Set<State> getStates(CategoricalData... cds) {
1100
    private Set<State> getStates(Set<CategoricalData> cdset1, Set<CategoricalData> cdset2) {
1073 1101
        Set<State> result = new HashSet<>();
1074
        for (CategoricalData cd : cds){
1075
            List<StateData> states = cd.getStateData();
1076
            for (StateData state:states){
1077
                result.add(state.getState());
1078
            }
1102
        result.addAll(getStates(cdset1));
1103
        result.addAll(getStates(cdset2));
1104
        return result;
1105
    }
1106

  
1107
    private Set<State> getStates(Set<CategoricalData> cdset) {
1108
        Set<State> result = new HashSet<>();
1109
        for (CategoricalData cd : cdset){
1110
            result.addAll(getStates(cd));
1111
        }
1112
        return result;
1113
    }
1114

  
1115
    private Set<State> getStates(CategoricalData cd) {
1116
        //TODO handle modifier
1117
        Set<State> result = new HashSet<>();
1118
        List<StateData> states = cd.getStateData();
1119
        for (StateData state:states){
1120
            result.add(state.getState());
1079 1121
        }
1080 1122
        return result;
1081 1123
    }

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)