1 package eu
.etaxonomy
.cdm
.strategy
.generate
;
3 import java
.util
.ArrayList
;
4 import java
.util
.HashMap
;
5 import java
.util
.HashSet
;
6 import java
.util
.Iterator
;
11 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
12 import eu
.etaxonomy
.cdm
.model
.description
.CategoricalData
;
13 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
14 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
15 import eu
.etaxonomy
.cdm
.model
.description
.KeyStatement
;
16 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKey
;
17 import eu
.etaxonomy
.cdm
.model
.description
.PolytomousKeyNode
;
18 import eu
.etaxonomy
.cdm
.model
.description
.QuantitativeData
;
19 import eu
.etaxonomy
.cdm
.model
.description
.State
;
20 import eu
.etaxonomy
.cdm
.model
.description
.StateData
;
21 import eu
.etaxonomy
.cdm
.model
.description
.StatisticalMeasure
;
22 import eu
.etaxonomy
.cdm
.model
.description
.StatisticalMeasurementValue
;
23 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
24 import eu
.etaxonomy
.cdm
.model
.term
.FeatureNode
;
25 import eu
.etaxonomy
.cdm
.model
.term
.FeatureTree
;
31 public class PolytomousKeyGenerator
{
33 static int level
=-1; // global variable needed by the printTree function in order to store the level which is being printed
34 private PolytomousKey polytomousKey
; // the Identification Key
35 private List
<Feature
> features
; // the features used to generate the key
36 private Set
<TaxonDescription
> taxa
; // the base of taxa
38 private boolean merge
=true; // if this boolean is set to true, branches of the tree will be merged if the corresponding states can be used together without decreasing their score
40 private FeatureTree dependenciesTree
; // the tree containing the dependencies between states and features (InapplicableIf and OnlyApplicableIf)
41 private Map
<State
,Set
<Feature
>> iIdependencies
= new HashMap
<>(); // map of a set of Features (value) inapplicables if a State (key) is present
42 private Map
<State
,Set
<Feature
>> oAIdependencies
= new HashMap
<>(); // map of a set of Features (value) only applicables if a State (key) is present
43 private Map
<Feature
,Set
<Feature
>> featureDependencies
= new HashMap
<>(); // map of all the sets of features (values) which have dependencies with states of other features (keys)
44 private boolean dependenciesON
= true; // if this boolean is true, the dependencies are taken into account
48 * These strings are used for generating the statements of the key.
50 private String before
="<";
51 private String after
=">";
52 private String separator
= " or ";
55 * Sets the features used to generate the key.
59 public void setFeatures(List
<Feature
> featuresList
){
60 this.features
= featuresList
;
64 * Sets the base of taxa.
68 public void setTaxa(Set
<TaxonDescription
> taxaSet
){
73 * Sets the tree containing the dependencies between states and features.
77 public void setDependencies(FeatureTree tree
){
78 this.dependenciesTree
= tree
;
82 * Allows the generator to use the dependencies given by the function "setDependencies".
84 public void turnDependenciesON(){
85 this.dependenciesON
= true;
89 * Prevent the generator from using dependencies.
91 public void turnDependenciesOFF(){
92 this.dependenciesON
= false;
96 * Allows the generator to merge branches if the corresponding states can be used together without diminishing their score.
98 public void mergeModeON(){
103 * Prevent the generator from merging branches (apart from those leading to the same set of taxa).
105 public void mergeModeOFF(){
111 * Initializes the function buildBranches() with the starting parameters in order to build the key
114 polytomousKey
= PolytomousKey
.NewInstance();
115 PolytomousKeyNode root
= polytomousKey
.getRoot();
116 buildBranches(root
,features
,taxa
,true);
121 * Creates the key and prints it
123 public PolytomousKey
invoke(){
124 if (dependenciesON
&& dependenciesTree
!=null){
125 checkDependencies(dependenciesTree
.getRoot());
128 List
<PolytomousKeyNode
> rootlist
= new ArrayList
<PolytomousKeyNode
>();
129 rootlist
.add(polytomousKey
.getRoot());
130 // String spaces = new String();
131 // printTree(rootlist,spaces);
132 // System.out.println(paths.toString());
133 return polytomousKey
;
138 * Recursive function that builds the branches of the identification key (FeatureTree)
140 * @param father the node considered
141 * @param featuresLeft List of features that can be used at this point
142 * @param taxaCovered the taxa left at this point (i.e. that verify the description corresponding to the path leading to this node)
143 * @param taxaAreTheSame if in the previous level the taxa discriminated are the same, this boolean is set to true, thus if the taxa, again, are not discriminated at this level the function stops
145 private void buildBranches(PolytomousKeyNode father
, List
<Feature
> featuresLeft
, Set
<TaxonDescription
> taxaCovered
, boolean taxaDiscriminated
){
146 boolean childrenExist
= false; // boolean indicating if this node has children, if not, it means we reached a leaf and instead of adding a
147 // question or statement to it, we must add the list of taxa discriminated at this point
149 // These sets respectively store the features inapplicables and applicables at this point in the key
150 Set
<Feature
> innapplicables
= new HashSet
<Feature
>();
151 Set
<Feature
> applicables
= new HashSet
<Feature
>();
153 if (taxaCovered
.size()>1) {
154 // this map stores the thresholds giving the best dichotomy of taxa for the corresponding feature supporting quantitative data
155 Map
<Feature
,Float
> quantitativeFeaturesThresholds
= new HashMap
<Feature
,Float
>();
156 // the scores of the different features are calculated, the thresholds in the same time
157 Map
<Feature
,Float
> scoreMap
= featureScores(featuresLeft
, taxaCovered
, quantitativeFeaturesThresholds
);
158 dependenciesScores(scoreMap
,featuresLeft
, taxaCovered
, quantitativeFeaturesThresholds
);
159 // the feature with the best score becomes the one corresponding to the current node
160 Feature winnerFeature
= lessStatesWinner(scoreMap
, taxaCovered
);
161 // the feature is removed from the list of features available to build the next level of the tree
162 featuresLeft
.remove(winnerFeature
);
165 /************** either the feature supports quantitative data... **************/
166 // NB: in this version, "quantitative features" are dealt with in a dichotomous way
167 if (winnerFeature
!=null && winnerFeature
.isSupportsQuantitativeData()) {
168 // first, get the threshold
169 float threshold
= quantitativeFeaturesThresholds
.get(winnerFeature
);
171 StringBuilder unit
= new StringBuilder("");
172 // then determine which taxa are before and which are after this threshold (dichotomy) in order to create the children of the father node
173 List
<Set
<TaxonDescription
>> quantitativeStates
= determineQuantitativeStates(threshold
,winnerFeature
,taxaCovered
,unit
);
174 // thus the list contains two sets of TaxonDescription, the first corresponding to those before, the second to those after the threshold
176 Set
<TaxonDescription
> newTaxaCovered
= quantitativeStates
.get(i
);
178 sign
= before
; // the first element of the list corresponds to taxa before the threshold
180 sign
= after
; // the second to those after
182 if (newTaxaCovered
.size()>0 && !((newTaxaCovered
.size()==taxaCovered
.size()) && !taxaDiscriminated
)){ // if the taxa are discriminated compared to those of the father node, a child is created
183 childrenExist
= true;
184 PolytomousKeyNode son
= PolytomousKeyNode
.NewInstance();
185 son
.setFeature(winnerFeature
);
186 // old code used when PolytomousKey extended FeatureTree
187 // Representation question = new Representation(null, " " + sign + " " + threshold +unit,null, Language.DEFAULT()); // the question attribute is used to store the state of the feature
188 // son.addQuestion(question);
189 KeyStatement statement
= KeyStatement
.NewInstance( " " + sign
+ " " + threshold
+unit
); // the question attribute is used to store the state of the feature
190 son
.setStatement(statement
);
191 father
.addChild(son
);
192 boolean areTheTaxaDiscriminated
;
193 if (newTaxaCovered
.size()==taxaCovered
.size()) {
194 areTheTaxaDiscriminated
= false;
196 areTheTaxaDiscriminated
= true;
198 buildBranches(son
,featuresLeft
, newTaxaCovered
,areTheTaxaDiscriminated
);
203 /************** ...or it supports categorical data. **************/
204 // "categorical features" may present several different states, each one of these might correspond to one child
205 List
<State
> statesDone
= new ArrayList
<State
>(); // the list of states already used
207 if (winnerFeature
!=null && winnerFeature
.isSupportsCategoricalData()) {
208 for (TaxonDescription td
: taxaCovered
){
209 DescriptionElementBase debConcerned
= null;
211 // first, get the right DescriptionElementBase
212 for (DescriptionElementBase deb
: td
.getElements()) {
213 if (deb
.getFeature().equals(winnerFeature
)) {
218 if (debConcerned
!=null) {
219 // a map is created, the key being the set of taxa that present the state(s) stored in the corresponding value
220 Map
<Set
<TaxonDescription
>,List
<State
>> taxonStatesMap
= determineCategoricalStates(statesDone
,(CategoricalData
)debConcerned
,winnerFeature
,taxaCovered
);
221 // if the merge option is ON, branches with the same discriminative power will be merged (see Vignes & Lebbes, 1989)
223 // creates a map between the different states of the winnerFeature and the sets of states "incompatible" with them
224 Map
<State
,Set
<State
>> exclusions
= new HashMap
<State
,Set
<State
>>();
225 featureScoreAndMerge(winnerFeature
,taxaCovered
,exclusions
);
227 while (!exclusions
.isEmpty()){
228 // looks for the largest clique, i.e. the state with less exclusions
229 List
<State
> clique
= returnBestClique(exclusions
);
230 // then merges the corresponding branches
231 mergeBranches(clique
,taxonStatesMap
);
234 if (taxonStatesMap
!=null && !taxonStatesMap
.isEmpty()) {
235 for (Map
.Entry
<Set
<TaxonDescription
>,List
<State
>> entry
: taxonStatesMap
.entrySet()){
236 Set
<TaxonDescription
> newTaxaCovered
= entry
.getKey();
237 List
<State
> listOfStates
= entry
.getValue();
238 if ((newTaxaCovered
.size()>0) && !((newTaxaCovered
.size()==taxaCovered
.size()) && !taxaDiscriminated
)){ // if the taxa are discriminated compared to those of the father node, a child is created
239 childrenExist
= true;
240 PolytomousKeyNode son
= PolytomousKeyNode
.NewInstance();
241 StringBuilder questionLabel
= new StringBuilder();
242 numberOfStates
= listOfStates
.size()-1;
243 for (State state
: listOfStates
) {
245 // if the dependencies are considered, removes and adds the right features from/to the list of features left
246 // these features are stored in order to be put back again when the current branch is finished
247 if (iIdependencies
.get(state
)!= null) {
248 innapplicables
.addAll(iIdependencies
.get(state
));
250 if (oAIdependencies
.get(state
)!= null) {
251 applicables
.addAll(oAIdependencies
.get(state
));
253 for (Feature feature
: innapplicables
) {
254 featuresLeft
.remove(feature
);
256 for (Feature feature
: applicables
) {
257 featuresLeft
.add(feature
);
260 questionLabel
.append(state
.getLabel());
261 if (listOfStates
.lastIndexOf(state
)!=numberOfStates
)
263 questionLabel
.append(separator
); // append a separator after each state except for the last one
266 // old code used when PolytomousKey extended FeatureTree
267 // Representation question = new Representation(null, questionLabel.toString(),null, Language.DEFAULT());
268 // son.addQuestion(question);
269 KeyStatement statement
= KeyStatement
.NewInstance(questionLabel
.toString());
270 son
.setStatement(statement
);
271 son
.setFeature(winnerFeature
);
272 father
.addChild(son
);
273 featuresLeft
.remove(winnerFeature
); // unlike for quantitative features, once a categorical one has been used, it cannot be reused in the same branch
274 boolean areTheTaxaDiscriminated
;
275 if (newTaxaCovered
.size()==taxaCovered
.size()) {
276 areTheTaxaDiscriminated
= true;
278 areTheTaxaDiscriminated
= false;
280 buildBranches(son
,featuresLeft
, newTaxaCovered
,areTheTaxaDiscriminated
);
287 // the features depending on other features are added/removed to/from the features left once the branch is done
289 for (Feature feature
: innapplicables
) {
290 featuresLeft
.add(feature
);
292 for (Feature feature
: applicables
) {
293 featuresLeft
.remove(feature
);
296 // the winner features are put back to the features left once the branch is done
297 featuresLeft
.add(winnerFeature
);
299 // if the node is a leaf, the statement contains the list of taxa to which the current leaf leads.
301 KeyStatement fatherStatement
= father
.getStatement();
302 if (fatherStatement
!=null){
303 String statementString
= fatherStatement
.getLabelText(Language
.DEFAULT());
304 if (statementString
!=null && taxaCovered
!= null){
305 String label
= statementString
+ " --> " + taxaCovered
.toString();
306 fatherStatement
.putLabel(Language
.DEFAULT(), label
);
315 * Dependencies can be seen as a hierarchy.
316 * If the dependencies are on, this function calculates the score of all children features and give the highest score
317 * of these to their father (of course, if its score is lower). This way, if a feature has a good score but is
318 * "onlyApplicableIf" or "InapplicableIf", the feature it depends can be chosen in order to build a better key.
321 * @param featuresLeft
323 * @param quantitativeFeaturesThresholds
325 private void dependenciesScores(Map
<Feature
,Float
> scoreMap
, List
<Feature
> featuresLeft
,Set
<TaxonDescription
> coveredTaxa
, Map
<Feature
,Float
> quantitativeFeaturesThresholds
){
326 // first copies the list of features left
327 List
<Feature
> pseudoFeaturesLeft
= featuresLeft
.subList(0, featuresLeft
.size()-1);
328 // then adds all the features which depend on features contained in the list
329 for (Feature feature
: pseudoFeaturesLeft
){
330 if (featureDependencies
.containsKey(feature
)){
331 for (Feature feature2
: featureDependencies
.get(feature
)){
332 if (!pseudoFeaturesLeft
.contains(feature2
)){
333 pseudoFeaturesLeft
.add(feature2
);
338 // then calculates the scores of all features that have been added
339 Map
<Feature
,Float
> newScoreMap
= featureScores(pseudoFeaturesLeft
, coveredTaxa
, quantitativeFeaturesThresholds
);
340 for (Feature feature
: featureDependencies
.keySet()){
341 if (newScoreMap
.containsKey(feature
)){
342 for (Feature feature2
: featureDependencies
.get(feature
)){
343 if (newScoreMap
.containsKey(feature2
)) {
344 // if a feature has a better than the "father" feature it depends on
345 if (newScoreMap
.get(feature2
)>newScoreMap
.get(feature
)){
346 // the "father" feature gets its score
347 scoreMap
.put(feature
, newScoreMap
.get(feature2
));
357 * This function merges branches of the key belonging to the same clique
358 * (http://en.wikipedia.org/wiki/Clique_%28graph_theory%29)
360 * @param clique the list of States linked together (i.e. if merged have the same score)
361 * @param taxonStatesMap the map between the taxa (keys) and the states (keys) leading to them
363 private void mergeBranches(List
<State
> clique
, Map
<Set
<TaxonDescription
>, List
<State
>> taxonStatesMap
){
365 Map
.Entry
<Set
<TaxonDescription
>,List
<State
>> firstBranch
=null;
366 List
<Set
<TaxonDescription
>> tdToDelete
= new ArrayList
<Set
<TaxonDescription
>>();
368 if (clique
.size()>1){
369 Iterator
<Map
.Entry
<Set
<TaxonDescription
>, List
<State
>>> it1
= taxonStatesMap
.entrySet().iterator();
370 while (it1
.hasNext()){
371 Map
.Entry
<Set
<TaxonDescription
>,List
<State
>> branch
= it1
.next();
372 Iterator
<State
> stateIterator
= clique
.iterator();
374 // looks for one state of the clique in this branch
375 while(stateIterator
.hasNext() && stateFound
!=true) {
376 State state
= stateIterator
.next();
377 if (branch
.getValue().contains(state
)) {
381 // if one state is found...
382 if (stateFound
==true){
383 // ...for the first time, the branch becomes the one to which the others will be merged
384 if (firstBranch
==null){
387 // ... else the branch is merged to the first one.
389 firstBranch
.getKey().addAll(branch
.getKey());
390 firstBranch
.getValue().addAll(branch
.getValue());
391 tdToDelete
.add(branch
.getKey());
395 // once this is done, the branches merged to the first one are deleted
396 for (Set
<TaxonDescription
> td
: tdToDelete
){
397 taxonStatesMap
.remove(td
);
405 * This function looks for the largest clique of States
406 * (http://en.wikipedia.org/wiki/Clique_%28graph_theory%29)
407 * from the map of exclusions.
409 * @param exclusions map a state (key) to the set of states (value) which can not be merged with it without decreasing its score.
412 private List
<State
> returnBestClique (Map
<State
,Set
<State
>> exclusions
){
413 int bestNumberOfExclusions
=-1;;
414 int numberOfExclusions
;
415 List
<State
> clique
= new ArrayList
<State
>();
417 // looks for the largest clique, i.e. the state with less exclusions
418 State bestState
=null;
419 for (Iterator
<Map
.Entry
<State
,Set
<State
>>> it1
= exclusions
.entrySet().iterator() ; it1
.hasNext();){
420 Map
.Entry
<State
,Set
<State
>> pair
= it1
.next();
421 numberOfExclusions
= pair
.getValue().size();
422 if ((bestNumberOfExclusions
==-1) || numberOfExclusions
<bestNumberOfExclusions
) {
423 bestNumberOfExclusions
=numberOfExclusions
;
424 bestState
= pair
.getKey();
428 clique
.add(bestState
);
429 exclusions
.remove(bestState
);
431 boolean isNotExcluded
;
432 // then starts building the clique by adding the states which do not exclude each other
433 for (Iterator
<Map
.Entry
<State
,Set
<State
>>> iterator
= exclusions
.entrySet().iterator() ; iterator
.hasNext();){
434 Map
.Entry
<State
,Set
<State
>> pair
= iterator
.next();
435 isNotExcluded
= true;
436 for (State state
: clique
) {
437 if (pair
.getValue().contains(state
)) {
438 isNotExcluded
= false;
442 clique
.add(pair
.getKey());
445 for (State state
: clique
) {
446 exclusions
.remove(state
);
453 * fills a map of the sets of taxa (key) presenting the different states (value) for the given feature.
455 * @param statesDone the list of states already done for this feature
456 * @param categoricalData the element from which the states are extracted
457 * @param feature the feature corresponding to the CategoricalData
458 * @param taxaCovered the base of taxa considered
461 private Map
<Set
<TaxonDescription
>,List
<State
>> determineCategoricalStates(List
<State
> statesDone
, CategoricalData categoricalData
, Feature feature
, Set
<TaxonDescription
> taxaCovered
){
462 Map
<Set
<TaxonDescription
>,List
<State
>> childrenStatesMap
= new HashMap
<Set
<TaxonDescription
>,List
<State
>>();
464 List
<StateData
> stateDatas
= categoricalData
.getStateData();
466 List
<State
> states
= new ArrayList
<State
>();
467 for (StateData sd
: stateDatas
){
468 states
.add(sd
.getState());
471 for (State featureState
: states
){ // for each state
472 if(!statesDone
.contains(featureState
)){ // if the state hasn't already be considered
473 statesDone
.add(featureState
);
474 Set
<TaxonDescription
> newTaxaCovered
= whichTaxa(feature
,featureState
,taxaCovered
); //gets which taxa present this state
475 List
<State
> newStates
= childrenStatesMap
.get(newTaxaCovered
);
476 if (newStates
==null) { // if no states are associated to these taxa, create a new list
477 newStates
= new ArrayList
<State
>();
478 childrenStatesMap
.put(newTaxaCovered
,newStates
);
480 newStates
.add(featureState
); // then add the state to the list
483 return childrenStatesMap
;
488 * returns the list of taxa from previously covered taxa, which have the state featureState for the given feature
491 * @param featureState
495 private Set
<TaxonDescription
> whichTaxa(Feature feature
, State featureState
, Set
<TaxonDescription
> taxaCovered
){
496 Set
<TaxonDescription
> newCoveredTaxa
= new HashSet
<TaxonDescription
>();
497 for (TaxonDescription td
: taxaCovered
){
498 Set
<DescriptionElementBase
> elements
= td
.getElements();
499 for (DescriptionElementBase deb
: elements
){
500 if (deb
.isInstanceOf(CategoricalData
.class)) {
501 if (deb
.getFeature().equals(feature
)) {
502 List
<StateData
> stateDatas
= ((CategoricalData
)deb
).getStateData();
503 for (StateData sd
: stateDatas
) {
504 if (sd
.getState().equals(featureState
)){
505 newCoveredTaxa
.add(td
);
512 return newCoveredTaxa
;
517 * This function returns the feature with the highest score. However, if several features have the same score
518 * the one wich leads to less options is chosen (this way, the key is easier to read).
525 private Feature
lessStatesWinner(Map
<Feature
,Float
> scores
, Set
<TaxonDescription
> taxaCovered
){
526 int nTaxa
= taxaCovered
.size();
530 float meanScore
= defaultMeanScore(nTaxa
);
531 float bestScore
= nTaxa
*nTaxa
;
532 List
<Feature
> bestFeatures
= new ArrayList
<Feature
>(); // if ever different features have the best score, they are put in this list
533 Feature bestFeature
= null;
534 Iterator
<Map
.Entry
<Feature
,Float
>> it
= scores
.entrySet().iterator();
536 while (it
.hasNext()){
537 Map
.Entry
<Feature
,Float
> pair
= it
.next();
538 if (pair
.getValue()!=null){
539 newScore
= Math
.abs(pair
.getValue()-meanScore
);// the best score is the closest to the score (meanScore here)
540 // a feature would have if it divided the taxa in two equal parts
541 if (newScore
< bestScore
){
542 bestFeatures
.clear();
543 bestFeatures
.add(pair
.getKey());
544 bestScore
= newScore
;
545 }else if (newScore
==bestScore
){
546 bestFeatures
.add(pair
.getKey());
550 if (bestFeatures
.size()==1) { // if there is only one feature with the best score, pick it
551 return bestFeatures
.get(0);
553 else { // else choose the one with less states
555 int numberOfDifferentStates
=-1;
556 for (Feature feature
: bestFeatures
){
557 if (feature
.isSupportsCategoricalData()){
558 Set
<State
> differentStates
= new HashSet
<State
>();
559 for (TaxonDescription td
: taxaCovered
){
560 Set
<DescriptionElementBase
> elements
= td
.getElements();
561 for (DescriptionElementBase deb
: elements
){
562 if (deb
.isInstanceOf(CategoricalData
.class)) {
563 CategoricalData catdat
= (CategoricalData
)deb
;
564 if (catdat
.getFeature().equals(feature
)) {
565 List
<StateData
> stateDatas
= catdat
.getStateData();
566 for (StateData sd
: stateDatas
) {
567 differentStates
.add(sd
.getState());
573 numberOfDifferentStates
=differentStates
.size();
574 }else if (feature
.isSupportsQuantitativeData()){
575 numberOfDifferentStates
=2;
577 if (lessStates
==-1 || numberOfDifferentStates
<lessStates
){
578 lessStates
=numberOfDifferentStates
;
579 bestFeature
= feature
;
588 * This function calculates the mean score, i.e. the score a feature dividing the taxa in two equal parts
594 private float defaultMeanScore(int nTaxa
){
597 for (i
=1;i
<nTaxa
;i
++){
598 score
= score
+ Math
.round(i
+1/2);
605 * This function fills the map of features (keys) with their respecting scores (values)
607 * @param featuresLeft
609 * @param quantitativeFeaturesThresholds
612 private Map
<Feature
,Float
> featureScores(List
<Feature
> featuresLeft
, Set
<TaxonDescription
> coveredTaxa
, Map
<Feature
,Float
> quantitativeFeaturesThresholds
){
613 Map
<Feature
,Float
> scoreMap
= new HashMap
<Feature
,Float
>();
614 for (Feature feature
: featuresLeft
){
615 if (feature
.isSupportsCategoricalData()) {
616 scoreMap
.put(feature
, categoricalFeatureScore(feature
,coveredTaxa
));
618 if (feature
.isSupportsQuantitativeData()){
619 scoreMap
.put(feature
, quantitativeFeatureScore(feature
,coveredTaxa
, quantitativeFeaturesThresholds
));
626 * Since Quantitative features do not have states, unlike Categorical ones, this function determines which taxa,
627 * for a given quantitative feature, present either a lower or higher value than a given threshold.
628 * It returns two Sets of TaxonDescription, one with the taxa under this threshold (taxaBefore) and another one
629 * with the taxa over (taxaAfter).
637 private List
<Set
<TaxonDescription
>> determineQuantitativeStates (Float threshold
, Feature feature
, Set
<TaxonDescription
> taxa
, StringBuilder unit
){
638 List
<Set
<TaxonDescription
>> list
= new ArrayList
<Set
<TaxonDescription
>>();
639 Set
<TaxonDescription
> taxaBefore
= new HashSet
<TaxonDescription
>();
640 Set
<TaxonDescription
> taxaAfter
= new HashSet
<TaxonDescription
>();
641 list
.add(taxaBefore
);
643 for (TaxonDescription td
: taxa
){
644 Set
<DescriptionElementBase
> elements
= td
.getElements();
645 for (DescriptionElementBase deb
: elements
){
646 if (deb
.getFeature().equals(feature
)) {
647 if (deb
.isInstanceOf(QuantitativeData
.class)) {
648 QuantitativeData qd
= (QuantitativeData
)deb
;
649 if (unit
.toString().equals("") && qd
.getUnit()!=null && qd
.getUnit().getLabel()!=null){
650 unit
.append(" " + qd
.getUnit().getLabel());
652 Set
<StatisticalMeasurementValue
> values
= qd
.getStatisticalValues();
653 for (StatisticalMeasurementValue smv
: values
){
654 StatisticalMeasure type
= smv
.getType();
655 //TODO DONT FORGET sample size, MEAN etc
656 if (type
.equals(StatisticalMeasure
.MAX()) || type
.equals(StatisticalMeasure
.TYPICAL_UPPER_BOUNDARY()) || type
.equals(StatisticalMeasure
.AVERAGE())) {
657 if (smv
.getValue()>threshold
){
661 if (type
.equals(StatisticalMeasure
.MIN()) || type
.equals(StatisticalMeasure
.TYPICAL_LOWER_BOUNDARY()) || type
.equals(StatisticalMeasure
.AVERAGE())) {
662 if (smv
.getValue()<=threshold
){
677 * This function returns the score of a quantitative feature.
681 * @param quantitativeFeaturesThresholds
684 private float quantitativeFeatureScore(Feature feature
, Set
<TaxonDescription
> coveredTaxa
, Map
<Feature
,Float
> quantitativeFeaturesThresholds
){
685 List
<Float
> allValues
= new ArrayList
<Float
>();
686 boolean lowerboundarypresent
;
687 boolean upperboundarypresent
;
688 float lowerboundary
=0;
689 float upperboundary
=0;
690 for (TaxonDescription td
: coveredTaxa
){
691 Set
<DescriptionElementBase
> elements
= td
.getElements();
692 for (DescriptionElementBase deb
: elements
){
693 if (deb
.getFeature().equals(feature
)) {
694 if (deb
.isInstanceOf(QuantitativeData
.class)) {
695 QuantitativeData qd
= (QuantitativeData
)deb
;
696 Set
<StatisticalMeasurementValue
> values
= qd
.getStatisticalValues();
697 lowerboundarypresent
= false;
698 upperboundarypresent
= false;
699 for (StatisticalMeasurementValue smv
: values
){
700 StatisticalMeasure type
= smv
.getType();
701 // DONT FORGET sample size, MEAN etc
702 if (type
.equals(StatisticalMeasure
.MAX())) {
703 upperboundary
= smv
.getValue();
704 upperboundarypresent
=true;
706 if (type
.equals(StatisticalMeasure
.MIN())) {
707 lowerboundary
= smv
.getValue();
708 lowerboundarypresent
=true;
710 if (type
.equals(StatisticalMeasure
.TYPICAL_UPPER_BOUNDARY()) && upperboundarypresent
==false) {
711 upperboundary
= smv
.getValue();
712 upperboundarypresent
=true;
714 if (type
.equals(StatisticalMeasure
.TYPICAL_LOWER_BOUNDARY()) && lowerboundarypresent
==false) {
715 lowerboundary
= smv
.getValue();
716 lowerboundarypresent
=true;
718 if (type
.equals(StatisticalMeasure
.AVERAGE()) && upperboundarypresent
==false && lowerboundarypresent
==false) {
719 lowerboundary
= smv
.getValue();
720 upperboundary
= lowerboundary
;
721 lowerboundarypresent
=true;
722 upperboundarypresent
=true;
725 if (lowerboundarypresent
&& upperboundarypresent
) {
726 allValues
.add(lowerboundary
);
727 allValues
.add(upperboundary
);
735 float bestThreshold
=0;
736 int difference
=allValues
.size();
737 int differenceMin
= difference
;
740 for (i
=0;i
<allValues
.size()/2;i
++) {
741 threshold
= allValues
.get(i
*2+1);
744 for (j
=0;j
<allValues
.size()/2;j
++) {
745 if (allValues
.get(j
*2+1)<=threshold
){
748 if (allValues
.get(j
*2)>threshold
){
752 difference
= Math
.abs(taxaBefore
-taxaAfter
);
753 if (difference
<differenceMin
){
754 differenceMin
=difference
;
755 bestThreshold
= threshold
;
758 quantitativeFeaturesThresholds
.put(feature
, bestThreshold
);
759 int defaultQuantitativeScore
=0;
760 for (i
=0;i
<taxaBefore
;i
++) {
761 defaultQuantitativeScore
+= taxaAfter
- i
;
763 return (defaultQuantitativeScore
);
769 * This function returns the score of a categorical feature.
775 private float categoricalFeatureScore(Feature feature
, Set
<TaxonDescription
> coveredTaxa
){
779 TaxonDescription
[] coveredTaxaArray
= coveredTaxa
.toArray(new TaxonDescription
[coveredTaxa
.size()]); // I did not figure a better way to do this
780 for (i
=0 ; i
<coveredTaxaArray
.length
; i
++){
781 Set
<DescriptionElementBase
> elements1
= coveredTaxaArray
[i
].getElements();
782 DescriptionElementBase deb1
= null;
783 for (DescriptionElementBase deb
: elements1
){
784 if (deb
.getFeature().equals(feature
)){
785 deb1
= deb
; // finds the DescriptionElementBase corresponding to the concerned Feature
788 for (j
=i
+1 ; j
< coveredTaxaArray
.length
; j
++){
789 Set
<DescriptionElementBase
> elements2
= coveredTaxaArray
[j
].getElements();
790 DescriptionElementBase deb2
= null;
791 for (DescriptionElementBase deb
: elements2
){
792 if (deb
.getFeature().equals(feature
)){
793 deb2
= deb
; // finds the DescriptionElementBase corresponding to the concerned Feature
796 power
= defaultPower(deb1
,deb2
);
797 score
= score
+ power
;
805 * This recursive function fills the maps of dependencies by reading the tree containing the dependencies.
809 private void checkDependencies(FeatureNode
<Feature
> node
){
810 if (node
.getOnlyApplicableIf()!=null){
811 Set
<State
> addToOAI
= node
.getOnlyApplicableIf();
812 for (State state
: addToOAI
){
813 if (oAIdependencies
.containsKey(state
)) {
814 oAIdependencies
.put(state
, new HashSet
<Feature
>());
816 oAIdependencies
.get(state
).add(node
.getTerm());
819 if (node
.getInapplicableIf()!=null){
820 Set
<State
> addToiI
= node
.getInapplicableIf();
821 for (State state
: addToiI
){
822 if (iIdependencies
.containsKey(state
)) {
823 iIdependencies
.put(state
, new HashSet
<Feature
>());
825 iIdependencies
.get(state
).add(node
.getTerm());
828 if (node
.getChildNodes()!=null) {
829 for (FeatureNode fn
: node
.getChildNodes()){
830 checkDependencies(fn
);
838 * This function fills the exclusions map.
845 private float featureScoreAndMerge(Feature feature
, Set
<TaxonDescription
> coveredTaxa
, Map
<State
,Set
<State
>> exclusions
){
849 TaxonDescription
[] coveredTaxaArray
= coveredTaxa
.toArray(new TaxonDescription
[coveredTaxa
.size()]); // I did not figure a better way to do this
850 for (i
=0 ; i
<coveredTaxaArray
.length
; i
++){
851 Set
<DescriptionElementBase
> elements1
= coveredTaxaArray
[i
].getElements();
852 DescriptionElementBase deb1
= null;
853 for (DescriptionElementBase deb
: elements1
){
854 if (deb
.getFeature().equals(feature
)){
855 deb1
= deb
; // finds the DescriptionElementBase corresponding to the concerned Feature
858 for (j
=i
+1 ; j
< coveredTaxaArray
.length
; j
++){
859 Set
<DescriptionElementBase
> elements2
= coveredTaxaArray
[j
].getElements();
860 DescriptionElementBase deb2
= null;
861 for (DescriptionElementBase deb
: elements2
){
862 if (deb
.getFeature().equals(feature
)){
863 deb2
= deb
; // finds the DescriptionElementBase corresponding to the concerned Feature
866 power
= defaultPower(deb1
,deb2
);
867 score
= score
+ power
;
868 if (power
>0) // if there is no state in common between deb1 and deb2
870 CategoricalData cat1
= (CategoricalData
)deb1
;
871 CategoricalData cat2
= (CategoricalData
)deb2
;
872 for (StateData statedata1
: cat1
.getStateData()){
873 State state1
= statedata1
.getState();
874 if (!exclusions
.containsKey(state1
)){
875 exclusions
.put(state1
, new HashSet
<State
>());
877 for (StateData statedata2
: cat2
.getStateData()){
878 State state2
= statedata2
.getState();
879 if (!exclusions
.containsKey(state2
)){
880 exclusions
.put(state2
, new HashSet
<State
>());
882 exclusions
.get(state1
).add(state2
);
883 exclusions
.get(state2
).add(state1
);
895 * Returns the score between two DescriptionElementBase. If one of them is null, returns -1.
896 * If they are not of the same type (Categorical) returns 0.
902 private float defaultPower(DescriptionElementBase deb1
, DescriptionElementBase deb2
){
903 if (deb1
==null || deb2
==null) {
904 return -1; //what if the two taxa don't have this feature in common ?
906 if ((deb1
.isInstanceOf(CategoricalData
.class))&&(deb2
.isInstanceOf(CategoricalData
.class))) {
907 return defaultCategoricalPower((CategoricalData
)deb1
, (CategoricalData
)deb2
);
914 * Returns the score of a categorical feature.
920 private float defaultCategoricalPower(CategoricalData deb1
, CategoricalData deb2
){
921 List
<StateData
> states1
= deb1
.getStateData();
922 List
<StateData
> states2
= deb2
.getStateData();
923 boolean bool
= false;
924 Iterator
<StateData
> stateData1Iterator
= states1
.iterator() ;
925 // while (!bool && stateData1Iterator.hasNext()) {
926 // Iterator<StateData> stateData2Iterator = states2.iterator() ;
927 // StateData stateData1 = stateData1Iterator.next();
928 // while (!bool && stateData2Iterator.hasNext()) {
929 // bool = stateData1.getState().equals(stateData2Iterator.next().getState()); // checks if the states are the same
932 // one point each time two taxa can be discriminated for a given feature
934 boolean checkFeature
= false;
936 if (!featureDependencies
.containsKey(deb1
.getFeature())){
937 featureDependencies
.put(deb1
.getFeature(), new HashSet
<Feature
>());
941 while (stateData1Iterator
.hasNext()) {
942 Iterator
<StateData
> stateData2Iterator
= states2
.iterator() ;
943 StateData stateData1
= stateData1Iterator
.next();
944 State state1
= stateData1
.getState();
946 if (iIdependencies
.get(state1
)!=null) {
947 featureDependencies
.get(deb1
.getFeature()).addAll(iIdependencies
.get(state1
));
949 if (oAIdependencies
.get(state1
)!=null) {
950 featureDependencies
.get(deb1
.getFeature()).addAll(oAIdependencies
.get(state1
));
953 while (stateData2Iterator
.hasNext()) {
954 State state2
= stateData1
.getState();
955 bool
= bool
|| state1
.equals(state2
); // checks if the states are the same
967 // old code used when PolytomousKey extended FeatureTree
968 // private void printTree(List<PolytomousKeyNode> fnodes, String spaces){
969 // if (!fnodes.isEmpty()){
971 // int levelcopy = level;
974 // String equals = " = ";
975 // String quantitative = "";
976 // String newspaces = spaces.concat("\t");
977 // for (PolytomousKeyNode fnode : fnodes){
978 // if (fnode.getFeature()!=null) {
979 // String state = null;
980 // if (fnode.getQuestion(Language.DEFAULT())!=null) state = fnode.getQuestion(Language.DEFAULT()).getLabel();
981 // if (fnode.getFeature().isSupportsQuantitativeData()) delimiter = quantitative;
982 // else delimiter = equals;
983 // System.out.println(newspaces + levelcopy + " : " + j + " " + fnode.getFeature().getLabel() + delimiter + state);
986 // else { // TODO never read ?
987 // if (fnode.getQuestion(Language.DEFAULT())!=null) System.out.println(newspaces + "-> " + fnode.getQuestion(Language.DEFAULT()).getLabel());
989 // printTree(fnode.getChildren(),newspaces);