|
1 |
package eu.etaxonomy.cdm.strategy.generate;
|
|
2 |
|
|
3 |
import java.util.ArrayList;
|
|
4 |
import java.util.HashMap;
|
|
5 |
import java.util.HashSet;
|
|
6 |
import java.util.Iterator;
|
|
7 |
import java.util.List;
|
|
8 |
import java.util.Map;
|
|
9 |
import java.util.Set;
|
|
10 |
|
|
11 |
import eu.etaxonomy.cdm.model.common.Language;
|
|
12 |
import eu.etaxonomy.cdm.model.description.CategoricalData;
|
|
13 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
|
14 |
import eu.etaxonomy.cdm.model.description.Feature;
|
|
15 |
import eu.etaxonomy.cdm.model.description.FeatureNode;
|
|
16 |
import eu.etaxonomy.cdm.model.description.FeatureTree;
|
|
17 |
import eu.etaxonomy.cdm.model.description.KeyStatement;
|
|
18 |
import eu.etaxonomy.cdm.model.description.PolytomousKey;
|
|
19 |
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
|
|
20 |
import eu.etaxonomy.cdm.model.description.QuantitativeData;
|
|
21 |
import eu.etaxonomy.cdm.model.description.State;
|
|
22 |
import eu.etaxonomy.cdm.model.description.StateData;
|
|
23 |
import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
|
|
24 |
import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
|
|
25 |
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
|
26 |
|
|
27 |
/**
|
|
28 |
* @author m.venin
|
|
29 |
*
|
|
30 |
*/
|
|
31 |
public class PolytomousKeyGenerator {
|
|
32 |
|
|
33 |
static int level=-1; // global variable needed by the printTree function in order to store the level which is being printed
|
|
34 |
private PolytomousKey polytomousKey; // the Identification Key
|
|
35 |
private List<Feature> features; // the features used to generate the key
|
|
36 |
private Set<TaxonDescription> taxa; // the base of taxa
|
|
37 |
|
|
38 |
private boolean merge=true; // if this boolean is set to true, branches of the tree will be merged if the corresponding states can be used together without decreasing their score
|
|
39 |
|
|
40 |
private FeatureTree dependenciesTree; // the tree containing the dependencies between states and features (InapplicableIf and OnlyApplicableIf)
|
|
41 |
private Map<State,Set<Feature>> iIdependencies = new HashMap<State,Set<Feature>>(); // map of a set of Features (value) inapplicables if a State (key) is present
|
|
42 |
private Map<State,Set<Feature>> oAIdependencies = new HashMap<State,Set<Feature>>(); // map of a set of Features (value) only applicables if a State (key) is present
|
|
43 |
private Map<Feature,Set<Feature>> featureDependencies = new HashMap<Feature,Set<Feature>>(); // map of all the sets of features (values) which have dependencies with states of other features (keys)
|
|
44 |
private boolean dependenciesON = true; // if this boolean is true, the dependencies are taken into account
|
|
45 |
|
|
46 |
|
|
47 |
/**
|
|
48 |
* These strings are used for generating the statements of the key.
|
|
49 |
*/
|
|
50 |
private String before="<";
|
|
51 |
private String after=">";
|
|
52 |
private String separator = " or ";
|
|
53 |
|
|
54 |
/**
|
|
55 |
* Sets the features used to generate the key.
|
|
56 |
*
|
|
57 |
* @param featuresList
|
|
58 |
*/
|
|
59 |
public void setFeatures(List<Feature> featuresList){
|
|
60 |
this.features = featuresList;
|
|
61 |
}
|
|
62 |
|
|
63 |
/**
|
|
64 |
* Sets the base of taxa.
|
|
65 |
*
|
|
66 |
* @param featuresList
|
|
67 |
*/
|
|
68 |
public void setTaxa(Set<TaxonDescription> taxaSet){
|
|
69 |
this.taxa = taxaSet;
|
|
70 |
}
|
|
71 |
|
|
72 |
/**
|
|
73 |
* Sets the tree containing the dependencies between states and features.
|
|
74 |
*
|
|
75 |
* @param tree
|
|
76 |
*/
|
|
77 |
public void setDependencies(FeatureTree tree){
|
|
78 |
this.dependenciesTree = tree;
|
|
79 |
}
|
|
80 |
|
|
81 |
/**
|
|
82 |
* Allows the generator to use the dependencies given by the function "setDependencies".
|
|
83 |
*/
|
|
84 |
public void turnDependenciesON(){
|
|
85 |
this.dependenciesON = true;
|
|
86 |
}
|
|
87 |
|
|
88 |
/**
|
|
89 |
* Prevent the generator from using dependencies.
|
|
90 |
*/
|
|
91 |
public void turnDependenciesOFF(){
|
|
92 |
this.dependenciesON = false;
|
|
93 |
}
|
|
94 |
|
|
95 |
/**
|
|
96 |
* Allows the generator to merge branches if the corresponding states can be used together without diminishing their score.
|
|
97 |
*/
|
|
98 |
public void mergeModeON(){
|
|
99 |
this.merge = true;
|
|
100 |
}
|
|
101 |
|
|
102 |
/**
|
|
103 |
* Prevent the generator from merging branches (apart from those leading to the same set of taxa).
|
|
104 |
*/
|
|
105 |
public void mergeModeOFF(){
|
|
106 |
this.merge = false;
|
|
107 |
}
|
|
108 |
|
|
109 |
|
|
110 |
/**
|
|
111 |
* Initializes the function buildBranches() with the starting parameters in order to build the key
|
|
112 |
*/
|
|
113 |
private void loop(){
|
|
114 |
polytomousKey = PolytomousKey.NewInstance();
|
|
115 |
PolytomousKeyNode root = polytomousKey.getRoot();
|
|
116 |
buildBranches(root,features,taxa,true);
|
|
117 |
}
|
|
118 |
|
|
119 |
|
|
120 |
/**
|
|
121 |
* Creates the key and prints it
|
|
122 |
*/
|
|
123 |
public PolytomousKey invoke(){
|
|
124 |
if (dependenciesON && dependenciesTree!=null){
|
|
125 |
checkDependencies(dependenciesTree.getRoot());
|
|
126 |
}
|
|
127 |
loop();
|
|
128 |
List<PolytomousKeyNode> rootlist = new ArrayList<PolytomousKeyNode>();
|
|
129 |
rootlist.add(polytomousKey.getRoot());
|
|
130 |
// String spaces = new String();
|
|
131 |
// printTree(rootlist,spaces);
|
|
132 |
// System.out.println(paths.toString());
|
|
133 |
return polytomousKey;
|
|
134 |
}
|
|
135 |
|
|
136 |
|
|
137 |
/**
|
|
138 |
* Recursive function that builds the branches of the identification key (FeatureTree)
|
|
139 |
*
|
|
140 |
* @param father the node considered
|
|
141 |
* @param featuresLeft List of features that can be used at this point
|
|
142 |
* @param taxaCovered the taxa left at this point (i.e. that verify the description corresponding to the path leading to this node)
|
|
143 |
* @param taxaAreTheSame if in the previous level the taxa discriminated are the same, this boolean is set to true, thus if the taxa, again, are not discriminated at this level the function stops
|
|
144 |
*/
|
|
145 |
private void buildBranches(PolytomousKeyNode father, List<Feature> featuresLeft, Set<TaxonDescription> taxaCovered, boolean taxaDiscriminated){
|
|
146 |
boolean childrenExist = false; // boolean indicating if this node has children, if not, it means we reached a leaf and instead of adding a
|
|
147 |
// question or statement to it, we must add the list of taxa discriminated at this point
|
|
148 |
|
|
149 |
// These sets respectively store the features inapplicables and applicables at this point in the key
|
|
150 |
Set<Feature> innapplicables = new HashSet<Feature>();
|
|
151 |
Set<Feature> applicables = new HashSet<Feature>();
|
|
152 |
|
|
153 |
if (taxaCovered.size()>1) {
|
|
154 |
// this map stores the thresholds giving the best dichotomy of taxa for the corresponding feature supporting quantitative data
|
|
155 |
Map<Feature,Float> quantitativeFeaturesThresholds = new HashMap<Feature,Float>();
|
|
156 |
// the scores of the different features are calculated, the thresholds in the same time
|
|
157 |
Map<Feature,Float> scoreMap = featureScores(featuresLeft, taxaCovered, quantitativeFeaturesThresholds);
|
|
158 |
dependenciesScores(scoreMap,featuresLeft, taxaCovered, quantitativeFeaturesThresholds);
|
|
159 |
// the feature with the best score becomes the one corresponding to the current node
|
|
160 |
Feature winnerFeature = lessStatesWinner(scoreMap, taxaCovered);
|
|
161 |
// the feature is removed from the list of features available to build the next level of the tree
|
|
162 |
featuresLeft.remove(winnerFeature);
|
|
163 |
|
|
164 |
int i;
|
|
165 |
/************** either the feature supports quantitative data... **************/
|
|
166 |
// NB: in this version, "quantitative features" are dealt with in a dichotomous way
|
|
167 |
if (winnerFeature!=null && winnerFeature.isSupportsQuantitativeData()) {
|
|
168 |
// first, get the threshold
|
|
169 |
float threshold = quantitativeFeaturesThresholds.get(winnerFeature);
|
|
170 |
String sign;
|
|
171 |
StringBuilder unit= new StringBuilder("");
|
|
172 |
// then determine which taxa are before and which are after this threshold (dichotomy) in order to create the children of the father node
|
|
173 |
List<Set<TaxonDescription>> quantitativeStates = determineQuantitativeStates(threshold,winnerFeature,taxaCovered,unit);
|
|
174 |
// thus the list contains two sets of TaxonDescription, the first corresponding to those before, the second to those after the threshold
|
|
175 |
for (i=0;i<2;i++) {
|
|
176 |
Set<TaxonDescription> newTaxaCovered = quantitativeStates.get(i);
|
|
177 |
if (i==0){
|
|
178 |
sign = before; // the first element of the list corresponds to taxa before the threshold
|
|
179 |
} else {
|
|
180 |
sign = after; // the second to those after
|
|
181 |
}
|
|
182 |
if (newTaxaCovered.size()>0 && !((newTaxaCovered.size()==taxaCovered.size()) && !taxaDiscriminated)){ // if the taxa are discriminated compared to those of the father node, a child is created
|
|
183 |
childrenExist = true;
|
|
184 |
PolytomousKeyNode son = PolytomousKeyNode.NewInstance();
|
|
185 |
son.setFeature(winnerFeature);
|
|
186 |
// old code used when PolytomousKey extended FeatureTree
|
|
187 |
// Representation question = new Representation(null, " " + sign + " " + threshold +unit,null, Language.DEFAULT()); // the question attribute is used to store the state of the feature
|
|
188 |
// son.addQuestion(question);
|
|
189 |
KeyStatement statement = KeyStatement.NewInstance( " " + sign + " " + threshold +unit); // the question attribute is used to store the state of the feature
|
|
190 |
son.setStatement(statement);
|
|
191 |
father.addChild(son);
|
|
192 |
boolean areTheTaxaDiscriminated;
|
|
193 |
if (newTaxaCovered.size()==taxaCovered.size()) areTheTaxaDiscriminated = false;
|
|
194 |
else areTheTaxaDiscriminated = true;
|
|
195 |
buildBranches(son,featuresLeft, newTaxaCovered,areTheTaxaDiscriminated);
|
|
196 |
}
|
|
197 |
}
|
|
198 |
}
|
|
199 |
|
|
200 |
/************** ...or it supports categorical data. **************/
|
|
201 |
// "categorical features" may present several different states, each one of these might correspond to one child
|
|
202 |
List<State> statesDone = new ArrayList<State>(); // the list of states already used
|
|
203 |
int numberOfStates;
|
|
204 |
if (winnerFeature!=null && winnerFeature.isSupportsCategoricalData()) {
|
|
205 |
for (TaxonDescription td : taxaCovered){
|
|
206 |
DescriptionElementBase debConcerned = null;
|
|
207 |
|
|
208 |
// first, get the right DescriptionElementBase
|
|
209 |
for (DescriptionElementBase deb : td.getElements()) {
|
|
210 |
if (deb.getFeature().equals(winnerFeature)) debConcerned = deb;
|
|
211 |
}
|
|
212 |
|
|
213 |
if (debConcerned!=null) {
|
|
214 |
// a map is created, the key being the set of taxa that present the state(s) stored in the corresponding value
|
|
215 |
Map<Set<TaxonDescription>,List<State>> taxonStatesMap = determineCategoricalStates(statesDone,(CategoricalData)debConcerned,winnerFeature,taxaCovered);
|
|
216 |
// if the merge option is ON, branches with the same discriminative power will be merged (see Vignes & Lebbes, 1989)
|
|
217 |
if (merge){
|
|
218 |
// creates a map between the different states of the winnerFeature and the sets of states "incompatible" with them
|
|
219 |
Map<State,Set<State>> exclusions = new HashMap<State,Set<State>>();
|
|
220 |
featureScoreAndMerge(winnerFeature,taxaCovered,exclusions);
|
|
221 |
|
|
222 |
while (!exclusions.isEmpty()){
|
|
223 |
// looks for the largest clique, i.e. the state with less exclusions
|
|
224 |
List<State> clique = returnBestClique(exclusions);
|
|
225 |
// then merges the corresponding branches
|
|
226 |
mergeBranches(clique,taxonStatesMap);
|
|
227 |
}
|
|
228 |
}
|
|
229 |
if (taxonStatesMap!=null && !taxonStatesMap.isEmpty()) {
|
|
230 |
for (Map.Entry<Set<TaxonDescription>,List<State>> entry : taxonStatesMap.entrySet()){
|
|
231 |
Set<TaxonDescription> newTaxaCovered = entry.getKey();
|
|
232 |
List<State> listOfStates = entry.getValue();
|
|
233 |
if ((newTaxaCovered.size()>0) && !((newTaxaCovered.size()==taxaCovered.size()) && !taxaDiscriminated)){ // if the taxa are discriminated compared to those of the father node, a child is created
|
|
234 |
childrenExist = true;
|
|
235 |
PolytomousKeyNode son = PolytomousKeyNode.NewInstance();
|
|
236 |
StringBuilder questionLabel = new StringBuilder();
|
|
237 |
numberOfStates = listOfStates.size()-1;
|
|
238 |
for (State state : listOfStates) {
|
|
239 |
if (dependenciesON){
|
|
240 |
// if the dependencies are considered, removes and adds the right features from/to the list of features left
|
|
241 |
// these features are stored in order to be put back again when the current branch is finished
|
|
242 |
if (iIdependencies.get(state)!= null) innapplicables.addAll(iIdependencies.get(state));
|
|
243 |
if (oAIdependencies.get(state)!= null) applicables.addAll(oAIdependencies.get(state));
|
|
244 |
for (Feature feature : innapplicables) featuresLeft.remove(feature);
|
|
245 |
for (Feature feature : applicables) featuresLeft.add(feature);
|
|
246 |
}
|
|
247 |
questionLabel.append(state.getLabel());
|
|
248 |
if (listOfStates.lastIndexOf(state)!=numberOfStates) questionLabel.append(separator); // append a separator after each state except for the last one
|
|
249 |
}
|
|
250 |
// old code used when PolytomousKey extended FeatureTree
|
|
251 |
// Representation question = new Representation(null, questionLabel.toString(),null, Language.DEFAULT());
|
|
252 |
// son.addQuestion(question);
|
|
253 |
KeyStatement statement = KeyStatement.NewInstance(questionLabel.toString());
|
|
254 |
son.setStatement(statement);
|
|
255 |
son.setFeature(winnerFeature);
|
|
256 |
father.addChild(son);
|
|
257 |
featuresLeft.remove(winnerFeature); // unlike for quantitative features, once a categorical one has been used, it cannot be reused in the same branch
|
|
258 |
boolean areTheTaxaDiscriminated;
|
|
259 |
if (newTaxaCovered.size()==taxaCovered.size()) areTheTaxaDiscriminated = true;
|
|
260 |
else areTheTaxaDiscriminated = false;
|
|
261 |
buildBranches(son,featuresLeft, newTaxaCovered,areTheTaxaDiscriminated);
|
|
262 |
}
|
|
263 |
}
|
|
264 |
}
|
|
265 |
}
|
|
266 |
}
|
|
267 |
}
|
|
268 |
// the features depending on other features are added/removed to/from the features left once the branch is done
|
|
269 |
if (dependenciesON){
|
|
270 |
for (Feature feature : innapplicables) featuresLeft.add(feature);
|
|
271 |
for (Feature feature : applicables) featuresLeft.remove(feature);
|
|
272 |
}
|
|
273 |
// the winner features are put back to the features left once the branch is done
|
|
274 |
featuresLeft.add(winnerFeature);
|
|
275 |
}
|
|
276 |
// if the node is a leaf, the statement contains the list of taxa to which the current leaf leads.
|
|
277 |
if (!childrenExist){
|
|
278 |
KeyStatement fatherStatement = father.getStatement();
|
|
279 |
if (fatherStatement!=null){
|
|
280 |
String statementString = fatherStatement.getLabelText(Language.DEFAULT());
|
|
281 |
if (statementString !=null && taxaCovered != null){
|
|
282 |
String label = statementString + " --> " + taxaCovered.toString();
|
|
283 |
fatherStatement.putLabel(Language.DEFAULT(), label);
|
|
284 |
}
|
|
285 |
}
|
|
286 |
}
|
|
287 |
}
|
|
288 |
|
|
289 |
|
|
290 |
|
|
291 |
/**
|
|
292 |
* Dependencies can be seen as a hierarchy.
|
|
293 |
* If the dependencies are on, this function calculates the score of all children features and give the highest score
|
|
294 |
* of these to their father (of course, if its score is lower). This way, if a feature has a good score but is
|
|
295 |
* "onlyApplicableIf" or "InapplicableIf", the feature it depends can be chosen in order to build a better key.
|
|
296 |
*
|
|
297 |
* @param scoreMap
|
|
298 |
* @param featuresLeft
|
|
299 |
* @param coveredTaxa
|
|
300 |
* @param quantitativeFeaturesThresholds
|
|
301 |
*/
|
|
302 |
private void dependenciesScores(Map<Feature,Float> scoreMap, List<Feature> featuresLeft,Set<TaxonDescription> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
|
|
303 |
// first copies the list of features left
|
|
304 |
List<Feature> pseudoFeaturesLeft = featuresLeft.subList(0, featuresLeft.size()-1);
|
|
305 |
// then adds all the features which depend on features contained in the list
|
|
306 |
for (Feature feature : pseudoFeaturesLeft){
|
|
307 |
if (featureDependencies.containsKey(feature)){
|
|
308 |
for (Feature feature2 : featureDependencies.get(feature)){
|
|
309 |
if (!pseudoFeaturesLeft.contains(feature2)){
|
|
310 |
pseudoFeaturesLeft.add(feature2);
|
|
311 |
}
|
|
312 |
}
|
|
313 |
}
|
|
314 |
}
|
|
315 |
// then calculates the scores of all features that have been added
|
|
316 |
Map<Feature,Float> newScoreMap = featureScores(pseudoFeaturesLeft, coveredTaxa, quantitativeFeaturesThresholds);
|
|
317 |
for (Feature feature : featureDependencies.keySet()){
|
|
318 |
if (newScoreMap.containsKey(feature)){
|
|
319 |
for (Feature feature2 : featureDependencies.get(feature)){
|
|
320 |
if (newScoreMap.containsKey(feature2)) {
|
|
321 |
// if a feature has a better than the "father" feature it depends on
|
|
322 |
if (newScoreMap.get(feature2)>newScoreMap.get(feature)){
|
|
323 |
// the "father" feature gets its score
|
|
324 |
scoreMap.put(feature, newScoreMap.get(feature2));
|
|
325 |
}
|
|
326 |
}
|
|
327 |
}
|
|
328 |
}
|
|
329 |
}
|
|
330 |
}
|
|
331 |
|
|
332 |
|
|
333 |
/**
|
|
334 |
* This function merges branches of the key belonging to the same clique
|
|
335 |
* (http://en.wikipedia.org/wiki/Clique_%28graph_theory%29)
|
|
336 |
*
|
|
337 |
* @param clique the list of States linked together (i.e. if merged have the same score)
|
|
338 |
* @param taxonStatesMap the map between the taxa (keys) and the states (keys) leading to them
|
|
339 |
*/
|
|
340 |
private void mergeBranches(List<State> clique, Map<Set<TaxonDescription>, List<State>> taxonStatesMap){
|
|
341 |
boolean stateFound;
|
|
342 |
Map.Entry<Set<TaxonDescription>,List<State>> firstBranch=null;
|
|
343 |
List<Set<TaxonDescription>> tdToDelete = new ArrayList<Set<TaxonDescription>>();
|
|
344 |
|
|
345 |
if (clique.size()>1){
|
|
346 |
Iterator<Map.Entry<Set<TaxonDescription>, List<State>>> it1 = taxonStatesMap.entrySet().iterator();
|
|
347 |
while (it1.hasNext()){
|
|
348 |
Map.Entry<Set<TaxonDescription>,List<State>> branch = (Map.Entry<Set<TaxonDescription>, List<State>>)it1.next();
|
|
349 |
Iterator<State> stateIterator = clique.iterator();
|
|
350 |
stateFound=false;
|
|
351 |
// looks for one state of the clique in this branch
|
|
352 |
while(stateIterator.hasNext() && stateFound!=true) {
|
|
353 |
State state = stateIterator.next();
|
|
354 |
if (branch.getValue().contains(state)) {
|
|
355 |
stateFound=true;
|
|
356 |
}
|
|
357 |
}
|
|
358 |
// if one state is found...
|
|
359 |
if (stateFound==true){
|
|
360 |
// ...for the first time, the branch becomes the one to which the others will be merged
|
|
361 |
if (firstBranch==null){
|
|
362 |
firstBranch=branch;
|
|
363 |
}
|
|
364 |
// ... else the branch is merged to the first one.
|
|
365 |
else {
|
|
366 |
firstBranch.getKey().addAll(branch.getKey());
|
|
367 |
firstBranch.getValue().addAll(branch.getValue());
|
|
368 |
tdToDelete.add(branch.getKey());
|
|
369 |
}
|
|
370 |
}
|
|
371 |
}
|
|
372 |
// once this is done, the branches merged to the first one are deleted
|
|
373 |
for (Set<TaxonDescription> td : tdToDelete){
|
|
374 |
taxonStatesMap.remove(td);
|
|
375 |
}
|
|
376 |
}
|
|
377 |
}
|
|
378 |
|
|
379 |
|
|
380 |
|
|
381 |
/**
|
|
382 |
* This function looks for the largest clique of States
|
|
383 |
* (http://en.wikipedia.org/wiki/Clique_%28graph_theory%29)
|
|
384 |
* from the map of exclusions.
|
|
385 |
*
|
|
386 |
* @param exclusions map a state (key) to the set of states (value) which can not be merged with it without decreasing its score.
|
|
387 |
* @return
|
|
388 |
*/
|
|
389 |
private List<State> returnBestClique (Map<State,Set<State>> exclusions){
|
|
390 |
int bestNumberOfExclusions=-1;;
|
|
391 |
int numberOfExclusions;
|
|
392 |
List<State> clique = new ArrayList<State>();
|
|
393 |
|
|
394 |
// looks for the largest clique, i.e. the state with less exclusions
|
|
395 |
State bestState=null;
|
|
396 |
for (Iterator<Map.Entry<State,Set<State>>> it1 = exclusions.entrySet().iterator() ; it1.hasNext();){
|
|
397 |
Map.Entry<State,Set<State>> pair = (Map.Entry<State,Set<State>>)it1.next();
|
|
398 |
numberOfExclusions = pair.getValue().size();
|
|
399 |
if ((bestNumberOfExclusions==-1) || numberOfExclusions<bestNumberOfExclusions) {
|
|
400 |
bestNumberOfExclusions=numberOfExclusions;
|
|
401 |
bestState = pair.getKey();
|
|
402 |
}
|
|
403 |
}
|
|
404 |
|
|
405 |
clique.add(bestState);
|
|
406 |
exclusions.remove(bestState);
|
|
407 |
|
|
408 |
boolean isNotExcluded;
|
|
409 |
// then starts building the clique by adding the states which do not exclude each other
|
|
410 |
for (Iterator<Map.Entry<State,Set<State>>> iterator = exclusions.entrySet().iterator() ; iterator.hasNext();){
|
|
411 |
Map.Entry<State,Set<State>> pair = (Map.Entry<State,Set<State>>)iterator.next();
|
|
412 |
isNotExcluded = true;
|
|
413 |
for (State state : clique) {
|
|
414 |
if (pair.getValue().contains(state)) isNotExcluded = false;
|
|
415 |
}
|
|
416 |
if (isNotExcluded){
|
|
417 |
clique.add(pair.getKey());
|
|
418 |
}
|
|
419 |
}
|
|
420 |
for (State state : clique) {
|
|
421 |
exclusions.remove(state);
|
|
422 |
}
|
|
423 |
return clique;
|
|
424 |
}
|
|
425 |
|
|
426 |
|
|
427 |
/**
|
|
428 |
* fills a map of the sets of taxa (key) presenting the different states (value) for the given feature.
|
|
429 |
*
|
|
430 |
* @param statesDone the list of states already done for this feature
|
|
431 |
* @param categoricalData the element from which the states are extracted
|
|
432 |
* @param feature the feature corresponding to the CategoricalData
|
|
433 |
* @param taxaCovered the base of taxa considered
|
|
434 |
* @return
|
|
435 |
*/
|
|
436 |
private Map<Set<TaxonDescription>,List<State>> determineCategoricalStates(List<State> statesDone, CategoricalData categoricalData, Feature feature, Set<TaxonDescription> taxaCovered){
|
|
437 |
Map<Set<TaxonDescription>,List<State>> childrenStatesMap = new HashMap<Set<TaxonDescription>,List<State>>();
|
|
438 |
|
|
439 |
List<StateData> stateDatas = categoricalData.getStates();
|
|
440 |
|
|
441 |
List<State> states = new ArrayList<State>();
|
|
442 |
for (StateData sd : stateDatas){
|
|
443 |
states.add(sd.getState());
|
|
444 |
}
|
|
445 |
|
|
446 |
for (State featureState : states){ // for each state
|
|
447 |
if(!statesDone.contains(featureState)){ // if the state hasn't already be considered
|
|
448 |
statesDone.add(featureState);
|
|
449 |
Set<TaxonDescription> newTaxaCovered = whichTaxa(feature,featureState,taxaCovered); //gets which taxa present this state
|
|
450 |
List<State> newStates = childrenStatesMap.get(newTaxaCovered);
|
|
451 |
if (newStates==null) { // if no states are associated to these taxa, create a new list
|
|
452 |
newStates = new ArrayList<State>();
|
|
453 |
childrenStatesMap.put(newTaxaCovered,newStates);
|
|
454 |
}
|
|
455 |
newStates.add(featureState); // then add the state to the list
|
|
456 |
}
|
|
457 |
}
|
|
458 |
return childrenStatesMap;
|
|
459 |
}
|
|
460 |
|
|
461 |
|
|
462 |
/**
|
|
463 |
* returns the list of taxa from previously covered taxa, which have the state featureState for the given feature
|
|
464 |
*
|
|
465 |
* @param feature
|
|
466 |
* @param featureState
|
|
467 |
* @param taxaCovered
|
|
468 |
* @return
|
|
469 |
*/
|
|
470 |
private Set<TaxonDescription> whichTaxa(Feature feature, State featureState, Set<TaxonDescription> taxaCovered){
|
|
471 |
Set<TaxonDescription> newCoveredTaxa = new HashSet<TaxonDescription>();
|
|
472 |
for (TaxonDescription td : taxaCovered){
|
|
473 |
Set<DescriptionElementBase> elements = td.getElements();
|
|
474 |
for (DescriptionElementBase deb : elements){
|
|
475 |
if (deb.isInstanceOf(CategoricalData.class)) {
|
|
476 |
if (deb.getFeature().equals(feature)) {
|
|
477 |
List<StateData> stateDatas = ((CategoricalData)deb).getStates();
|
|
478 |
for (StateData sd : stateDatas) {
|
|
479 |
if (sd.getState().equals(featureState)){
|
|
480 |
newCoveredTaxa.add(td);
|
|
481 |
}
|
|
482 |
}
|
|
483 |
}
|
|
484 |
}
|
|
485 |
}
|
|
486 |
}
|
|
487 |
return newCoveredTaxa;
|
|
488 |
}
|
|
489 |
|
|
490 |
|
|
491 |
/**
|
|
492 |
* This function returns the feature with the highest score. However, if several features have the same score
|
|
493 |
* the one wich leads to less options is chosen (this way, the key is easier to read).
|
|
494 |
*
|
|
495 |
* @param nTaxa
|
|
496 |
* @param scores
|
|
497 |
* @param taxaCovered
|
|
498 |
* @return
|
|
499 |
*/
|
|
500 |
private Feature lessStatesWinner(Map<Feature,Float> scores, Set<TaxonDescription> taxaCovered){
|
|
501 |
int nTaxa = taxaCovered.size();
|
|
502 |
if (nTaxa==1) return null;
|
|
503 |
float meanScore = defaultMeanScore(nTaxa);
|
|
504 |
float bestScore = nTaxa*nTaxa;
|
|
505 |
List<Feature> bestFeatures = new ArrayList<Feature>(); // if ever different features have the best score, they are put in this list
|
|
506 |
Feature bestFeature = null;
|
|
507 |
Iterator<Map.Entry<Feature,Float>> it = scores.entrySet().iterator();
|
|
508 |
float newScore;
|
|
509 |
while (it.hasNext()){
|
|
510 |
Map.Entry<Feature,Float> pair = (Map.Entry<Feature,Float>)it.next();
|
|
511 |
if (pair.getValue()!=null){
|
|
512 |
newScore = Math.abs((Float)pair.getValue()-meanScore);// the best score is the closest to the score (meanScore here)
|
|
513 |
// a feature would have if it divided the taxa in two equal parts
|
|
514 |
if (newScore < bestScore){
|
|
515 |
bestFeatures.clear();
|
|
516 |
bestFeatures.add((Feature)pair.getKey());
|
|
517 |
bestScore = newScore;
|
|
518 |
}else if (newScore==bestScore){
|
|
519 |
bestFeatures.add((Feature)pair.getKey());
|
|
520 |
}
|
|
521 |
}
|
|
522 |
}
|
|
523 |
if (bestFeatures.size()==1) { // if there is only one feature with the best score, pick it
|
|
524 |
return bestFeatures.get(0);
|
|
525 |
}
|
|
526 |
else { // else choose the one with less states
|
|
527 |
int lessStates=-1;
|
|
528 |
int numberOfDifferentStates=-1;
|
|
529 |
for (Feature feature : bestFeatures){
|
|
530 |
if (feature.isSupportsCategoricalData()){
|
|
531 |
Set<State> differentStates = new HashSet<State>();
|
|
532 |
for (TaxonDescription td : taxaCovered){
|
|
533 |
Set<DescriptionElementBase> elements = td.getElements();
|
|
534 |
for (DescriptionElementBase deb : elements){
|
|
535 |
if (deb.isInstanceOf(CategoricalData.class)) {
|
|
536 |
CategoricalData catdat = (CategoricalData)deb;
|
|
537 |
if (catdat.getFeature().equals(feature)) {
|
|
538 |
List<StateData> stateDatas = catdat.getStates();
|
|
539 |
for (StateData sd : stateDatas) {
|
|
540 |
differentStates.add(sd.getState());
|
|
541 |
}
|
|
542 |
}
|
|
543 |
}
|
|
544 |
}
|
|
545 |
}
|
|
546 |
numberOfDifferentStates=differentStates.size();
|
|
547 |
}else if (feature.isSupportsQuantitativeData()){
|
|
548 |
numberOfDifferentStates=2;
|
|
549 |
}
|
|
550 |
if (lessStates==-1 || numberOfDifferentStates<lessStates){
|
|
551 |
lessStates=numberOfDifferentStates;
|
|
552 |
bestFeature = feature;
|
|
553 |
}
|
|
554 |
}
|
|
555 |
return bestFeature;
|
|
556 |
}
|
|
557 |
}
|
|
558 |
|
|
559 |
|
|
560 |
/**
|
|
561 |
* This function calculates the mean score, i.e. the score a feature dividing the taxa in two equal parts
|
|
562 |
* would have.
|
|
563 |
*
|
|
564 |
* @param nTaxa
|
|
565 |
* @return
|
|
566 |
*/
|
|
567 |
private float defaultMeanScore(int nTaxa){
|
|
568 |
int i;
|
|
569 |
float score=0;
|
|
570 |
for (i=1;i<nTaxa;i++){
|
|
571 |
score = score + Math.round((float)(i+1/2));
|
|
572 |
}
|
|
573 |
return score;
|
|
574 |
}
|
|
575 |
|
|
576 |
|
|
577 |
/**
|
|
578 |
* This function fills the map of features (keys) with their respecting scores (values)
|
|
579 |
*
|
|
580 |
* @param featuresLeft
|
|
581 |
* @param coveredTaxa
|
|
582 |
* @param quantitativeFeaturesThresholds
|
|
583 |
* @return
|
|
584 |
*/
|
|
585 |
private Map<Feature,Float> featureScores(List<Feature> featuresLeft, Set<TaxonDescription> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
|
|
586 |
Map<Feature,Float> scoreMap = new HashMap<Feature,Float>();
|
|
587 |
for (Feature feature : featuresLeft){
|
|
588 |
if (feature.isSupportsCategoricalData()) {
|
|
589 |
scoreMap.put(feature, categoricalFeatureScore(feature,coveredTaxa));
|
|
590 |
}
|
|
591 |
if (feature.isSupportsQuantitativeData()){
|
|
592 |
scoreMap.put(feature, quantitativeFeatureScore(feature,coveredTaxa, quantitativeFeaturesThresholds));
|
|
593 |
}
|
|
594 |
}
|
|
595 |
return scoreMap;
|
|
596 |
}
|
|
597 |
|
|
598 |
/**
|
|
599 |
* Since Quantitative features do not have states, unlike Categorical ones, this function determines which taxa,
|
|
600 |
* for a given quantitative feature, present either a lower or higher value than a given threshold.
|
|
601 |
* It returns two Sets of TaxonDescription, one with the taxa under this threshold (taxaBefore) and another one
|
|
602 |
* with the taxa over (taxaAfter).
|
|
603 |
*
|
|
604 |
* @param threshold
|
|
605 |
* @param feature
|
|
606 |
* @param taxa
|
|
607 |
* @param unit
|
|
608 |
* @return
|
|
609 |
*/
|
|
610 |
private List<Set<TaxonDescription>> determineQuantitativeStates (Float threshold, Feature feature, Set<TaxonDescription> taxa, StringBuilder unit){
|
|
611 |
List<Set<TaxonDescription>> list = new ArrayList<Set<TaxonDescription>>();
|
|
612 |
Set<TaxonDescription> taxaBefore = new HashSet<TaxonDescription>();
|
|
613 |
Set<TaxonDescription> taxaAfter = new HashSet<TaxonDescription>();
|
|
614 |
list.add(taxaBefore);
|
|
615 |
list.add(taxaAfter);
|
|
616 |
for (TaxonDescription td : taxa){
|
|
617 |
Set<DescriptionElementBase> elements = td.getElements();
|
|
618 |
for (DescriptionElementBase deb : elements){
|
|
619 |
if (deb.getFeature().equals(feature)) {
|
|
620 |
if (deb.isInstanceOf(QuantitativeData.class)) {
|
|
621 |
QuantitativeData qd = (QuantitativeData)deb;
|
|
622 |
if (unit.toString().equals("") && qd.getUnit()!=null && qd.getUnit().getLabel()!=null){
|
|
623 |
unit.append(" " + qd.getUnit().getLabel());
|
|
624 |
}
|
|
625 |
Set<StatisticalMeasurementValue> values = qd.getStatisticalValues();
|
|
626 |
for (StatisticalMeasurementValue smv : values){
|
|
627 |
StatisticalMeasure type = smv.getType();
|
|
628 |
//TODO DONT FORGET sample size, MEAN etc
|
|
629 |
if (type.equals(StatisticalMeasure.MAX()) || type.equals(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY()) || type.equals(StatisticalMeasure.AVERAGE())) {
|
|
630 |
if (smv.getValue()>threshold){
|
|
631 |
taxaAfter.add(td);
|
|
632 |
}
|
|
633 |
}
|
|
634 |
if (type.equals(StatisticalMeasure.MIN()) || type.equals(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY()) || type.equals(StatisticalMeasure.AVERAGE())) {
|
|
635 |
if (smv.getValue()<=threshold){
|
|
636 |
taxaBefore.add(td);
|
|
637 |
}
|
|
638 |
}
|
|
639 |
}
|
|
640 |
}
|
|
641 |
}
|
|
642 |
}
|
|
643 |
}
|
|
644 |
return list;
|
|
645 |
}
|
|
646 |
|
|
647 |
|
|
648 |
|
|
649 |
/**
|
|
650 |
* This function returns the score of a quantitative feature.
|
|
651 |
*
|
|
652 |
* @param feature
|
|
653 |
* @param coveredTaxa
|
|
654 |
* @param quantitativeFeaturesThresholds
|
|
655 |
* @return
|
|
656 |
*/
|
|
657 |
private float quantitativeFeatureScore(Feature feature, Set<TaxonDescription> coveredTaxa, Map<Feature,Float> quantitativeFeaturesThresholds){
|
|
658 |
List<Float> allValues = new ArrayList<Float>();
|
|
659 |
boolean lowerboundarypresent;
|
|
660 |
boolean upperboundarypresent;
|
|
661 |
float lowerboundary=0;
|
|
662 |
float upperboundary=0;
|
|
663 |
for (TaxonDescription td : coveredTaxa){
|
|
664 |
Set<DescriptionElementBase> elements = td.getElements();
|
|
665 |
for (DescriptionElementBase deb : elements){
|
|
666 |
if (deb.getFeature().equals(feature)) {
|
|
667 |
if (deb.isInstanceOf(QuantitativeData.class)) {
|
|
668 |
QuantitativeData qd = (QuantitativeData)deb;
|
|
669 |
Set<StatisticalMeasurementValue> values = qd.getStatisticalValues();
|
|
670 |
lowerboundarypresent = false;
|
|
671 |
upperboundarypresent = false;
|
|
672 |
for (StatisticalMeasurementValue smv : values){
|
|
673 |
StatisticalMeasure type = smv.getType();
|
|
674 |
// DONT FORGET sample size, MEAN etc
|
|
675 |
if (type.equals(StatisticalMeasure.MAX())) {
|
|
676 |
upperboundary = smv.getValue();
|
|
677 |
upperboundarypresent=true;
|
|
678 |
}
|
|
679 |
if (type.equals(StatisticalMeasure.MIN())) {
|
|
680 |
lowerboundary = smv.getValue();
|
|
681 |
lowerboundarypresent=true;
|
|
682 |
}
|
|
683 |
if (type.equals(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY()) && upperboundarypresent==false) {
|
|
684 |
upperboundary = smv.getValue();
|
|
685 |
upperboundarypresent=true;
|
|
686 |
}
|
|
687 |
if (type.equals(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY()) && lowerboundarypresent==false) {
|
|
688 |
lowerboundary = smv.getValue();
|
|
689 |
lowerboundarypresent=true;
|
|
690 |
}
|
|
691 |
if (type.equals(StatisticalMeasure.AVERAGE()) && upperboundarypresent==false && lowerboundarypresent==false) {
|
|
692 |
lowerboundary = smv.getValue();
|
|
693 |
upperboundary = lowerboundary;
|
|
694 |
lowerboundarypresent=true;
|
|
695 |
upperboundarypresent=true;
|
|
696 |
}
|
|
697 |
}
|
|
698 |
if (lowerboundarypresent && upperboundarypresent) {
|
|
699 |
allValues.add(lowerboundary);
|
|
700 |
allValues.add(upperboundary);
|
|
701 |
}
|
|
702 |
}
|
|
703 |
}
|
|
704 |
}
|
|
705 |
}
|
|
706 |
int i,j;
|
|
707 |
float threshold=0;
|
|
708 |
float bestThreshold=0;
|
|
709 |
int difference=allValues.size();
|
|
710 |
int differenceMin = difference;
|
|
711 |
int taxaBefore=0;
|
|
712 |
int taxaAfter=0;
|
|
713 |
for (i=0;i<allValues.size()/2;i++) {
|
|
714 |
threshold = allValues.get(i*2+1);
|
|
715 |
taxaBefore=0;
|
|
716 |
taxaAfter=0;
|
|
717 |
for (j=0;j<allValues.size()/2;j++) {
|
|
718 |
if (allValues.get(j*2+1)<=threshold){
|
|
719 |
taxaBefore++;
|
|
720 |
}
|
|
721 |
if (allValues.get(j*2)>threshold){
|
|
722 |
taxaAfter++;
|
|
723 |
}
|
|
724 |
}
|
|
725 |
difference = Math.abs(taxaBefore-taxaAfter);
|
|
726 |
if (difference<differenceMin){
|
|
727 |
differenceMin=difference;
|
|
728 |
bestThreshold = threshold;
|
|
729 |
}
|
|
730 |
}
|
|
731 |
quantitativeFeaturesThresholds.put(feature, bestThreshold);
|
|
732 |
int defaultQuantitativeScore=0;
|
|
733 |
for (i=0;i<taxaBefore;i++) {
|
|
734 |
defaultQuantitativeScore += taxaAfter - i;
|
|
735 |
}
|
|
736 |
return (float)(defaultQuantitativeScore);
|
|
737 |
}
|
|
738 |
|
|
739 |
|
|
740 |
|
|
741 |
/**
|
|
742 |
* This function returns the score of a categorical feature.
|
|
743 |
*
|
|
744 |
* @param feature
|
|
745 |
* @param coveredTaxa
|
|
746 |
* @return
|
|
747 |
*/
|
|
748 |
private float categoricalFeatureScore(Feature feature, Set<TaxonDescription> coveredTaxa){
|
|
749 |
int i,j;
|
|
750 |
float score =0;
|
|
751 |
float power=0;
|
|
752 |
TaxonDescription[] coveredTaxaArray = coveredTaxa.toArray(new TaxonDescription[coveredTaxa.size()]); // I did not figure a better way to do this
|
|
753 |
for (i=0 ; i<coveredTaxaArray.length; i++){
|
|
754 |
Set<DescriptionElementBase> elements1 = coveredTaxaArray[i].getElements();
|
|
755 |
DescriptionElementBase deb1 = null;
|
|
756 |
for (DescriptionElementBase deb : elements1){
|
|
757 |
if (deb.getFeature().equals(feature)){
|
|
758 |
deb1 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature
|
|
759 |
}
|
|
760 |
}
|
|
761 |
for (j=i+1 ; j< coveredTaxaArray.length ; j++){
|
|
762 |
Set<DescriptionElementBase> elements2 = coveredTaxaArray[j].getElements();
|
|
763 |
DescriptionElementBase deb2 = null;
|
|
764 |
for (DescriptionElementBase deb : elements2){
|
|
765 |
if (deb.getFeature().equals(feature)){
|
|
766 |
deb2 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature
|
|
767 |
}
|
|
768 |
}
|
|
769 |
power = defaultPower(deb1,deb2);
|
|
770 |
score = score + power;
|
|
771 |
}
|
|
772 |
}
|
|
773 |
return score;
|
|
774 |
}
|
|
775 |
|
|
776 |
|
|
777 |
/**
|
|
778 |
* This recursive function fills the maps of dependencies by reading the tree containing the dependencies.
|
|
779 |
*
|
|
780 |
* @param node
|
|
781 |
*/
|
|
782 |
private void checkDependencies(FeatureNode node){
|
|
783 |
if (node.getOnlyApplicableIf()!=null){
|
|
784 |
Set<State> addToOAI = node.getOnlyApplicableIf();
|
|
785 |
for (State state : addToOAI){
|
|
786 |
if (oAIdependencies.containsKey(state)) oAIdependencies.put(state, new HashSet<Feature>());
|
|
787 |
oAIdependencies.get(state).add(node.getFeature());
|
|
788 |
}
|
|
789 |
}
|
|
790 |
if (node.getInapplicableIf()!=null){
|
|
791 |
Set<State> addToiI = node.getInapplicableIf();
|
|
792 |
for (State state : addToiI){
|
|
793 |
if (iIdependencies.containsKey(state)) iIdependencies.put(state, new HashSet<Feature>());
|
|
794 |
iIdependencies.get(state).add(node.getFeature());
|
|
795 |
}
|
|
796 |
}
|
|
797 |
if (node.getChildren()!=null) {
|
|
798 |
for (FeatureNode fn : node.getChildren()){
|
|
799 |
checkDependencies(fn);
|
|
800 |
}
|
|
801 |
}
|
|
802 |
}
|
|
803 |
|
|
804 |
|
|
805 |
|
|
806 |
/**
|
|
807 |
* This function fills the exclusions map.
|
|
808 |
*
|
|
809 |
* @param feature
|
|
810 |
* @param coveredTaxa
|
|
811 |
* @param exclusions
|
|
812 |
* @return
|
|
813 |
*/
|
|
814 |
private float featureScoreAndMerge(Feature feature, Set<TaxonDescription> coveredTaxa, Map<State,Set<State>> exclusions){
|
|
815 |
int i,j;
|
|
816 |
float score =0;
|
|
817 |
float power=0;
|
|
818 |
TaxonDescription[] coveredTaxaArray = coveredTaxa.toArray(new TaxonDescription[coveredTaxa.size()]); // I did not figure a better way to do this
|
|
819 |
for (i=0 ; i<coveredTaxaArray.length; i++){
|
|
820 |
Set<DescriptionElementBase> elements1 = coveredTaxaArray[i].getElements();
|
|
821 |
DescriptionElementBase deb1 = null;
|
|
822 |
for (DescriptionElementBase deb : elements1){
|
|
823 |
if (deb.getFeature().equals(feature)){
|
|
824 |
deb1 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature
|
|
825 |
}
|
|
826 |
}
|
|
827 |
for (j=i+1 ; j< coveredTaxaArray.length ; j++){
|
|
828 |
Set<DescriptionElementBase> elements2 = coveredTaxaArray[j].getElements();
|
|
829 |
DescriptionElementBase deb2 = null;
|
|
830 |
for (DescriptionElementBase deb : elements2){
|
|
831 |
if (deb.getFeature().equals(feature)){
|
|
832 |
deb2 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature
|
|
833 |
}
|
|
834 |
}
|
|
835 |
power = defaultPower(deb1,deb2);
|
|
836 |
score = score + power;
|
|
837 |
if (power>0) // if there is no state in common between deb1 and deb2
|
|
838 |
{
|
|
839 |
CategoricalData cat1 = (CategoricalData)deb1;
|
|
840 |
CategoricalData cat2 = (CategoricalData)deb2;
|
|
841 |
for (StateData statedata1 : cat1.getStates()){
|
|
842 |
State state1 = statedata1.getState();
|
|
843 |
if (!exclusions.containsKey(state1)){
|
|
844 |
exclusions.put(state1, new HashSet<State>());
|
|
845 |
}
|
|
846 |
for (StateData statedata2 : cat2.getStates()){
|
|
847 |
State state2 = statedata2.getState();
|
|
848 |
if (!exclusions.containsKey(state2)){
|
|
849 |
exclusions.put(state2, new HashSet<State>());
|
|
850 |
}
|
|
851 |
exclusions.get(state1).add(state2);
|
|
852 |
exclusions.get(state2).add(state1);
|
|
853 |
}
|
|
854 |
}
|
|
855 |
}
|
|
856 |
}
|
|
857 |
}
|
|
858 |
return score;
|
|
859 |
}
|
|
860 |
|
|
861 |
|
|
862 |
|
|
863 |
/**
|
|
864 |
* Returns the score between two DescriptionElementBase. If one of them is null, returns -1.
|
|
865 |
* If they are not of the same type (Categorical) returns 0.
|
|
866 |
*
|
|
867 |
* @param deb1
|
|
868 |
* @param deb2
|
|
869 |
* @return
|
|
870 |
*/
|
|
871 |
private float defaultPower(DescriptionElementBase deb1, DescriptionElementBase deb2){
|
|
872 |
if (deb1==null || deb2==null) {
|
|
873 |
return -1; //what if the two taxa don't have this feature in common ?
|
|
874 |
}
|
|
875 |
if ((deb1.isInstanceOf(CategoricalData.class))&&(deb2.isInstanceOf(CategoricalData.class))) {
|
|
876 |
return defaultCategoricalPower((CategoricalData)deb1, (CategoricalData)deb2);
|
|
877 |
}
|
|
878 |
else return 0;
|
|
879 |
}
|
|
880 |
|
|
881 |
/**
|
|
882 |
* Returns the score of a categorical feature.
|
|
883 |
*
|
|
884 |
* @param deb1
|
|
885 |
* @param deb2
|
|
886 |
* @return
|
|
887 |
*/
|
|
888 |
private float defaultCategoricalPower(CategoricalData deb1, CategoricalData deb2){
|
|
889 |
List<StateData> states1 = deb1.getStates();
|
|
890 |
List<StateData> states2 = deb2.getStates();
|
|
891 |
boolean bool = false;
|
|
892 |
Iterator<StateData> stateData1Iterator = states1.iterator() ;
|
|
893 |
// while (!bool && stateData1Iterator.hasNext()) {
|
|
894 |
// Iterator<StateData> stateData2Iterator = states2.iterator() ;
|
|
895 |
// StateData stateData1 = stateData1Iterator.next();
|
|
896 |
// while (!bool && stateData2Iterator.hasNext()) {
|
|
897 |
// bool = stateData1.getState().equals(stateData2Iterator.next().getState()); // checks if the states are the same
|
|
898 |
// }
|
|
899 |
// }
|
|
900 |
// one point each time two taxa can be discriminated for a given feature
|
|
901 |
|
|
902 |
boolean checkFeature = false;
|
|
903 |
|
|
904 |
if (!featureDependencies.containsKey(deb1.getFeature())){
|
|
905 |
featureDependencies.put(deb1.getFeature(), new HashSet<Feature>());
|
|
906 |
checkFeature = true;
|
|
907 |
}
|
|
908 |
|
|
909 |
while (stateData1Iterator.hasNext()) {
|
|
910 |
Iterator<StateData> stateData2Iterator = states2.iterator() ;
|
|
911 |
StateData stateData1 = stateData1Iterator.next();
|
|
912 |
State state1 = stateData1.getState();
|
|
913 |
if (checkFeature){
|
|
914 |
if (iIdependencies.get(state1)!=null) {
|
|
915 |
featureDependencies.get(deb1.getFeature()).addAll(iIdependencies.get(state1));
|
|
916 |
}
|
|
917 |
if (oAIdependencies.get(state1)!=null) {
|
|
918 |
featureDependencies.get(deb1.getFeature()).addAll(oAIdependencies.get(state1));
|
|
919 |
}
|
|
920 |
}
|
|
921 |
while (stateData2Iterator.hasNext()) {
|
|
922 |
State state2 = stateData1.getState();
|
|
923 |
bool = bool || state1.equals(state2); // checks if the states are the same
|
|
924 |
}
|
|
925 |
}
|
|
926 |
|
|
927 |
|
|
928 |
if (bool) return 0;
|
|
929 |
else return 1;
|
|
930 |
}
|
|
931 |
|
|
932 |
// old code used when PolytomousKey extended FeatureTree
|
|
933 |
// private void printTree(List<PolytomousKeyNode> fnodes, String spaces){
|
|
934 |
// if (!fnodes.isEmpty()){
|
|
935 |
// level++;
|
|
936 |
// int levelcopy = level;
|
|
937 |
// int j=1;
|
|
938 |
// String delimiter;
|
|
939 |
// String equals = " = ";
|
|
940 |
// String quantitative = "";
|
|
941 |
// String newspaces = spaces.concat("\t");
|
|
942 |
// for (PolytomousKeyNode fnode : fnodes){
|
|
943 |
// if (fnode.getFeature()!=null) {
|
|
944 |
// String state = null;
|
|
945 |
// if (fnode.getQuestion(Language.DEFAULT())!=null) state = fnode.getQuestion(Language.DEFAULT()).getLabel();
|
|
946 |
// if (fnode.getFeature().isSupportsQuantitativeData()) delimiter = quantitative;
|
|
947 |
// else delimiter = equals;
|
|
948 |
// System.out.println(newspaces + levelcopy + " : " + j + " " + fnode.getFeature().getLabel() + delimiter + state);
|
|
949 |
// j++;
|
|
950 |
// }
|
|
951 |
// else { // TODO never read ?
|
|
952 |
// if (fnode.getQuestion(Language.DEFAULT())!=null) System.out.println(newspaces + "-> " + fnode.getQuestion(Language.DEFAULT()).getLabel());
|
|
953 |
// }
|
|
954 |
// printTree(fnode.getChildren(),newspaces);
|
|
955 |
// }
|
|
956 |
// }
|
|
957 |
// }
|
|
958 |
|
|
959 |
}
|
|
960 |
|
move back PolytomousKeyGenerator and move test class from service to model