correcting utf8 chars
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / IdentificationKeyGenerator.java
1 package eu.etaxonomy.cdm.api.service;
2
3 import java.util.ArrayList;
4 import java.util.HashMap;
5 import java.util.HashSet;
6 import java.util.Iterator;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Set;
10
11 import eu.etaxonomy.cdm.model.common.Language;
12 import eu.etaxonomy.cdm.model.common.Representation;
13 import eu.etaxonomy.cdm.model.description.CategoricalData;
14 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
15 import eu.etaxonomy.cdm.model.description.Feature;
16 import eu.etaxonomy.cdm.model.description.FeatureNode;
17 import eu.etaxonomy.cdm.model.description.PolytomousKey;
18 import eu.etaxonomy.cdm.model.description.State;
19 import eu.etaxonomy.cdm.model.description.StateData;
20 import eu.etaxonomy.cdm.model.description.TaxonDescription;
21
22 public class IdentificationKeyGenerator {
23
24 static int level=-1;
25 private PolytomousKey polyto;
26 private List<Feature> features;
27 private Set<TaxonDescription> taxa;
28
29 public void setFeatures(List<Feature> featuresList){
30 this.features = featuresList;
31 }
32
33 public void setTaxa(Set<TaxonDescription> taxaSet){
34 this.taxa = taxaSet;
35 }
36
37
38 public void makeandprint(){
39 Boucle();
40 List<FeatureNode> rootlist = new ArrayList<FeatureNode>();
41 rootlist.add(polyto.getRoot());
42 String spaces = new String();
43 printTree2(rootlist,spaces);
44 }
45
46
47 private void Boucle(){
48 polyto = PolytomousKey.NewInstance();
49 FeatureNode root = polyto.getRoot();
50 buildBranches(root,features,taxa);
51 System.out.println();
52
53 }
54
55
56 private void buildBranches(FeatureNode father, List<Feature> featuresLeft, Set<TaxonDescription> taxaCovered){
57 //System.out.println(featuresLeft);
58 //System.out.println("FL size : " + featuresLeft.size() + " and taxa size : " + taxaCovered.size());
59 //System.out.println(taxaCovered);
60 List<DescriptionElementBase> debsDone = new ArrayList<DescriptionElementBase>();
61 List<State> statesDone = new ArrayList<State>(); // ATTENTION ONLY FOR CAT
62 //Map<Set<TaxonDescription>,DescriptionElementBase> floor = new HashMap<Set<TaxonDescription>,DescriptionElementBase>(); // local variable never read
63
64 Map<Feature,Float> scoreMap = FeatureScores(featuresLeft, taxaCovered);
65 //System.out.println(scoreMap);
66 Feature winnerFeature = DefaultWinner(taxaCovered.size(), scoreMap);
67 //System.out.println(winnerFeature.getLabel());
68 featuresLeft.remove(winnerFeature);
69 boolean childrenExist = false;
70
71 for (TaxonDescription td : taxaCovered){ // look for the different states
72 DescriptionElementBase debConcerned = null;
73 for (DescriptionElementBase deb : td.getElements()) {
74 if (deb.getFeature().equals(winnerFeature)) debConcerned = deb;
75 }
76 //if deb!= null
77
78 Map<Set<TaxonDescription>,List<State>> taxonStatesMap = runOverStates(statesDone,debConcerned,winnerFeature,taxaCovered); // /!\ ATTENTION not working yet for quantitative data
79 if (taxonStatesMap!=null && !taxonStatesMap.isEmpty()) {
80 for (Map.Entry<Set<TaxonDescription>,List<State>> e : taxonStatesMap.entrySet()){
81 Set<TaxonDescription> newTaxaCovered = e.getKey();
82 List<State> list = e.getValue(); // for the tree
83 if (!(newTaxaCovered.size()==taxaCovered.size())){// if the remaining taxa are still discriminated, continue // >1 USEFUL ?
84 //System.out.println(taxaCovered.size());
85 childrenExist = true;
86 FeatureNode son = FeatureNode.NewInstance();
87 StringBuilder questionLabel = new StringBuilder();
88 for (State st : list) questionLabel.append(st.getLabel());
89 Representation question = new Representation(null, questionLabel.toString(),null, Language.DEFAULT());
90 son.addQuestion(question);
91 son.setFeature(winnerFeature);
92 father.addChild(son);
93 //List<Feature> newFeaturesLeft = new LinkedList<Feature>(featuresLeft); // replaced by featuresLeft.remove(winnerFeature)
94 //newFeaturesLeft.remove(winnerFeature);
95 buildBranches(son,featuresLeft, newTaxaCovered);
96 }
97 else {
98 // FeatureNode son = FeatureNode.NewInstance();
99 // Representation question = new Representation(null, taxaCovered.toString(),null, Language.DEFAULT());
100 // son.addQuestion(question);
101 // father.addChild(son);// a leaf is reached
102 }
103 }
104 }
105 }
106 if (!childrenExist){
107 Representation question = father.getQuestion(Language.DEFAULT());
108 question.setLabel(question.getLabel() + " --> " + taxaCovered.toString());
109 }
110 featuresLeft.add(winnerFeature);
111 //loop over the floor, if new = old taxa -> leaf ; else -> node + loop
112
113 }
114
115 private Map<Set<TaxonDescription>,List<State>> runOverStates(List<State> statesDone, DescriptionElementBase deb, Feature winnerFeature, Set<TaxonDescription> taxaCovered){
116 if (deb==null){
117 return null;
118 }
119 if (deb.isInstanceOf(CategoricalData.class)) {
120 return rOSCategoricalDefault(statesDone, (CategoricalData)deb, winnerFeature, taxaCovered);
121 }
122 else return null;
123 }
124
125 private Map<Set<TaxonDescription>,List<State>> rOSCategoricalDefault(List<State> statesDone, CategoricalData categoricalData, Feature winnerFeature, Set<TaxonDescription> taxaCovered){
126 Map<Set<TaxonDescription>,List<State>> childrenStatesMap = new HashMap<Set<TaxonDescription>,List<State>>();
127
128 List<StateData> stateDatas = categoricalData.getStates();
129
130 List<State> states = new ArrayList<State>(); // In this function STATES ONLY ARE CONSIDERED, MODIFIERS ARE NOT
131 for (StateData sd : stateDatas){
132 states.add(sd.getState());
133 }
134 for (StateData sd : stateDatas){
135 states.add(sd.getState());
136 }
137
138 for (State featureState : states){
139 if(!statesDone.contains(featureState)){
140 statesDone.add(featureState);
141
142 StateData sd = new StateData();
143 sd.setState(featureState);
144 //((CategoricalData)debsDone.get(0)).addState(sd);// A VOIR
145
146 Set<TaxonDescription> newTaxaCovered = whichTaxa(winnerFeature,featureState,taxaCovered);
147 List<State> newStates = childrenStatesMap.get(newTaxaCovered);
148 if (newStates==null) {
149 newStates = new ArrayList<State>();
150
151 childrenStatesMap.put(newTaxaCovered,newStates);
152 }
153 newStates.add(featureState);
154 }
155 }
156 return childrenStatesMap;
157 }
158
159
160
161 // returns the list of taxa from previously covered taxa, which have the state featureState for the feature feature
162 private Set<TaxonDescription> whichTaxa(Feature feature, State featureState, Set<TaxonDescription> taxaCovered){
163 Set<TaxonDescription> newCoveredTaxa = new HashSet<TaxonDescription>();
164 for (TaxonDescription td : taxaCovered){
165 Set<DescriptionElementBase> elements = td.getElements();
166 for (DescriptionElementBase deb : elements){
167 if (deb.isInstanceOf(CategoricalData.class)) {
168 if (deb.getFeature().equals(feature)) {
169 List<StateData> stateDatas = ((CategoricalData)deb).getStates();
170 for (StateData sd : stateDatas) {
171 if (sd.getState().equals(featureState))
172 newCoveredTaxa.add(td);
173 }
174 }
175 }
176 }
177 }
178 return newCoveredTaxa;
179 }
180
181 //change names
182 private Feature DefaultWinner(int nTaxons, Map<Feature,Float> scores){
183 float meanScore = DefaultMeanScore(nTaxons);
184 float bestScore = nTaxons*nTaxons;
185 Feature feature = null;
186 Iterator it = scores.entrySet().iterator();
187 float newScore;
188 while (it.hasNext()){
189 Map.Entry<Feature,Float> pair = (Map.Entry)it.next();
190 if (pair.getValue()!=null){
191 newScore = Math.abs((Float)pair.getValue()-meanScore);
192 if (newScore < bestScore){
193 feature = (Feature)pair.getKey();
194 bestScore = newScore;
195 }
196 }
197 }
198 return feature;
199 }
200
201 // rutiliser et vrif si rien de trop <- FIXME please do not comment in french or at least use proper file encoding
202 private float DefaultMeanScore(int nTaxons){
203 int i;
204 float score=0;
205 for (i=1;i<nTaxons;i++){
206 score = score + Math.round((float)(i+1/2));
207 }
208 return score;
209 }
210
211 private Map<Feature,Float> FeatureScores(List<Feature> featuresLeft, Set<TaxonDescription> coveredTaxa){
212 Map<Feature,Float> scoreMap = new HashMap<Feature,Float>();
213 for (Feature feature : featuresLeft){
214 scoreMap.put(feature, FeatureScore(feature,coveredTaxa));
215 }
216 return scoreMap;
217 }
218
219 private float FeatureScore(Feature featureIndex, Set<TaxonDescription> coveredTaxa){
220 int i,j;
221 float score =0;
222 TaxonDescription[] coveredTaxaArray = coveredTaxa.toArray(new TaxonDescription[coveredTaxa.size()]); // I did not figure a better way to do this
223 for (i=0 ; i<coveredTaxaArray.length; i++){
224 Set<DescriptionElementBase> elements1 = coveredTaxaArray[i].getElements();
225 DescriptionElementBase deb1 = null;
226 for (DescriptionElementBase deb : elements1){
227 if (deb.getFeature().equals(featureIndex)) deb1 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature
228 }
229 for (j=i+1 ; j< coveredTaxaArray.length ; j++){
230 Set<DescriptionElementBase> elements2 = coveredTaxaArray[j].getElements();
231 DescriptionElementBase deb2 = null;
232 for (DescriptionElementBase deb : elements2){
233 if (deb.getFeature().equals(featureIndex)) deb2 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature
234 }
235 score = score + DefaultPower(deb1,deb2);
236 }
237 }
238 return score;
239 }
240
241 private float DefaultPower(DescriptionElementBase deb1, DescriptionElementBase deb2){
242 if (deb1==null || deb2==null) {
243 return -1; //what if the two taxa don't have this feature in common ?
244 }
245 if ((deb1.isInstanceOf(CategoricalData.class))&&(deb2.isInstanceOf(CategoricalData.class))) {
246 return DefaultCategoricalPower((CategoricalData)deb1, (CategoricalData)deb2);
247 }
248 else return 0;
249 }
250
251 private float DefaultCategoricalPower(CategoricalData deb1, CategoricalData deb2){
252 List<StateData> states1 = deb1.getStates();
253 List<StateData> states2 = deb2.getStates();
254 boolean bool = false;
255 Iterator<StateData> stateData1Iterator = states1.iterator() ;
256 while (!bool && stateData1Iterator.hasNext()) {
257 Iterator<StateData> stateData2Iterator = states2.iterator() ;
258 StateData stateData1 = stateData1Iterator.next();
259 //bool = states2.contains(strIterator.next());
260 while (!bool && stateData2Iterator.hasNext()) {
261 bool = stateData1.getState().equals(stateData2Iterator.next().getState()); // checks if the states are the same
262 }
263 // modifiers not taken into account for this default power
264 }
265 // one point each time two taxa have at least a state in common for a given feature
266 if (bool) return 0;
267 else return 1;
268 }
269
270 private void printTree(List<FeatureNode> fnodes){
271 Feature featureSpace = Feature.NewInstance(null, ";", null);
272 FeatureNode featureNodeSpace = FeatureNode.NewInstance(featureSpace);
273 List<FeatureNode> children = new ArrayList<FeatureNode>();
274 for (FeatureNode fnode : fnodes){
275 if (fnode.getFeature()!=null) {
276 //System.out.print(fnode.getFeature().getLabel() + " ");
277 }
278 List<FeatureNode> childrenbis = fnode.getChildren();
279 for (FeatureNode fnodebis : childrenbis){
280 children.add(fnodebis);
281 }
282 if (children.size()>0) {children.add(featureNodeSpace);}
283 }
284 //System.out.println("\n-----");
285 if (children.size()>0){
286 printTree(children);
287 }
288 }
289
290 private void printTree2(List<FeatureNode> fnodes, String spaces){
291 if (!fnodes.isEmpty()){
292 level++;
293 int levelcopy = level;
294 int j=1;
295 String newspaces = spaces.concat("\t");
296 for (FeatureNode fnode : fnodes){
297 if (fnode.getFeature()!=null) {
298 String state = null;
299 if (fnode.getQuestion(Language.DEFAULT())!=null) state = fnode.getQuestion(Language.DEFAULT()).getLabel();
300 System.out.println(newspaces + levelcopy + " : " + j + " " + fnode.getFeature().getLabel() + " = " + state);
301 j++;
302 }
303 else {
304 if (fnode.getQuestion(Language.DEFAULT())!=null) System.out.println(newspaces + "-> " + fnode.getQuestion(Language.DEFAULT()).getLabel());
305 }
306 printTree2(fnode.getChildren(),newspaces);
307 }
308 }
309 }
310
311 }