fix characters problem in KeyImport
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / markup / FeatureSorter.java
1 /**
2 *
3 */
4 package eu.etaxonomy.cdm.io.markup;
5
6 import java.util.ArrayList;
7 import java.util.Collection;
8 import java.util.HashMap;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.ListIterator;
13 import java.util.Map;
14 import java.util.Set;
15 import java.util.UUID;
16
17 /**
18 * This class is supposed to find the best sorting order for features (descriptive and other).
19 * @author a.mueller
20 *
21 */
22 public class FeatureSorter {
23
24
25 class FeatureStatistic{
26
27 private UUID uuid;
28 boolean isAlwaysHighest = true;
29 private int before = 0; //number of features before this feature
30 private int after = 0; //number of features after this feature
31 private int n = 0; //number of occurrences of this feature
32
33 public FeatureStatistic(UUID uuid) {
34 this.uuid = uuid;
35 }
36 public void addValue(int index, int total) {
37 if (index != 0){
38 isAlwaysHighest = false;
39 }
40 n++;
41 before += index;
42 after += (total - index);
43 }
44 @Override
45 public String toString(){
46 return uuid != null? uuid.toString(): super.toString();
47 }
48
49 }
50
51
52 /**
53 * Compute the order of features.
54 * @param orderLists
55 * @return
56 */
57 public List<UUID> getSortOrder(Map<String,List<FeatureSorterInfo>> listMap){
58 List<UUID> result = new ArrayList<UUID>();
59 //compute highest feature until all lists are empty
60 removeEmptyLists(listMap);
61 while (! listMap.isEmpty()){
62 Map<UUID,FeatureStatistic> statisticMap = computeStatistic(listMap);
63 FeatureStatistic best = findBest(statisticMap);
64 result.add(best.uuid);
65 removeFromLists(listMap, best.uuid);
66 }
67 return result;
68 }
69
70
71 private void removeEmptyLists(Map<String, List<FeatureSorterInfo>> listMap) {
72 Set<String> keysToRemove = new HashSet<String>();
73 for(String key : listMap.keySet()){
74 List<FeatureSorterInfo> list = listMap.get(key);
75 if (list.isEmpty()){
76 keysToRemove.add(key);
77 }
78 }
79 for (String key : keysToRemove){
80 listMap.remove(key);
81 }
82
83 }
84
85
86 /**
87 * Removes all entries for given uuid from orderLists and removes empty lists from map.
88 * @param orderLists
89 * @param uuid
90 */
91 private void removeFromLists(Map<String, List<FeatureSorterInfo>> orderLists, UUID uuid) {
92
93 Set<String> keySet = orderLists.keySet();
94 Iterator<String> keySetIterator = keySet.iterator();
95 while (keySetIterator.hasNext()){
96 String key = keySetIterator.next();
97 List<FeatureSorterInfo> list = orderLists.get(key);
98 Iterator<FeatureSorterInfo> it = list.listIterator();
99 while (it.hasNext()){
100 FeatureSorterInfo info = it.next();
101 if (info.getUuid().equals(uuid)){
102 it.remove();
103 if (list.isEmpty()){
104 keySetIterator.remove();
105 }
106 }
107 }
108 }
109 }
110
111
112 private FeatureStatistic findBest(Map<UUID, FeatureStatistic> statisticMap) {
113 FeatureStatistic result;
114 Set<FeatureStatistic> highest = getOnlyHighestFeatures(statisticMap);
115 if (highest.size() == 1){
116 result = highest.iterator().next();
117 }else if (highest.size() > 1){
118 result = findBestOfHighest(highest);
119 }else{ //<1
120 result = findBestOfNonHighest(statisticMap);
121 }
122 return result;
123 }
124
125
126 /**
127 * If multiple features do all have no higher feature in any list, this method
128 * can be called to use an alternative criteria to find the "highest" feature.
129 * @param highest
130 * @return
131 */
132 private FeatureStatistic findBestOfHighest(Set<FeatureStatistic> highest) {
133 //current implementation: find the one with the highest "after" value
134 FeatureStatistic result = highest.iterator().next();
135 int bestAfter = result.after;
136 for (FeatureStatistic statistic : highest){
137 if (statistic.after > bestAfter){
138 result = statistic;
139 bestAfter = statistic.after;
140 }
141 }
142 return result;
143 }
144
145
146 /**
147 * If no feature is always highest this method can be called to use an alternative criteria
148 * to find the "highest" feature.
149 *
150 * @param statisticMap
151 * @return
152 */
153 private FeatureStatistic findBestOfNonHighest(Map<UUID, FeatureStatistic> statisticMap) {
154 //current implementation: find the one with the lowest "before" value
155 //TODO better use before/n ??
156 Collection<FeatureStatistic> statistics = statisticMap.values();
157 FeatureStatistic result = statistics.iterator().next();
158 int bestBefore = result.before;
159 for (FeatureStatistic statistic : statistics){
160 if (statistic.before < bestBefore){
161 result = statistic;
162 bestBefore = statistic.before;
163 }
164 }
165 return result;
166 }
167
168
169 private Set<FeatureStatistic> getOnlyHighestFeatures(Map<UUID, FeatureStatistic> statisticMap) {
170 Set<FeatureStatistic> result = new HashSet<FeatureStatistic>();
171 for (FeatureStatistic statistic : statisticMap.values()){
172 if (statistic.isAlwaysHighest){
173 result.add(statistic);
174 }
175 }
176 return result;
177 }
178
179
180 private Map<UUID, FeatureStatistic> computeStatistic(Map<String,List<FeatureSorterInfo>> orderLists) {
181 Map<UUID, FeatureStatistic> result = new HashMap<UUID, FeatureStatistic>();
182 for (String key : orderLists.keySet()){
183 List<FeatureSorterInfo> list = orderLists.get(key);
184 int n = list.size();
185 for (FeatureSorterInfo info : list){
186 FeatureStatistic statistic = getFeatureStatistic(info, result);
187 int index = list.indexOf(info);
188 statistic.addValue(index,n);
189 }
190 }
191 return result;
192 }
193
194
195 private FeatureStatistic getFeatureStatistic(FeatureSorterInfo info, Map<UUID, FeatureStatistic> statisticMap) {
196 UUID uuid = info.getUuid();
197 FeatureStatistic result = statisticMap.get(uuid);
198 if (result == null){
199 result = new FeatureStatistic(uuid);
200 statisticMap.put(uuid, result);
201 }
202 return result;
203 }
204 }