Project

General

Profile

Revision d1504e5a

IDd1504e5a0dab49daf28a2a16a6a6e313c03b1e4e
Parent 85d65189
Child ca93e81e

Added by Andreas Müller almost 2 years ago

ref #8469 first handling of taxa covering all subbranches, but probably wrong approach

View differences:

cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/generate/PolytomousKeyGenerator.java
154 154
     * @param featureStatesFilter
155 155
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
156 156
	 */
157
	private Set<KeyTaxon> buildBranches(PolytomousKeyNode parent, List<Feature> featuresLeft, Set<KeyTaxon> taxaCovered,
157
	private Map<PolytomousKeyNode,Set<KeyTaxon>> buildBranches(PolytomousKeyNode parent, List<Feature> featuresLeft, Set<KeyTaxon> taxaCovered,
158 158
	        Map<Feature, Set<State>> featureStatesFilter){
159 159

  
160
	    Set<KeyTaxon> taxaCoveredByAllSubBranches;
160
	    Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches;
161 161

  
162 162
	    if (taxaCovered.size()<=1){
163 163
		    //do nothing
164 164
	        logger.warn("Only 1 or no taxon covered. This should currently only be possible on top level and is not yet handled. ");
165
	        taxaCoveredByAllSubBranches = taxaCovered;
165
            //old: taxaCoveredByAllSubBranches = taxaCovered;
166
	        taxaCoveredByAllSubBranches = new HashMap<>();
167
            taxaCoveredByAllSubBranches.put(parent, taxaCovered);
166 168
		}else {
167 169
			// this map stores the thresholds giving the best dichotomy of taxa for the corresponding feature supporting quantitative data
168 170
			Map<Feature,Float> quantitativeFeaturesThresholds = new HashMap<>();
......
189 191
			    // the winner features are put back to the features left once the branch is done
190 192
			    featuresLeft.add(winnerFeature);
191 193
			}else if (featuresLeft.isEmpty()){
192
			    taxaCoveredByAllSubBranches = handleLeaf(parent, taxaCovered);
194
			    //old: handleLeaf(parent, taxaCovered);
195
			    taxaCoveredByAllSubBranches = new HashMap<>();
196
			    taxaCoveredByAllSubBranches.put(parent, taxaCovered);
193 197
			}else{
194 198
			    throw new RuntimeException("No winner feature but features left to handle should not happen.");
195 199
			}
196
			handletaxaCoveredByAllSubBranches(parent, taxaCoveredByAllSubBranches);
200
//			handleTaxaCoveredByAllSubBranches(parent, taxaCoveredByAllSubBranches);
197 201
		}
198 202
        return taxaCoveredByAllSubBranches;
199 203
	}
......
227 231
     * each one of these might correspond to one child.
228 232
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
229 233
     */
230
    private Set<KeyTaxon> handleCategorialFeature(PolytomousKeyNode parent, List<Feature> featuresLeft,
234
    private Map<PolytomousKeyNode,Set<KeyTaxon>> handleCategorialFeature(PolytomousKeyNode parent, List<Feature> featuresLeft,
231 235
            Set<KeyTaxon> taxaCovered,
232 236
            Feature winnerFeature,
233 237
            Map<Feature, Set<State>> featureStatesFilter) {
234 238

  
235
        Set<KeyTaxon> taxaCoveredByAllSubBranches;
239
        Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches;
236 240
        Map<Set<KeyTaxon>,Boolean> reuseWinner = new HashMap<>();
237 241

  
238 242
        Set<State> states = getAllStates(winnerFeature, taxaCovered, featureStatesFilter.get(winnerFeature));
......
252 256
		        removeAddedDependendFeatures(featuresLeft, featuresAdded);
253 257
		    }else{
254 258
		        //if only 1 branch is left we can handle this as a leaf, no matter how many taxa are left
255
		        taxaCoveredByAllSubBranches = handleLeaf(parent, taxaCovered);
259
		        //old: taxaCoveredByAllSubBranches = handleLeaf(parent, taxaCovered);
260
	            taxaCoveredByAllSubBranches = new HashMap<>();
261
	            taxaCoveredByAllSubBranches.put(parent, taxaCovered);
256 262
		    }
257 263
		}else {
258 264
		    // if the merge option is ON, branches with the same discriminative power will be merged (see Vignes & Lebbes, 1989)
......
261 267
		                taxonStatesMap, featureStatesFilter.get(winnerFeature));
262 268
		    }
263 269
		    List<Set<KeyTaxon>> sortedKeys = sortKeys(taxonStatesMap);
264
		    taxaCoveredByAllSubBranches = null;
270
		    taxaCoveredByAllSubBranches = new HashMap<>();
265 271
            for (Set<KeyTaxon> newTaxaCovered : sortedKeys){
266 272
		        //handle each branch
267
		        Set<KeyTaxon> taxaCovererByBranch = handleCategoricalBranch(parent, featuresLeft, taxaCovered, winnerFeature,
268
		                reuseWinner, taxonStatesMap, newTaxaCovered, featureStatesFilter);
269
		        if (taxaCoveredByAllSubBranches == null){
270
		            taxaCoveredByAllSubBranches = taxaCovererByBranch;
271
		        }else{
272
		            taxaCoveredByAllSubBranches.retainAll(taxaCovererByBranch);
273
		        }
274
		    }
273
                Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByBranch = handleCategoricalBranch(parent, featuresLeft,
274
                        taxaCovered, winnerFeature, reuseWinner, taxonStatesMap, newTaxaCovered, featureStatesFilter);
275
                mergeBranchResults(taxaCoveredByAllSubBranches, taxaCoveredByBranch);
276
            }
277
            taxaCoveredByAllSubBranches = handleAllBranchesResult(taxaCoveredByAllSubBranches, parent);
275 278
		}
276 279
		return taxaCoveredByAllSubBranches;
277 280
    }
278 281

  
279
    /**
280
     * Sets the taxa which are linked by all sub-branches as taxa of the common parent
281
     * and removes the taxa from the sub-branches.
282
     * @param parent the common parent node of the sub-branches
283
     * @param taxaCoveredByAllSubBranches taxa linking to all subbranches
284
     */
285
    private void handletaxaCoveredByAllSubBranches(PolytomousKeyNode parent,
286
            Set<KeyTaxon> taxaCoveredByAllSubBranches) {
282
    private Map<PolytomousKeyNode, Set<KeyTaxon>> handleAllBranchesResult(
283
            Map<PolytomousKeyNode, Set<KeyTaxon>> taxaCoveredByAllSubBranches, PolytomousKeyNode parent) {
287 284

  
288
        if (taxaCoveredByAllSubBranches.isEmpty()){
285
        if(taxaCoveredByAllSubBranches.isEmpty()){
286
            return taxaCoveredByAllSubBranches;
287
        }
288
        Set<KeyTaxon> start = taxaCoveredByAllSubBranches.values().iterator().next();
289
        if(start == null){
290
            return taxaCoveredByAllSubBranches;
291
        }
292
        //compute taxon intersection
293
        Set<KeyTaxon> taxonIntersection = new HashSet<>(start);  //just taxa any as start
294
        for (Set<KeyTaxon> taxaOfSingleBranch : taxaCoveredByAllSubBranches.values()){
295
            taxonIntersection.retainAll(taxaOfSingleBranch);
296
        }
297
        Map<PolytomousKeyNode, Set<KeyTaxon>> remainingCoveredTaxa = new HashMap<>();
298
        //and add those taxa to parent
299
        remainingCoveredTaxa.put(parent, new HashSet<>(taxonIntersection));
300

  
301
        //handle non common taxa
302
        Set<PolytomousKeyNode> singularCandidates = new HashSet<>();
303
        for (PolytomousKeyNode node : taxaCoveredByAllSubBranches.keySet()){
304
            Set<KeyTaxon> remainingTaxa = taxaCoveredByAllSubBranches.get(node);
305
            remainingTaxa.removeAll(taxonIntersection);
306
            if(remainingTaxa.isEmpty() && node.getChildren().isEmpty()){
307
                PolytomousKeyNode myParent = node.getParent();
308
                myParent.removeChild(node);
309
                singularCandidates.add(myParent);
310
            }else{
311
                handleLeaf(node, remainingTaxa);
312
            }
313
        }
314
        for (PolytomousKeyNode cand : singularCandidates){
315
            checkSingularParent(cand);
316
        }
317

  
318
        return remainingCoveredTaxa;
319
    }
320

  
321
    private void checkSingularParent(PolytomousKeyNode parent) {
322
//        if(parent.getChildren().size()==1){
323
//            PolytomousKeyNode child = parent.getChildren().get(0);
324
//            parent.setFeature(child.getFeature());
325
//            parent.setTaxon(child.getTaxon());
326
//            Set<PolytomousKeyNode> children = new HashSet<>(child.getChildren());
327
//            for (PolytomousKeyNode childChild : children){
328
//                parent.addChild(childChild);
329
//            }
330
//            parent.removeChild(child);
331
//        }
332
    }
333

  
334
    private void mergeBranchResults(Map<PolytomousKeyNode, Set<KeyTaxon>> taxaCoveredByAllSubBranches,
335
            Map<PolytomousKeyNode, Set<KeyTaxon>> taxaCoveredByBranch) {
336
        if (taxaCoveredByBranch == null){
289 337
            return;
290 338
        }
291
        for (KeyTaxon taxon : taxaCoveredByAllSubBranches){
292
            for (PolytomousKeyNode node : parent.getChildren()){
293
                //TODO we do not need to do this recursive any more as we remove them in each step
294
//                removeTaxonRecursive(node, taxon);
295
                if (taxon.taxon.equals(node.getTaxon())){
296
                    //TODO remove empty leaf nodes
297
//                    node.removeTaxon();
298
                }
339

  
340
        for (PolytomousKeyNode node : taxaCoveredByBranch.keySet()){
341
            Set<KeyTaxon> existingNode = taxaCoveredByAllSubBranches.get(node);
342
            if (existingNode != null){
343
                throw new RuntimeException("Branches should be distinct.");
344
            }else{
345
                taxaCoveredByAllSubBranches.put(node, taxaCoveredByBranch.get(node));
299 346
            }
300 347
        }
301
        //TODO check if correctly handled if size > 1, might be a model problem
302
//        handleLeaf(parent, taxaCoveredByAllSubBranches);
303 348
    }
304 349

  
350
//    /**
351
//     * Sets the taxa which are linked by all sub-branches as taxa of the common parent
352
//     * and removes the taxa from the sub-branches.
353
//     * @param parent the common parent node of the sub-branches
354
//     * @param taxaCoveredByAllSubBranches taxa linking to all subbranches
355
//     */
356
//    private void handleTaxaCoveredByAllSubBranches(PolytomousKeyNode parent,
357
//            Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches) {
358
//
359
//        if (taxaCoveredByAllSubBranches.isEmpty()){
360
//            return;
361
//        }
362
//        for (KeyTaxon taxon : taxaCoveredByAllSubBranches){
363
//            for (PolytomousKeyNode node : parent.getChildren()){
364
//                //TODO we do not need to do this recursive any more as we remove them in each step
365
////                removeTaxonRecursive(node, taxon);
366
//                if (taxon.taxon.equals(node.getTaxon())){
367
//                    //TODO remove empty leaf nodes
368
//                    node.removeTaxon();
369
//                }
370
//            }
371
//        }
372
//        //TODO check if correctly handled if size > 1, might be a model problem
373
////        handleLeaf(parent, taxaCoveredByAllSubBranches);
374
//    }
375

  
305 376
    private void removeTaxonRecursive(PolytomousKeyNode parent, KeyTaxon taxon) {
306 377
        for (PolytomousKeyNode node : parent.getChildren()){
307 378
            removeTaxonRecursive(node, taxon);
......
336 407
    /**
337 408
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
338 409
     */
339
    private Set<KeyTaxon> handleCategoricalBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
410
    private Map<PolytomousKeyNode,Set<KeyTaxon>> handleCategoricalBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
340 411
            Set<KeyTaxon> taxaCovered,
341 412
            Feature winnerFeature, Map<Set<KeyTaxon>, Boolean> reuseWinner,
342
            Map<Set<KeyTaxon>, List<State>> taxonStatesMap, Set<KeyTaxon> newTaxaCovered, Map<Feature,Set<State>> featureStatesFilter) {
413
            Map<Set<KeyTaxon>, List<State>> taxonStatesMap,
414
            Set<KeyTaxon> newTaxaCovered,
415
            Map<Feature,Set<State>> featureStatesFilter) {
343 416

  
344
        Set<KeyTaxon> taxaCoveredByAllSubBranches;
417
        Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches;
345 418

  
346 419
        //to restore old state
347 420
        Set<State> oldFilterSet = featureStatesFilter.get(winnerFeature);
......
382 455
        if (hasChildren){
383 456
            taxaCoveredByAllSubBranches = buildBranches(childNode, featuresLeft, newTaxaCovered, featureStatesFilter);
384 457
        }else{
385
            taxaCoveredByAllSubBranches = handleLeaf(childNode, newTaxaCovered);
458
            //old: taxaCoveredByAllSubBranches = handleLeaf(childNode, newTaxaCovered);
459
            taxaCoveredByAllSubBranches = new HashMap<>();
460
            taxaCoveredByAllSubBranches.put(childNode, newTaxaCovered);
386 461
        }
387 462

  
388 463
        //restore old state before returning to parent node
......
545 620
    /**
546 621
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
547 622
     */
548
    private Set<KeyTaxon> handleQuantitativeData(PolytomousKeyNode parent, List<Feature> featuresLeft,
623
    private Map<PolytomousKeyNode,Set<KeyTaxon>> handleQuantitativeData(PolytomousKeyNode parent, List<Feature> featuresLeft,
549 624
            Set<KeyTaxon> taxaCovered, Map<Feature, Float> quantitativeFeaturesThresholds,
550 625
            Feature winnerFeature, Map<Feature, Set<State>> featureStatesFilter) {
551 626

  
552
        Set<KeyTaxon> taxaCoveredByAllSubBranches = null;
627
        Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches = null;
553 628

  
554 629
        // first, get the threshold
555 630
        float threshold = quantitativeFeaturesThresholds.get(winnerFeature);
......
560 635
        List<Set<KeyTaxon>> quantitativeStates = determineQuantitativeStates(threshold, winnerFeature, taxaCovered, unit);
561 636
        // thus the list contains two sets of KeyTaxon, the first corresponding to
562 637
        // those before, the second to those after the threshold
638
        taxaCoveredByAllSubBranches = new HashMap<>();
563 639
        for (int i=0; i<2; i++) {
564
            Set<KeyTaxon> taxaCovererByBranch = handleQuantitativeBranch(parent, featuresLeft, taxaCovered, winnerFeature, threshold, unit,
640
            Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByBranch = handleQuantitativeBranch(parent, featuresLeft, taxaCovered, winnerFeature, threshold, unit,
565 641
                    quantitativeStates, featureStatesFilter, i);
566
            if(taxaCoveredByAllSubBranches == null){
567
                taxaCoveredByAllSubBranches = taxaCovererByBranch;
568
            }else{
569
                taxaCoveredByAllSubBranches.retainAll(taxaCovererByBranch);
570
            }
642
            mergeBranchResults(taxaCoveredByAllSubBranches, taxaCoveredByBranch);
571 643
        }
644

  
645
        taxaCoveredByAllSubBranches = handleAllBranchesResult(taxaCoveredByAllSubBranches, parent);
646

  
572 647
        return taxaCoveredByAllSubBranches;
573 648
    }
574 649

  
......
577 652
     * TODO if the quantitative feature has dependent features they are not yet handled
578 653
     * @return taxa which exist in ALL sub-branches and therefore can be linked on higher level
579 654
     */
580
    private Set<KeyTaxon> handleQuantitativeBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
655
    private Map<PolytomousKeyNode,Set<KeyTaxon>> handleQuantitativeBranch(PolytomousKeyNode parent, List<Feature> featuresLeft,
581 656
            Set<KeyTaxon> taxaCovered, Feature winnerFeature, float threshold, StringBuilder unit,
582 657
            List<Set<KeyTaxon>> quantitativeStates, Map<Feature, Set<State>> featureStatesFilter, int i) {
583 658

  
584
        Set<KeyTaxon> taxaCoveredByAllSubBranches;
659
        Map<PolytomousKeyNode,Set<KeyTaxon>> taxaCoveredByAllSubBranches;
585 660
        String sign;
586 661
        Set<KeyTaxon> newTaxaCovered = quantitativeStates.get(i);
587 662
        if (i==0){
......
601 676
        	if (childrenExist){
602 677
        	    taxaCoveredByAllSubBranches = buildBranches(childNode, featuresLeft, newTaxaCovered, featureStatesFilter);
603 678
        	}else{
604
        	    taxaCoveredByAllSubBranches = handleLeaf(childNode, newTaxaCovered);
679
        	    //old: taxaCoveredByAllSubBranches = handleLeaf(childNode, newTaxaCovered);
680
                taxaCoveredByAllSubBranches = new HashMap<>();
681
                taxaCoveredByAllSubBranches.put(childNode, newTaxaCovered);
605 682
        	}
606 683
        }else{
607 684
            //TODO do we need to check the 0 case, can this happen at all, shouldn't we throw a warning instead?
608
            taxaCoveredByAllSubBranches = new HashSet<>();
685
            taxaCoveredByAllSubBranches = new HashMap<>();
609 686
        }
610 687
        return taxaCoveredByAllSubBranches;
611 688
    }

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)