From 2106a8eed60b333bda62b160dfca848e34281c79 Mon Sep 17 00:00:00 2001
From: "m.venin" <m.venin@localhost>
Date: Thu, 2 Dec 2010 13:39:08 +0000
Subject: [PATCH] Last updates for natural language generation (added comments,
 new options, cleaned the code, etc)

---
 .gitattributes                                |   2 +
 ...DefaultQuantitativeDescriptionBuilder.java |  73 ++-
 .../api/service/DeltaTextDataProcessor.java   |  49 ++
 .../cdm/api/service/DescriptionBuilder.java   |  45 +-
 .../service/INaturalLanguageGenerator.java    |   6 +-
 .../INaturalLanguageTextDataProcessor.java    |   5 +-
 .../service/IdentificationKeyGenerator.java   | 421 +++++++++++++++++-
 ...roFormatCategoricalDescriptionBuilder.java |  60 +++
 .../api/service/NaturalLanguageGenerator.java | 351 +++++++++------
 9 files changed, 800 insertions(+), 212 deletions(-)
 create mode 100644 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java
 create mode 100644 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java
diff --git a/.gitattributes b/.gitattributes
index bbb3b69e0e..8545626236 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1568,6 +1568,7 @@ cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/CommonServiceImpl.jav
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DatabaseServiceHibernateImpl.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultCategoricalDescriptionBuilder.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java -text
+cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionServiceImpl.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DistributionNodeComparator.java -text
@@ -1611,6 +1612,7 @@ cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyServ
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/LocationServiceImpl.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MarkerServiceImpl.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MediaServiceImpl.java -text
+cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatQuantitativeDescriptionBuilder.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NameServiceImpl.java -text
 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NamedAreaNodeComparator.java -text
diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java
index 282cced849..ad7134f9cb 100644
--- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java
+++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java
@@ -14,26 +14,15 @@ import eu.etaxonomy.cdm.model.description.TextData;
 import eu.etaxonomy.cdm.model.description.TextFormat;
 
 public class DefaultQuantitativeDescriptionBuilder extends AbstractQuantitativeDescriptionBuilder {
+
 	
 	@Override
 	protected TextData doBuild(Map<StatisticalMeasure,Float> measures, MeasurementUnit mUnit, List<Language> languages){
 		StringBuilder QuantitativeDescription = new StringBuilder(); // this StringBuilder is used to concatenate the different words of the description before saving it in the TextData
 		TextData textData = TextData.NewInstance(); // TextData that will contain the description and the language corresponding
 		// booleans indicating whether a kind of value is present or not and the float that will eventually hold the value
-		boolean average = false;
-		float averagevalue = new Float(0);
-		boolean sd = false;
-		float sdvalue = new Float(0);
-		boolean min = false;
-		float minvalue = new Float(0);
-		boolean max = false;
-		float maxvalue = new Float(0);
-		boolean lowerb = false;
-		float lowerbvalue = new Float(0);
-		boolean upperb = false;
-		float upperbvalue = new Float(0);
 		
-		String unit = "(unknown unit)";
+		String unit = "";
 		if ((mUnit!=null)&&(mUnit.getLabel()!=null)){
 			unit = mUnit.getLabel();
 		}
@@ -54,32 +43,23 @@ public class DefaultQuantitativeDescriptionBuilder extends AbstractQuantitativeD
 		String space = " "; // should "space" be considered as a linking word and thus be stored in NaturalLanguageTerm.class ?
 		
 		// the booleans and floats are updated according to the presence or absence of values
-			if (measures.containsKey(StatisticalMeasure.AVERAGE())) {
-				average = true;
-				averagevalue = measures.get(StatisticalMeasure.AVERAGE());
-			}
-			if(measures.containsKey(StatisticalMeasure.STANDARD_DEVIATION())) {
-				sd = true;
-				sdvalue = measures.get(StatisticalMeasure.STANDARD_DEVIATION());
-			}
-			if (measures.containsKey(StatisticalMeasure.MIN())) {
-				min = true;
-				minvalue = measures.get(StatisticalMeasure.MIN());
-			}
-			if (measures.containsKey(StatisticalMeasure.MAX())) {
-				max = true;
-				maxvalue = measures.get(StatisticalMeasure.MAX());
-			}
-			if (measures.containsKey(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY())) {
-				lowerb = true;
-				lowerbvalue = measures.get(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY());
-			}
-			if (measures.containsKey(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY())) {
-				upperb = true;
-				upperbvalue = measures.get(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY());
-			}
-			
-			
+
+		Boolean max, min, upperb, lowerb, average, sd;
+		
+		String averagevalue = getValue(measures,StatisticalMeasure.AVERAGE());
+		if (averagevalue!=null) average=true; else average=false;
+		String sdvalue = getValue(measures,StatisticalMeasure.STANDARD_DEVIATION());
+		if (sdvalue!=null) sd=true; else sd=false;
+		String minvalue = getValue(measures,StatisticalMeasure.MIN());
+		if (minvalue!=null) min=true; else min=false;
+		String maxvalue = getValue(measures,StatisticalMeasure.MAX());
+		if (maxvalue!=null) max=true; else max=false;
+		String lowerbvalue = getValue(measures,StatisticalMeasure.TYPICAL_LOWER_BOUNDARY());
+		if (lowerbvalue!=null) lowerb=true; else lowerb=false;
+		String upperbvalue = getValue(measures,StatisticalMeasure.TYPICAL_UPPER_BOUNDARY());
+		if (upperbvalue!=null) upperb=true; else upperb=false;
+		
+		
 		// depending on the different associations of values, a sentence is built	
 		if (max && min) {
 			QuantitativeDescription.append(space + from + space + minvalue + space + to + space + maxvalue + space + unit);
@@ -120,6 +100,18 @@ public class DefaultQuantitativeDescriptionBuilder extends AbstractQuantitativeD
 		return textData;
 	}
 	
+	/** Returns the measure stored under key as text, written without decimals when it is a whole number, or null when there is none. */
+	private String getValue(Map<StatisticalMeasure,Float> measures, Object key) {
+		Float floatValue = measures.get(key);
+		if (floatValue == null) { // key absent or mapped to null: either way the caller has no value to print
+			return null;
+		}
+		Integer intValue = floatValue.intValue();
+		// a whole number survives the float -> int -> float round trip unchanged, so it is printed as an integer
+		if (floatValue.equals(intValue.floatValue())) return intValue.toString();
+		return floatValue.toString();
+	}
+	
 	protected String buildFeature(Feature feature, boolean doItBetter){
 		if (feature==null || feature.getLabel()==null) return "";
 		else {
@@ -134,7 +126,8 @@ public class DefaultQuantitativeDescriptionBuilder extends AbstractQuantitativeD
 			}
 			else{
 				String betterString = StringUtils.replaceChars(feature.getLabel(), "<>",""); // only remove the brackets
-				return StringUtils.substringBeforeLast(betterString," ");
+				return betterString;
+//				return StringUtils.substringBeforeLast(betterString," ");
 			}
 		}
 	}
diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java
new file mode 100644
index 0000000000..65e00761e7
--- /dev/null
+++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java
@@ -0,0 +1,49 @@
+package eu.etaxonomy.cdm.api.service;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+
+import eu.etaxonomy.cdm.model.description.TextData;
+import eu.etaxonomy.cdm.model.common.Language;
+
+import eu.etaxonomy.cdm.model.common.LanguageString;
+
+/** Text data processor keeping only what is new in a text compared with the previous one ("delta"). */
+public class DeltaTextDataProcessor implements INaturalLanguageTextDataProcessor {
+	/**
+	 * Rewrites every language version of <code>textData</code> in place: the text that <code>previousTextData</code>
+	 * holds for the same language is removed from it, then the angle brackets are dropped.
+	 * @param textData the TextData to modify
+	 * @param previousTextData the TextData to subtract; may be null or lack a language, then nothing is subtracted
+	 */
+	@Override
+	public void process(TextData textData, TextData previousTextData) {
+		boolean doItBetter = false; //TODO remove the text between brackets
+		Map<Language,LanguageString> oldMultiLanguageText = (previousTextData==null) ? null : previousTextData.getMultilanguageText();
+		// loop over a copy: removeText()/putText() below change the texts of textData while they are visited
+		Map<Language,LanguageString> multiLanguageText = new HashMap<Language,LanguageString>(textData.getMultilanguageText());
+		for (Map.Entry<Language,LanguageString> entry : multiLanguageText.entrySet()){
+			Language language = entry.getKey();
+			String newText = entry.getValue().getText();
+			LanguageString oldLangString = (oldMultiLanguageText==null) ? null : oldMultiLanguageText.get(language);
+			newText = StringUtils.remove(newText, (oldLangString==null) ? null : oldLangString.getText()); // remove() leaves the text as is for a null argument
+			if (doItBetter) {
+				StringBuilder strbuilder = new StringBuilder();
+				do	{
+					strbuilder.append(StringUtils.substringBefore(newText, "<"));
+				}
+				while (!(newText=StringUtils.substringAfter(newText, ">")).equals(""));
+				newText = StringUtils.substringBeforeLast(strbuilder.toString()," "); // this result used to be thrown away
+			}
+			else{
+				newText=StringUtils.replaceChars(newText,"<>","");
+			}
+			textData.removeText(language);
+			textData.putText(newText,language);
+		}
+	}
+}
diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java
index 8a5da917e9..0d14a1e950 100644
--- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java
+++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java
@@ -3,12 +3,53 @@ package eu.etaxonomy.cdm.api.service;
 import java.util.List;
 
 import eu.etaxonomy.cdm.model.common.Language;
+import eu.etaxonomy.cdm.model.common.Representation;
 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
 import eu.etaxonomy.cdm.model.description.Feature;
 import eu.etaxonomy.cdm.model.description.TextData;
 
 public abstract class DescriptionBuilder<T extends DescriptionElementBase> {
+	private static final int LABEL = 0, ABBREVIATED_LABEL = 1, TEXT = 2; // the kinds of output getRightText() can return
+	/** Separator string available to the subclasses ("," by default). */
+	protected String separator = ",";
+	private int option = LABEL;
+	
+	/** Makes getRightText() return abbreviated labels (the label where there is none). */
+	public void returnAbbreviatedLabels() {
+		option=ABBREVIATED_LABEL;
+	}
+	
+	/** Makes getRightText() return texts (the label where there is none). */
+	public void returnTexts() {
+		option=TEXT;
+	}
+	
+	/** Makes getRightText() return labels; this is the initial setting. */
+	public void returnLabels() {
+		option=LABEL;
+	}
+	/** Replaces the separator string. */
+	public void setSeparator(String newSeparator) {
+		separator = newSeparator;
+	}
+	/** Returns the current separator string. */
+	public String getSeparator() {
+		return separator;
+	}
+	
 	public abstract TextData build(T descriptionElement, List<Language> languages);
 	
-	protected abstract String buildFeature(Feature feature, boolean doItBetter);
-}
+	/** Returns the abbreviated label, the text or the label of the representation according to the selected output, falling back to the label; "" for a null representation. */
+	protected String getRightText(Representation representation){
+		if (representation == null) return "";
+		String result = null;
+		if (option==ABBREVIATED_LABEL) result = representation.getAbbreviatedLabel();
+		else if (option==TEXT) result = representation.getText();
+		return (result != null) ? result : representation.getLabel();
+	}
+	/** Builds a TextData holding the name of the feature, stored under the first of the given languages (the default language if there is none). */
+	public TextData buildTextDataFeature(Feature feature, List<Language> languages){
+		Language language = (languages == null || languages.isEmpty()) ? Language.DEFAULT() : languages.get(0); // get(0) alone throws on an empty list
+		return TextData.NewInstance(getRightText(feature.getPreferredRepresentation(languages)),language,null);
+	}
+}
\ No newline at end of file
diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageGenerator.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageGenerator.java
index cbeaad7752..1d79a96a35 100644
--- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageGenerator.java
+++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageGenerator.java
@@ -26,5 +26,9 @@ public interface INaturalLanguageGenerator {
 	
 	public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, List<Language> languages);
 
-	public String generateStringNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description,	Language language);
+	public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description);
+	
+	public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description, Language language);
+	
+	public TextData generatePreferredSingleTextData(FeatureTree featureTree, TaxonDescription description, List<Language> languages);
 }
diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageTextDataProcessor.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageTextDataProcessor.java
index ad16ecd479..a5870f81e6 100644
--- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageTextDataProcessor.java
+++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageTextDataProcessor.java
@@ -19,13 +19,12 @@ import eu.etaxonomy.cdm.model.description.TextData;
 public interface INaturalLanguageTextDataProcessor {
 
 	/**
-	 * Applies some special proccessing to the text contained in the TextData or/and
+	 * Applies some special processing to the text contained in the TextData or/and
 	 * to the Feature label/representation
 	 * 
 	 * @param textData
 	 * @param previousTextData TODO
-	 * @return the modified TextData
 	 */
-	public TextData process(TextData textData, TextData previousTextData);
+	public void process(TextData textData, TextData previousTextData);
 	
 }
diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyGenerator.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyGenerator.java
index 28c8a91f60..f28f0cbcd1 100644
--- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyGenerator.java
+++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyGenerator.java
@@ -14,8 +14,12 @@ import eu.etaxonomy.cdm.model.description.CategoricalData;
 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
 import eu.etaxonomy.cdm.model.description.Feature;
 import eu.etaxonomy.cdm.model.description.FeatureNode;
+<<<<<<< .mine
+import eu.etaxonomy.cdm.model.description.FeatureTree;
+=======
 import eu.etaxonomy.cdm.model.description.FeatureTree;
 import eu.etaxonomy.cdm.model.description.KeyStatement;
+>>>>>>> .r10927
 import eu.etaxonomy.cdm.model.description.PolytomousKey;
 import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
 import eu.etaxonomy.cdm.model.description.QuantitativeData;
@@ -32,10 +36,17 @@ public class IdentificationKeyGenerator {
 	private PolytomousKey polytomousKey; // the Identification Key
 	private List<Feature> features; // the features used to generate the key
 	private Set<TaxonDescription> taxa; // the base of taxa
+	private FeatureTree dependenciesTree;
+	private Map<TaxonDescription,List<Integer>> paths = new HashMap<TaxonDescription,List<Integer>>(); // for statistics only
+	private boolean merge=true;
+	
+	private Map<State,Set<Feature>> iIdependencies = new HashMap<State,Set<Feature>>();
+	private Map<State,Set<Feature>> oAIdependencies = new HashMap<State,Set<Feature>>();
+	private boolean dependenciesON = true;
 	
 	private String before="<";
 	private String after=">";
-	private String separator = ", ";
+	private String separator = " or ";
 	
 	/**
 	 * Sets the features used to generate the key
@@ -55,6 +66,15 @@ public class IdentificationKeyGenerator {
 		this.taxa = taxaSet;
 	}
 	
+	/**
+	 * Sets the tree containing the dependencies between states and features
+	 * 
+	 * @param tree the feature tree to read the dependencies from
+	 */
+	public void setDependencies(FeatureTree tree){
+		this.dependenciesTree = tree;
+	}
+	
 	
 	/**
 	 * Initializes the function buildBranches() with the starting parameters in order to build the key 
@@ -71,7 +91,7 @@ public class IdentificationKeyGenerator {
 	private void loop(){
 		polytomousKey = polytomousKey.NewInstance();
 		PolytomousKeyNode root = polytomousKey.getRoot();
-		buildBranches(root,features,taxa);	
+		buildBranches(root,features,taxa,false,-1);	
 	}
 	
 	
@@ -79,11 +99,18 @@ public class IdentificationKeyGenerator {
 	 * Creates the key and prints it
 	 */
 	public void makeandprint(){
+<<<<<<< .mine
+		if (dependenciesON && dependenciesTree!=null) checkDependencies(dependenciesTree.getRoot());
+		Loop();
+		List<FeatureNode> rootlist = new ArrayList<FeatureNode>();
+=======
 		loop();
 		List<PolytomousKeyNode> rootlist = new ArrayList<PolytomousKeyNode>();
+>>>>>>> .r10927
 		rootlist.add(polytomousKey.getRoot());
 		String spaces = new String();
 		printTree(rootlist,spaces);
+		System.out.println(paths.toString());
 	}
 	
 
@@ -93,7 +120,12 @@ public class IdentificationKeyGenerator {
 	 * @param father the node considered
 	 * @param featuresLeft List of features that can be used at this point
 	 * @param taxaCovered the taxa left at this point (i.e. that verify the description corresponding to the path leading to this node)
+	 * @param mybool
 	 */
+<<<<<<< .mine
+	private void buildBranches(FeatureNode father, List<Feature> featuresLeft, Set<TaxonDescription> taxaCovered, boolean mybool, int levelbis){
+		int levelhere=levelbis+1;
+=======
 	private void buildBranches_old(FeatureNode father, List<Feature> featuresLeft, Set<TaxonDescription> taxaCovered){
 //		// this map stores the thresholds giving the best dichotomy of taxa for the corresponding feature supporting quantitative data
 //		Map<Feature,Float> quantitativeFeaturesThresholds = new HashMap<Feature,Float>();
@@ -193,9 +225,51 @@ public class IdentificationKeyGenerator {
 		// the feature is removed from the list of features available to build the next level of the tree
 		featuresLeft.remove(winnerFeature);
 		// this boolean indicates if the current father node has children or not (i.e. is a leaf or not) ; (a leaf has a "Question" element)
+>>>>>>> .r10927
 		boolean childrenExist = false;
-		int i;
 		
+<<<<<<< .mine
+		Set<Feature> innapplicables = new HashSet<Feature>();
+		Set<Feature> applicables = new HashSet<Feature>();
+		
+		if (taxaCovered.size()>1) {
+			// this map stores the thresholds giving the best dichotomy of taxa for the corresponding feature supporting quantitative data
+			Map<Feature,Float> quantitativeFeaturesThresholds = new HashMap<Feature,Float>();
+			// the scores of the different features are calculated, the thresholds in the same time
+			Map<Feature,Float> scoreMap = FeatureScores(featuresLeft, taxaCovered, quantitativeFeaturesThresholds);
+			// the feature with the best score becomes the one corresponding to the current node
+//			Feature winnerFeature = DefaultWinner(taxaCovered.size(), scoreMap);
+			Feature winnerFeature = LessStatesWinner(taxaCovered.size(), scoreMap, taxaCovered);
+			// the feature is removed from the list of features available to build the next level of the tree
+			featuresLeft.remove(winnerFeature);
+			// this boolean indicates if the current father node has children or not (i.e. is a leaf or not) ; (a leaf has a "Question" element)
+			int i;
+			/************** either the feature supports quantitative data... **************/
+			// NB: in this version, "quantitative features" are dealt with in a dichotomous way
+			if (winnerFeature.isSupportsQuantitativeData()) {
+				// first, get the threshold
+				float threshold = quantitativeFeaturesThresholds.get(winnerFeature);
+				String sign;
+				StringBuilder unit= new StringBuilder("");
+				// then determine which taxa are before and which are after this threshold (dichotomy) in order to create the children of the father node
+				List<Set<TaxonDescription>> quantitativeStates = determineQuantitativeStates(threshold,winnerFeature,taxaCovered,unit);
+				for (i=0;i<2;i++) {
+					Set<TaxonDescription> newTaxaCovered = quantitativeStates.get(i);
+					if (i==0) sign = before; // the first element of the list corresponds to taxa before the threshold
+					else sign = after; // the second to those after
+					if (newTaxaCovered.size()>0 && !((newTaxaCovered.size()==taxaCovered.size()) && mybool)){ // if the taxa are discriminated compared to those of the father node, a child is created
+						childrenExist = true;
+						FeatureNode son = FeatureNode.NewInstance();
+						son.setFeature(winnerFeature);
+						Representation question = new Representation(null, " " + sign + " " + threshold +unit,null, Language.DEFAULT()); // the question attribute is used to store the state of the feature
+						son.addQuestion(question);
+						father.addChild(son);
+						boolean newbool;
+						if (newTaxaCovered.size()==taxaCovered.size()) newbool = true;
+						else newbool = false;
+						buildBranches(son,featuresLeft, newTaxaCovered,newbool,levelhere);
+					}
+=======
 		/************** either the feature supports quantitative data... **************/
 		// NB: in this version, "quantitative features" are dealt with in a dichotomous way
 		if (winnerFeature.isSupportsQuantitativeData()) {
@@ -220,8 +294,40 @@ public class IdentificationKeyGenerator {
 					son.setStatement(statement);
 					father.addChild(son);
 					buildBranches(son,featuresLeft, newTaxaCovered);
+>>>>>>> .r10927
 				}
 			}
+<<<<<<< .mine
+
+			/************** ...or it supports categorical data. **************/
+			// "categorical features" may present several different states, each one of these might correspond to one child
+			List<State> statesDone = new ArrayList<State>();
+			int numberOfStates;
+			if (winnerFeature.isSupportsCategoricalData()) {
+				for (TaxonDescription td : taxaCovered){
+					// go through all the states possible for one feature for the taxa considered
+					DescriptionElementBase debConcerned = null;
+					for (DescriptionElementBase deb : td.getElements()) {
+						if (deb.getFeature().equals(winnerFeature)) debConcerned = deb;
+					}
+					// a map is created, the key being the set of taxa that present the state(s) stored in the corresponding value
+					if (debConcerned!=null) {
+						Map<Set<TaxonDescription>,List<State>> taxonStatesMap = determineCategoricalStates(statesDone,(CategoricalData)debConcerned,winnerFeature,taxaCovered);
+						// if the merge option is ON, branches with the same discriminative power will be merged (see Vignes & Lebbes, 1989)
+						if (merge){
+							// see below
+							Map<State,Set<State>> exclusions = new HashMap<State,Set<State>>();
+							// maps the different states of the winnerFeature to the list of states "incompatible" with it
+							FeatureScoreAndMerge(winnerFeature,taxaCovered,exclusions);
+
+							Integer best=null;
+							int length;
+
+							// looks for the largest clique, i.e. the state with less exclusions
+							while (!exclusions.isEmpty()){
+								List<State> clique = returnBestClique(exclusions);
+								mergeBranches(clique,taxonStatesMap);
+=======
 		}
 		
 		/************** ...or it supports categorical data. **************/
@@ -249,17 +355,57 @@ public class IdentificationKeyGenerator {
 							for (State st : listOfStates) {
 								questionLabel.append(st.getLabel());
 								if (listOfStates.lastIndexOf(st)!=numberOfStates) questionLabel.append(separator);
+>>>>>>> .r10927
 							}
+<<<<<<< .mine
+=======
 							KeyStatement statement = KeyStatement.NewInstance(questionLabel.toString());
 							son.setStatement(statement);
 							son.setFeature(winnerFeature);
 							father.addChild(son);
 							featuresLeft.remove(winnerFeature); // TODO was commented before, why ?
 							buildBranches(son,featuresLeft, newTaxaCovered);
+>>>>>>> .r10927
+						}
+						if (taxonStatesMap!=null && !taxonStatesMap.isEmpty()) { 
+							for (Map.Entry<Set<TaxonDescription>,List<State>> e : taxonStatesMap.entrySet()){
+								Set<TaxonDescription> newTaxaCovered = e.getKey();
+								List<State> listOfStates = e.getValue();
+								if ((newTaxaCovered.size()>0) && !((newTaxaCovered.size()==taxaCovered.size()) && mybool)){ // if the taxa are discriminated compared to those of the father node, a child is created
+									childrenExist = true;
+									FeatureNode son = FeatureNode.NewInstance();
+									StringBuilder questionLabel = new StringBuilder();
+									numberOfStates = listOfStates.size()-1;
+									for (State st : listOfStates) {
+										if (dependenciesON){
+											if (iIdependencies.get(st)!= null) innapplicables.addAll(iIdependencies.get(st));
+											if (oAIdependencies.get(st)!= null) applicables.addAll(oAIdependencies.get(st));
+											for (Feature feature : innapplicables) featuresLeft.remove(feature);
+											for (Feature feature : applicables) featuresLeft.add(feature);
+										}
+										questionLabel.append(st.getLabel());
+										if (listOfStates.lastIndexOf(st)!=numberOfStates) questionLabel.append(separator);
+									}
+									Representation question = new Representation(null, questionLabel.toString(),null, Language.DEFAULT());
+									son.addQuestion(question);
+									son.setFeature(winnerFeature);
+									father.addChild(son);
+									featuresLeft.remove(winnerFeature); // TODO was commented before, why ?
+									boolean newbool;
+									if (newTaxaCovered.size()==taxaCovered.size()) newbool = true;
+									else newbool = false;
+									buildBranches(son,featuresLeft, newTaxaCovered,newbool,levelhere);
+								}
+							}
 						}
 					}
 				}
 			}
+			if (dependenciesON){
+				for (Feature feature : innapplicables) featuresLeft.add(feature);
+				for (Feature feature : applicables) featuresLeft.remove(feature);
+			}
+			featuresLeft.add(winnerFeature);
 		}
 		if (! childrenExist){
 			KeyStatement fatherStatement = father.getStatement();
@@ -268,12 +414,92 @@ public class IdentificationKeyGenerator {
 				String label = statementString + " --> " + taxaCovered.toString();
 				fatherStatement.putLabel(label, Language.DEFAULT());
 			}
+			//			for (TaxonDescription td : taxaCovered){
+			//				if (paths.containsKey(td)) paths.get(td).add(levelhere);
+			//				else {
+			//					List<Integer> pathLength = new ArrayList<Integer>();
+			//					pathLength.add(levelhere);
+			//					paths.put(td, pathLength);
+			//				}
+			//			}
 		}
-		featuresLeft.add(winnerFeature);
 	}
 
+	/**
+	 * Merges the branches (entries of taxonStatesMap) holding at least one state of the clique into a single
+	 * branch covering the union of their taxa and listing all their states. Nothing is done for a clique of
+	 * less than two states.
+	 * @param clique states that should end up in the same branch
+	 * @param taxonStatesMap map from a set of taxa to the states they present; modified in place
+	 */
+	private void mergeBranches(List<State> clique, Map<Set<TaxonDescription>,List<State>> taxonStatesMap){
+		if (clique.size()<2) return;
+		// the merged branch is accumulated in fresh collections: changing a Set that is still used as a key
+		// of the map would change its hash code and make the entry unreachable through get() and remove()
+		Set<TaxonDescription> mergedTaxa = new HashSet<TaxonDescription>();
+		List<State> mergedStates = new ArrayList<State>();
+		Iterator<Map.Entry<Set<TaxonDescription>,List<State>>> entries = taxonStatesMap.entrySet().iterator();
+		while (entries.hasNext()){
+			Map.Entry<Set<TaxonDescription>,List<State>> branch = entries.next();
+			boolean stateFound = false;
+			for (State state : clique){
+				if (branch.getValue().contains(state)){
+					stateFound = true;
+					break;
+				}
+			}
+			if (stateFound){
+				mergedTaxa.addAll(branch.getKey());
+				mergedStates.addAll(branch.getValue());
+				entries.remove();
+			}
+		}
+		if (mergedStates.isEmpty()) return; // no branch holds a state of the clique
+		List<State> sameTaxaStates = taxonStatesMap.get(mergedTaxa);
+		if (sameTaxaStates == null) taxonStatesMap.put(mergedTaxa, mergedStates);
+		else sameTaxaStates.addAll(mergedStates); // a remaining branch already covers exactly these taxa
+	}
+	
+<<<<<<< .mine
+	/**
+	 * Greedily extracts a group of states from the exclusions map: it starts with the state having the
+	 * fewest exclusions, then adds every state whose own exclusions contain none of the states already
+	 * chosen. The states of the group are removed from the map.
+	 * 
+	 * @param exclusions maps each state to the states it is incompatible with; modified in place
+	 * @return the states chosen, an empty list if the map is empty
+	 */
+	private List<State> returnBestClique (Map<State,Set<State>> exclusions){
+		List<State> clique = new ArrayList<State>();
+		if (exclusions.isEmpty()) return clique; // nothing to start from (a null state used to be added here)
+		// the starting point is the state with the fewest exclusions
+		State bestState=null;
+		int best=-1;
+		for (Map.Entry<State,Set<State>> pair : exclusions.entrySet()){
+			int length = pair.getValue().size();
+			if ((best==-1) || length<best) {
+				best=length;
+				bestState = pair.getKey();
+			}
+		}
+		clique.add(bestState);
+		exclusions.remove(bestState);
+		for (Map.Entry<State,Set<State>> pair : exclusions.entrySet()){
+			boolean compatible = true;
+			for (State state : clique) {
+				if (pair.getValue().contains(state)) compatible = false;
+			}
+			if (compatible) clique.add(pair.getKey());
+		}
+		// removal is deferred until here because the map cannot be changed while it is iterated
+		for (State state : clique) exclusions.remove(state);
+		return clique;
+	}
+	
 	
+=======
 	
+>>>>>>> .r10927
 	/**
 	 * fills a map of the sets of taxa (key) presenting the different states (value) for the given feature.
 	 * 
@@ -297,10 +523,6 @@ public class IdentificationKeyGenerator {
 			if(!statesDone.contains(featureState)){
 				statesDone.add(featureState);
 				
-				StateData sd = new StateData();
-				sd.setState(featureState);
-				//((CategoricalData)debsDone.get(0)).addState(sd);// A VOIR
-				
 				Set<TaxonDescription> newTaxaCovered = whichTaxa(feature,featureState,taxaCovered);
 				List<State> newStates = childrenStatesMap.get(newTaxaCovered);
 				if (newStates==null) {
@@ -334,10 +556,77 @@ public class IdentificationKeyGenerator {
 		return newCoveredTaxa;
 	}
 	
+<<<<<<< .mine
+	//change names ; merge with Default (Default takes the first one of the list)
+	/**
+	 * Chooses the feature whose score is the closest to the mean score. When several features are equally
+	 * close, the one showing the fewest different states among the taxa covered is preferred (a feature
+	 * supporting quantitative but not categorical data counts for two states).
+	 * 
+	 * @param nTaxa number of taxa to discriminate
+	 * @param scores score of each candidate feature; features with a null score are ignored
+	 * @param taxaCovered descriptions of the taxa to discriminate
+	 * @return the chosen feature, or null if nTaxa is 1 or no feature qualifies
+	 */
+	private Feature LessStatesWinner(int nTaxa, Map<Feature,Float> scores, Set<TaxonDescription> taxaCovered){
+		if (nTaxa==1) return null;
+		float meanScore = DefaultMeanScore(nTaxa);
+		float smallestGap = nTaxa*nTaxa;
+		List<Feature> candidates = new ArrayList<Feature>();
+		// keep every feature whose score is at the smallest distance from the mean score
+		for (Map.Entry<Feature,Float> scored : scores.entrySet()){
+			if (scored.getValue()==null) continue;
+			float gap = Math.abs(scored.getValue()-meanScore);
+			if (gap < smallestGap){
+				candidates.clear();
+				candidates.add(scored.getKey());
+				smallestGap = gap;
+			}
+			else if (gap==smallestGap){
+				candidates.add(scored.getKey());
+			}
+		}
+		if (candidates.size()==1) return candidates.get(0);
+		// several candidates (or none): compare the number of different states they present
+		Feature winner = null;
+		int fewestStates=-1;
+		int stateCount=-1;
+		for (Feature candidate : candidates){
+			if (candidate.isSupportsCategoricalData()){
+				Set<State> statesSeen = new HashSet<State>();
+				for (TaxonDescription td : taxaCovered){
+					for (DescriptionElementBase deb : td.getElements()){
+						if (!deb.isInstanceOf(CategoricalData.class)) continue;
+						CategoricalData catdat = (CategoricalData)deb;
+						if (!catdat.getFeature().equals(candidate)) continue;
+						for (StateData sd : catdat.getStates()) {
+							statesSeen.add(sd.getState());
+						}
+					}
+				}
+				stateCount=statesSeen.size();
+			}
+			else if (candidate.isSupportsQuantitativeData()){
+				stateCount=2;
+			}
+			if (fewestStates==-1 || stateCount<fewestStates){
+				fewestStates=stateCount;
+				winner = candidate;
+			}
+		}
+		return winner;
+	}
+	
+	private Feature DefaultWinner(int nTaxa, Map<Feature,Float> scores){
+		if (nTaxa==1) return null;
+		float meanScore = DefaultMeanScore(nTaxa);
+		float bestScore = nTaxa*nTaxa;
+=======
 	//change names
 	private Feature defaultWinner(int nTaxons, Map<Feature,Float> scores){
 		float meanScore = defaultMeanScore(nTaxons);
 		float bestScore = nTaxons*nTaxons;
+>>>>>>> .r10927
 		Feature feature = null;
 		Iterator it = scores.entrySet().iterator();
 		float newScore;
@@ -351,9 +640,6 @@ public class IdentificationKeyGenerator {
 				}
 			}
 		}
-		if (!(feature.getLabel()==null)){
-//			System.out.println(feature.getLabel() + bestScore);
-		}
 		return feature;
 	}
 	
@@ -371,7 +657,11 @@ public class IdentificationKeyGenerator {
 		Map<Feature,Float> scoreMap = new HashMap<Feature,Float>();
 		for (Feature feature : featuresLeft){
 			if (feature.isSupportsCategoricalData()) {
+<<<<<<< .mine
+				scoreMap.put(feature, categoricalFeatureScore(feature,coveredTaxa));
+=======
 				scoreMap.put(feature, featureScore(feature,coveredTaxa));
+>>>>>>> .r10927
 			}
 			if (feature.isSupportsQuantitativeData()){
 				scoreMap.put(feature, quantitativeFeatureScore(feature,coveredTaxa, quantitativeFeaturesThresholds));
@@ -380,7 +670,7 @@ public class IdentificationKeyGenerator {
 		return scoreMap;
 	}
 	
-	private List<Set<TaxonDescription>> determineQuantitativeStates (Float threshold, Feature feature, Set<TaxonDescription> taxa){
+	private List<Set<TaxonDescription>> determineQuantitativeStates (Float threshold, Feature feature, Set<TaxonDescription> taxa, StringBuilder unit){
 		List<Set<TaxonDescription>> list = new ArrayList<Set<TaxonDescription>>();
 		Set<TaxonDescription> taxaBefore = new HashSet<TaxonDescription>();
 		Set<TaxonDescription> taxaAfter = new HashSet<TaxonDescription>();
@@ -392,14 +682,17 @@ public class IdentificationKeyGenerator {
 				if (deb.getFeature().equals(feature)) {
 					if (deb.isInstanceOf(QuantitativeData.class)) {
 						QuantitativeData qd = (QuantitativeData)deb;
+						if (unit.toString().equals("") && qd.getUnit()!=null && qd.getUnit().getLabel()!=null){
+							unit.append(" " + qd.getUnit().getLabel());
+						}
 						Set<StatisticalMeasurementValue> values = qd.getStatisticalValues();
 						for (StatisticalMeasurementValue smv : values){
 							StatisticalMeasure type = smv.getType();
 							// DONT FORGET sample size, MEAN etc
-							if (type.equals(StatisticalMeasure.MAX()) || type.equals(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY())) {
-								if (smv.getValue()>=threshold) taxaAfter.add(td);
+							if (type.equals(StatisticalMeasure.MAX()) || type.equals(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY()) || type.equals(StatisticalMeasure.AVERAGE())) {
+								if (smv.getValue()>threshold) taxaAfter.add(td);
 							}
-							if (type.equals(StatisticalMeasure.MIN()) || type.equals(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY())) {
+							if (type.equals(StatisticalMeasure.MIN()) || type.equals(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY()) || type.equals(StatisticalMeasure.AVERAGE())) {
 								if (smv.getValue()<=threshold) taxaBefore.add(td);
 							}
 						}
@@ -407,6 +700,7 @@ public class IdentificationKeyGenerator {
 				}
 			}
 		}
+//		if (unit==null) unit=new String("");
 		return list;
 	}
 	
@@ -444,6 +738,13 @@ public class IdentificationKeyGenerator {
 								lowerboundary = smv.getValue();
 								lowerboundarypresent=true;
 							}
+							// TODO improve
+							if (type.equals(StatisticalMeasure.AVERAGE()) && upperboundarypresent==false && lowerboundarypresent==false) {
+								lowerboundary = smv.getValue();
+								upperboundary = lowerboundary;
+								lowerboundarypresent=true;
+								upperboundarypresent=true;
+							}
 						}
 						if (lowerboundarypresent && upperboundarypresent) {
 							allValues.add(lowerboundary);
@@ -466,7 +767,7 @@ public class IdentificationKeyGenerator {
 			taxaAfter=0;
 			for (j=0;j<allValues.size()/2;j++) {
 				if (allValues.get(j*2+1)<=threshold) taxaBefore++;
-				if (allValues.get(j*2)>=threshold) taxaAfter++;
+				if (allValues.get(j*2)>threshold) taxaAfter++;
 			}
 			difference = Math.abs(taxaBefore-taxaAfter);
 			if (difference<differenceMin){
@@ -479,13 +780,17 @@ public class IdentificationKeyGenerator {
 		for (i=0;i<taxaBefore;i++) {
 			defaultQuantitativeScore += taxaAfter - i;
 		}
-		System.out.println(taxaBefore + ", " + taxaAfter + ", " +defaultQuantitativeScore);
 		return (float)(defaultQuantitativeScore);
 	}
 	
+<<<<<<< .mine
+	private float categoricalFeatureScore(Feature feature, Set<TaxonDescription> coveredTaxa){
+=======
 	private float featureScore(Feature feature, Set<TaxonDescription> coveredTaxa){
+>>>>>>> .r10927
 		int i,j;
 		float score =0;
+		float power=0;
 		TaxonDescription[] coveredTaxaArray = coveredTaxa.toArray(new TaxonDescription[coveredTaxa.size()]); // I did not figure a better way to do this
 		for (i=0 ; i<coveredTaxaArray.length; i++){
 			Set<DescriptionElementBase> elements1 = coveredTaxaArray[i].getElements();
@@ -499,13 +804,83 @@ public class IdentificationKeyGenerator {
 				for (DescriptionElementBase deb : elements2){
 					if (deb.getFeature().equals(feature)) deb2 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature
 				}
+<<<<<<< .mine
+				power = DefaultPower(deb1,deb2);
+				score = score + power;
+=======
 				score = score + defaultPower(deb1,deb2);
+>>>>>>> .r10927
+			}
+		}
+		return score;
+		}
+	
+<<<<<<< .mine
+	private void checkDependencies(FeatureNode node){ // walks the node and all its children, indexing their features by the states they depend on
+		if (node.getOnlyApplicableIf()!=null){
+			Set<State> addToOAI = node.getOnlyApplicableIf();
+			for (State state : addToOAI){
+				if (!oAIdependencies.containsKey(state)) oAIdependencies.put(state, new HashSet<Feature>()); // create the set only the first time a state is met, never overwrite it
+				oAIdependencies.get(state).add(node.getFeature()); // the feature of this node is only applicable if 'state' is present
+			}
+		}
+		if (node.getInapplicableIf()!=null){
+			Set<State> addToiI = node.getInapplicableIf();
+			for (State state : addToiI){
+				if (!iIdependencies.containsKey(state)) iIdependencies.put(state, new HashSet<Feature>()); // same rule as above: create once, keep what was already collected
+				iIdependencies.get(state).add(node.getFeature()); // the feature of this node is inapplicable if 'state' is present
+			}
+		}
+		if (node.getChildren()!=null) {
+			for (FeatureNode fn : node.getChildren()){
+				checkDependencies(fn); // recursion over the whole subtree
+			}
+		}
+	}
+	
+	private float FeatureScoreAndMerge(Feature feature, Set<TaxonDescription> coveredTaxa, Map<State,Set<State>> exclusions){ // sums DefaultPower over every pair of covered taxa and fills 'exclusions' with the states of the pairs that score above 0
+		int i,j;
+		float score =0;
+		float power=0;
+		TaxonDescription[] coveredTaxaArray = coveredTaxa.toArray(new TaxonDescription[coveredTaxa.size()]); // an array is used so that every unordered pair (i,j) with i<j is visited exactly once
+		for (i=0 ; i<coveredTaxaArray.length; i++){
+			Set<DescriptionElementBase> elements1 = coveredTaxaArray[i].getElements();
+			DescriptionElementBase deb1 = null;
+			for (DescriptionElementBase deb : elements1){
+				if (deb.getFeature().equals(feature)) deb1 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature (the last match wins)
+			}
+			for (j=i+1 ; j< coveredTaxaArray.length ; j++){
+				Set<DescriptionElementBase> elements2 = coveredTaxaArray[j].getElements();
+				DescriptionElementBase deb2 = null;
+				for (DescriptionElementBase deb : elements2){
+					if (deb.getFeature().equals(feature)) deb2 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature (the last match wins)
+				}
+				power = DefaultPower(deb1,deb2); // deb1 or deb2 is still null here when a taxon has no element for this feature
+				score = score + power;
+				if (power>0) // if there is no state in common between deb1 and deb2
+				{
+					CategoricalData cat1 = (CategoricalData)deb1; // NOTE(review): assumes both elements are CategoricalData - confirm this is only called for categorical features
+					CategoricalData cat2 = (CategoricalData)deb2;
+					for (StateData statedata1 : cat1.getStates()){
+						State state1 = statedata1.getState();
+						if (!exclusions.containsKey(state1)) exclusions.put(state1, new HashSet<State>()); // lazily create the entry of state1
+						for (StateData statedata2 : cat2.getStates()){
+							State state2 = statedata2.getState();
+							if (!exclusions.containsKey(state2)) exclusions.put(state2, new HashSet<State>()); // lazily create the entry of state2
+							exclusions.get(state1).add(state2); // the exclusion is recorded in both directions
+							exclusions.get(state2).add(state1);
+						}
+					}
+				}
 			}
 		}
 		return score;
 		}
 	
+	private float DefaultPower(DescriptionElementBase deb1, DescriptionElementBase deb2){
+=======
 	private float defaultPower(DescriptionElementBase deb1, DescriptionElementBase deb2){
+>>>>>>> .r10927
 		if (deb1==null || deb2==null) {
 			return -1; //what if the two taxa don't have this feature in common ?
 		}
@@ -529,7 +904,7 @@ public class IdentificationKeyGenerator {
 			}
 			// modifiers not taken into account for this default power
 		}
-		// one point each time two taxa have at least a state in common for a given feature
+		// one point each time two taxa can be discriminated for a given feature
 		if (bool) return 0;
 		else return 1;
 	}
@@ -539,14 +914,24 @@ public class IdentificationKeyGenerator {
 			level++;
 			int levelcopy = level;
 			int j=1;
+			String delimiter;
+			String equals = " = ";
+			String quantitative = "";
 			String newspaces = spaces.concat("\t");
 			for (PolytomousKeyNode polytomousKeyNode : polytomousKeyNodes){
 				if (polytomousKeyNode.getQuestion() != null) {
 					String state = null;
+<<<<<<< .mine
+					if (fnode.getQuestion(Language.DEFAULT())!=null) state = fnode.getQuestion(Language.DEFAULT()).getLabel();
+					if (fnode.getFeature().isSupportsQuantitativeData()) delimiter = quantitative;
+					else delimiter = equals;
+					System.out.println(newspaces + levelcopy + " : " + j + " " + fnode.getFeature().getLabel() + delimiter + state);
+=======
 					if (polytomousKeyNode.getStatement().getLabel(Language.DEFAULT() ) != null){
 						state = polytomousKeyNode.getStatement().getLabelText(Language.DEFAULT());
 					}
 					System.out.println(newspaces + levelcopy + " : " + j + " " + polytomousKeyNode.getQuestion().getLabelText(Language.DEFAULT()) + " = " + state);
+>>>>>>> .r10927
 					j++;
 				}
 				else { // TODO never read ?
diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java
new file mode 100644
index 0000000000..ff5a3cfb3a
--- /dev/null
+++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java
@@ -0,0 +1,60 @@
+package eu.etaxonomy.cdm.api.service;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+
+import eu.etaxonomy.cdm.model.common.Language;
+import eu.etaxonomy.cdm.model.description.Feature;
+import eu.etaxonomy.cdm.model.description.Modifier;
+import eu.etaxonomy.cdm.model.description.State;
+import eu.etaxonomy.cdm.model.description.StateData;
+import eu.etaxonomy.cdm.model.description.TextData;
+
+public class MicroFormatCategoricalDescriptionBuilder extends AbstractCategoricalDescriptionBuilder{
+	// Closing tag for every span opened by spanClass; this builder wraps features, states and modifiers in <span class="..."> markup.
+	private String spanEnd = "</span>";
+	// Writes the states one after the other, each one preceded by its modifiers, with a comma between two states.
+	protected TextData doBuild(List<StateData> states, List<Language> languages){
+		TextData textData = TextData.NewInstance(); // receives the generated markup and the language it is written in
+		StringBuilder markup = new StringBuilder();
+		Language language = null;
+		Iterator<StateData> stateIterator = states.iterator();
+		while (stateIterator.hasNext()){
+			StateData stateData = stateIterator.next();
+			State state = stateData.getState();
+			for (Modifier modifier : stateData.getModifiers()){ // the modifiers of a state come right before the state itself
+				markup.append(' ').append(spanClass("modifier")).append(modifier.getPreferredRepresentation(languages).getLabel()).append(spanEnd);
+			}
+			markup.append(' ').append(spanClass("state")).append(state.getPreferredRepresentation(languages).getLabel()).append(spanEnd);
+			if (stateIterator.hasNext()) {
+				markup.append(',');
+			}
+			if (language==null) { // the language is taken from the first state that provides one
+				language = state.getPreferredRepresentation(languages).getLanguage(); // TODO What if there are different languages ?
+			}
+		}
+		if (language==null) { // nothing found: fall back to the default language
+			language = Language.DEFAULT();
+		}
+		textData.putText(markup.toString(), language);
+		return textData;
+	}
+	// Returns the feature label inside a "feature" span; with doItBetter the label is cut at the first '<' and one trailing space is removed.
+	protected String buildFeature(Feature feature, boolean doItBetter){
+		if (feature==null || feature.getLabel()==null) {
+			return "";
+		}
+		String label = feature.getLabel();
+		if (doItBetter) {
+			label = StringUtils.removeEnd(StringUtils.substringBefore(label, "<"), " ");
+		}
+		return spanClass("feature") + label + spanEnd;
+	}
+	// Opening tag of a span carrying the given class attribute.
+	private String spanClass(String classString){
+		return "<span class=\"" + classString + "\">";
+	}
+}
diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java
index 0c9cb75655..25e643715c 100644
--- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java
+++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java
@@ -1,13 +1,11 @@
 package eu.etaxonomy.cdm.api.service;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.regex.Pattern;
 
 import org.apache.commons.lang.StringUtils;
 import org.springframework.stereotype.Component;
@@ -20,20 +18,78 @@ import eu.etaxonomy.cdm.model.description.FeatureTree;
 import eu.etaxonomy.cdm.model.description.QuantitativeData;
 import eu.etaxonomy.cdm.model.description.TaxonDescription;
 import eu.etaxonomy.cdm.model.description.TextData;
+import eu.etaxonomy.cdm.model.description.TextFormat;
 import eu.etaxonomy.cdm.model.common.Annotation;
 import eu.etaxonomy.cdm.model.common.AnnotationType;
 import eu.etaxonomy.cdm.model.common.Language;
 
+
+/**
+ * Generator of natural language descriptions from TaxonDescriptions.
+ * 
+ * @author m.venin
+ * @created 13.04.2010
+ * @version 1.0
+ */
 @Component
 public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
 
+	private String firstSeparator = ",";
+	private String secondSeparator = ".";
+	private List<Integer> levels = new ArrayList<Integer>();
+	
 	private DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder();
 	private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder();
-	
-	private String previousFeatureName;
+
+	private TextData previousTextData;
 	
 	private Map<String, INaturalLanguageTextDataProcessor> elementProcessors;
 	
+	private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
+	
+	/**
+	 * Change the first separator used by generateSingleTextData. By default ",".
+	 * 
+	 * @param separator
+	 */
+	public void setFirstSeparator(String separator){
+		firstSeparator=separator;
+	}
+	
+	public String getFirstSeparator(){
+		return firstSeparator;
+	}
+	
+	/**
+	 * Change the second separator used by generateSingleTextData. By default ".".
+	 * 
+	 * @param separator
+	 */
+	public void setSecondSeparator(String separator){
+		secondSeparator=separator;
+	}
+	
+	public String getSecondSeparator(){
+		return secondSeparator;
+	}
+	
+	/**
+	 * @param quantitativeDescriptionBuilder
+	 */
+	public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
+		this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
+	}
+	
+	/**
+	 * @param categoricalDescriptionBuilder
+	 */
+	public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
+		this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
+	}
+	
+	/**
+	 * @return the element processors of this generator
+	 */
 	public Map<String, INaturalLanguageTextDataProcessor> getElementProcessors() {
 		return elementProcessors;
 	}
@@ -49,29 +105,37 @@ public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
 			Map<String, INaturalLanguageTextDataProcessor> elementProcessors) {
 		this.elementProcessors = elementProcessors;
 	}
-
-	private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
 	
 	/**
-	 * @param annotations
+	 * Looks for technical annotations, if one matches a regular expression of the element processors
+	 * the associated processor is added to the applicable element processors which will then be applied
+	 * when generating the description.
+	 * 
+	 * @param annotations the set of annotations of the description
 	 */
 	private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) {
 		 
 		if(annotations != null){
 			for(Annotation annotation : annotations){
 				if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){
+					if (elementProcessors!=null){
 					for(String regex : elementProcessors.keySet()){
 						if(annotation.getText().matches(regex)){
 							applicableElementProcessors.add(elementProcessors.get(regex));
 						}
 					}
 				}
+					}
 			}
 		}
 	}
 	
+
 	/**
-	 * @param textData
+	 * Applies the list of applicable processors to a TextData.
+	 * 
+	 * @param textData the TextData to be modified
+	 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
 	 */
 	private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){
 		for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){
@@ -79,201 +143,192 @@ public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
 		}
 	}
 	
+
 	/**
+	 * The most simple function to generate a description. The language used is the default one.
+	 * 
+	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
+	 * @param description the TaxonDescription with all the data
 	 * 
+	 * @return a list of TextData, each one being a basic element of the natural language description
 	 */
 	public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description) {
-		List<Language> languages = new ArrayList<Language>();
-		languages.add(Language.DEFAULT());
-		return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages);
+		return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT());
 	}
 	
+
 	
 	/**
+	 * Generate a description in a specified language.
 	 * 
+	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
+	 * @param description the TaxonDescription with all the data
+	 * @param language the language in which the description has to be printed
+	 * 
+	 * @return a list of TextData, each one being a basic element of the natural language description
 	 */
-	public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List<Language> languages) {
-		return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages);
-	}
-	
-	
 	public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description,	Language language) {
 		List<Language> languages = new ArrayList<Language>();
 		languages.add(language);
-		
 		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
-		
-		return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages);
+		return generatePreferredNaturalLanguageDescription(featureTree,description,languages);
 	}
 	
-
-
-	/** recursive function that goes through a tree containing the order in which the description has to be generated,
-	 *  if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
-	 * 
-	 * @param children
-	 * @param parent
-	 * @param description
-	 * @param language The language in which the description has to be written
-	 * @return
-	 */
-	private List<TextData> buildBranchesDescr(List<FeatureNode> children, FeatureNode parent, TaxonDescription description, List<Language> languages) {
-		List<TextData> listTextData = new ArrayList<TextData>(); ;
-		if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
-			Feature fref = parent.getFeature();
-			for (Iterator<FeatureNode> ifn = children.iterator() ; ifn.hasNext() ;){
-				FeatureNode fn = ifn.next();
-				listTextData.addAll(buildBranchesDescr(fn.getChildren(),fn,description, languages));
-			}
-		}
-		else { //once a leaf is reached
-			Feature fref = parent.getFeature();
-			if (fref!=null) { // needs a better algorithm
-				int k=0;
-					Set<DescriptionElementBase> elements = description.getElements();
-					for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
-						DescriptionElementBase descriptionElement = deb.next();
-						TextData textData;
-						if (descriptionElement.getFeature().equals(fref)){ // if one matches the corresponding feature associated to this leaf
-							if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
-								CategoricalData categoricalData = (CategoricalData) descriptionElement;
-								//textData = buildCategoricalDescr(categoricalData, language);
-								textData = categoricalDescriptionBuilder.build(categoricalData, languages);
-								//textData.putText(fref.getLabel(), Language.DEFAULT());
-								TextData featureName = TextData.NewInstance(fref.getLabel(), Language.DEFAULT(), null);
-								listTextData.add(featureName); // if you want to print the name of the feature (Should it be an option ?)
-								listTextData.add(textData);
-							}
-							if (descriptionElement instanceof QuantitativeData) { // if this description is a QuantitativeData, generate the according TextData
-								QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
-								textData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
-								TextData featureName = TextData.NewInstance(fref.getLabel(), Language.DEFAULT(), null);
-								listTextData.add(featureName); // if you want to print the name of the feature
-								listTextData.add(textData);
-							}
-						}
-					}
-			}
-		}
-		return listTextData;
-	}
-
 	/**
+	 * Generate a description with a specified list of preferred languages.
 	 * 
+	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
+	 * @param description the TaxonDescription with all the data
+	 * @param languages the ordered list of languages preferred for printing the description
 	 * 
-	 * @param quantitativeDescriptionBuilder
+	 * @return a list of TextData, each one being a basic element of the natural language description
 	 */
-	public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
-		this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
+	public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List<Language> languages) {
+		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
+		return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0);
 	}
 	
 	/**
+	 * Generate a description as a single paragraph in a TextData.
 	 * 
+	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
+	 * @param description the TaxonDescription with all the data
 	 * 
-	 * @param categoricalDescriptionBuilder
+	 * @return a TextData in the default language.
 	 */
-	public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
-		this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
+	public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description) {
+		return generateSingleTextData(featureTree,description,Language.DEFAULT());
 	}
 	
-	
 	/**
-	 * @param featureTree
-	 * @param description
-	 * @param language
-	 * @return
+	 * Generate a description as a single paragraph in a TextData.
+	 * 
+	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
+	 * @param description the TaxonDescription with all the data
+	 * @param language the language in which the description has to be printed
+	 * 
+	 * @return a TextData in the specified language.
 	 */
-	public String generateStringNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description,	Language language) {
+	public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description, Language language) {
 		List<Language> languages = new ArrayList<Language>();
 		languages.add(language);
-		return buildString(featureTree.getRootChildren(), featureTree.getRoot(), description, languages).toString();
+		return generatePreferredSingleTextData(featureTree,description,languages);
 	}
 	
 	/**
-	 * recursive function that goes through a tree containing the order in which
-	 * the description has to be generated, if an element of this tree matches
-	 * one of the TaxonDescription, a DescriptionBuilder is called which returns
-	 * a TextData with the corresponding description.
-	 * <p>
-	 * Also applies the folowing formatting rules which are special for data coming from Delta, DeltaAccess, DiversityDescriptions:
+	 * Generate a description with a specified list of preferred languages.
 	 * 
-	 * <ul>
-	 * <li><b>(1.A) if( doSkipTextInBrackets) : </b>Label Text in brackets is always skipped the remaining text string I the TEXT_TO_DISPLAY. The text may contain multiple substrings tagged with the brackets. A tagged substring may also occur in the middle of the whole string.</li>
-	 * <li><b>(1.B) else : </b>just remove the brackets
-	 * <li><b>(2) : </b> If the TEXT_TO_DISPLAY is equal the TEXT_TO_DISPLAY of the previous element output of this text is suppressed.</li>
-	 * </ul>
+	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
+	 * @param description the TaxonDescription with all the data
+	 * @param languages the ordered list of languages preferred for printing the description
 	 * 
-	 * @param children
-	 * @param parent
-	 * @param description
-	 * @param language
-	 *            The language in which the description has to be written
-	 * @return
+	 * @return a TextData using the languages (in the given order of preference)
 	 */
-	private StringBuilder buildString(List<FeatureNode> children, FeatureNode parent, TaxonDescription description, List<Language> languages) {
-		StringBuilder stringbuilder = new StringBuilder();
+	public TextData generatePreferredSingleTextData(FeatureTree featureTree, TaxonDescription description, List<Language> languages) {
+		levels.clear(); // must happen BEFORE buildBranchesDescr refills levels; clearing afterwards left the loop below with nothing to iterate over
+		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
+		
+		List<TextData> texts = buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0);
+		StringBuilder sb = new StringBuilder();
+		int i = 0,j; // i walks through texts, j walks through levels
+		boolean startSentence = false, firstOne = true;
+		Integer level;
+		// levels has just been filled by buildBranchesDescr; a -1 entry marks a feature name that was added to texts
+		for (j=0 ; j<levels.size() ; j++){
+			level = levels.get(j);
+			if (level.equals(-1)){
+				if ((j+1)<levels.size() && levels.get(j+1).equals(0)){ // a feature name directly followed by a level 0 entry opens a new sentence
+					if (!firstOne) sb.append(secondSeparator + " ");
+					startSentence=true;
+					firstOne=false;
+					String asString = texts.get(i).getText(Language.DEFAULT()).toString();
+					if (asString.length()>1) sb.append(asString.substring(0,1).toUpperCase() + asString.substring(1)); // capitalise the first letter of the sentence
+				}
+				i++;
+			}
+			else if (level.equals(0)) {
+				if (startSentence) sb.append(texts.get(i).getText(Language.DEFAULT()));
+				else sb.append(firstSeparator + texts.get(i).getText(Language.DEFAULT())); // inside a sentence the elements are joined with the first separator
+				startSentence=false;
+				i++;
+			}
+			else if (!level.equals(0) && !level.equals(-1)){
+				if (!firstOne && levels.get(j-1).equals(0)){
+					if (i<texts.size()) sb.append(texts.get(i).getText(Language.DEFAULT()));
+					i++;
+				}
+			}
+		}
+		sb.append(secondSeparator);
+		String returnString = sb.toString();
+		returnString = StringUtils.replace(returnString, "  ", " "); // collapse the double spaces produced by the concatenations
+		returnString = StringUtils.removeStart(returnString, secondSeparator);
+		return TextData.NewInstance(returnString,Language.DEFAULT(),TextFormat.NewInstance("", "Text", "")); // return the cleaned-up string, which was previously computed and then ignored
+	}
+	
+
+
+	/** recursive function that goes through a tree containing the order in which the description has to be generated,
+	 *  if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
+	 * 
+	 * @param children the children of the feature node considered
+	 * @param parent the feature node considered
+	 * @param description the TaxonDescription element for which we want a natural language output
+	 * @param language The language in which the description has to be written
+	 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
+	 */
+	private List<TextData> buildBranchesDescr(List<FeatureNode> children, FeatureNode parent, TaxonDescription description, List<Language> languages, int floor) {
+		List<TextData> listTextData = new ArrayList<TextData>();
+		floor++; // counter to know the current level in the tree
+		
 		if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
+			levels.add(new Integer(floor)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
+			Feature feature = parent.getFeature();
+			TextData featureName;
+			if (feature!=null && feature.getLabel()!=null){ // if a node is associated to a feature
+				featureName = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
+				levels.add(new Integer(-1)); // it is indicated by a '-1' after its level
+				listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list
+			}
+			else featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
+			
 			for (Iterator<FeatureNode> ifn = children.iterator() ; ifn.hasNext() ;){
+				previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree
 				FeatureNode fn = ifn.next();
-				StringBuilder tempsb = buildString(fn.getChildren(),fn,description, languages);
-				if (tempsb.length()>1) stringbuilder.append(tempsb.deleteCharAt(tempsb.length()-1));
-//				if (tempsb.length()>1) stringbuilder.append(tempsb);
+				listTextData.addAll(buildBranchesDescr(fn.getChildren(),fn,description, languages, floor));
 			}
-			stringbuilder.append('.');
 		}
 		else { //once a leaf is reached
-			Feature fref = parent.getFeature();
-			if (fref!=null) { // needs a better algorithm
-				int k=0;
+			Feature feature = parent.getFeature();
+			if (feature!=null && (feature.isSupportsQuantitativeData() || feature.isSupportsCategoricalData())) {
 					Set<DescriptionElementBase> elements = description.getElements();
 					for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
 						DescriptionElementBase descriptionElement = deb.next();
-						TextData textData;
-						if (descriptionElement.getFeature().equals(fref)){ // if one matches the corresponding feature associated to this leaf
+						if (descriptionElement.getFeature().equals(feature)){ // if one matches the corresponding feature associated to this leaf
+							if (descriptionElement instanceof CategoricalData || descriptionElement instanceof QuantitativeData){
+								TextData featureTextData;
+								TextData statesTextData;
 							if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
 								CategoricalData categoricalData = (CategoricalData) descriptionElement;
-								//textData = buildCategoricalDescr(categoricalData, language);
-								textData = categoricalDescriptionBuilder.build(categoricalData, languages);
-								//textData.putText(fref.getLabel(), Language.DEFAULT());
-								String featureName = StringUtils.substringBefore(fref.getLabel(), "<");
-								if (previousFeatureName==null){
-									previousFeatureName = featureName;
-									String featureString = categoricalDescriptionBuilder.buildFeature(fref,true);
-									stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1));
-								}
-								else if (!featureName.contains(previousFeatureName)) {
-									stringbuilder.append(". ");
-									previousFeatureName = featureName;
-									String featureString = categoricalDescriptionBuilder.buildFeature(fref,true);
-									stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1)); // if you want to print the name of the feature (Should it be an option ?)
-								}
-								stringbuilder.append(textData.getText(Language.DEFAULT()));
-								stringbuilder.append(',');
+								statesTextData = categoricalDescriptionBuilder.build(categoricalData, languages);
+								featureTextData = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
 							}
-							if (descriptionElement instanceof QuantitativeData) { // if this description is a QuantitativeData, generate the according TextData
+							else { // if this description is a QuantitativeData, generate the according TextData
 								QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
-								textData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
-								String featureName = StringUtils.substringBefore(fref.getLabel(), "<");
-								if (previousFeatureName==null){
-									previousFeatureName = featureName;
-									String featureString = quantitativeDescriptionBuilder.buildFeature(fref,true);
-									stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1));
-								}
-								else if (!featureName.contains(previousFeatureName)) {
-									stringbuilder.append(". ");
-									previousFeatureName = featureName;
-									String featureString = quantitativeDescriptionBuilder.buildFeature(fref,true);
-									stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1)); // if you want to print the name of the feature (Should it be an option ?)
-								}
-								stringbuilder.append(textData.getText(Language.DEFAULT()));
-								stringbuilder.append(',');
+								statesTextData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
+								featureTextData = quantitativeDescriptionBuilder.buildTextDataFeature(feature, languages);
+							}
+							applyNaturalLanguageDescriptionElementProcessors(featureTextData, previousTextData);
+							levels.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
+							listTextData.add(featureTextData);
+							levels.add(new Integer(floor)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
+							listTextData.add(statesTextData);
 							}
 						}
 					}
 			}
 		}
-		return stringbuilder;
-	}
-
+		return listTextData;
+	}	
 
 }
-- 
2.34.1