From 2106a8eed60b333bda62b160dfca848e34281c79 Mon Sep 17 00:00:00 2001 From: "m.venin" Date: Thu, 2 Dec 2010 13:39:08 +0000 Subject: [PATCH] Last updates for natural language generation (added comments, new options, cleaned the code, etc) --- .gitattributes | 2 + ...DefaultQuantitativeDescriptionBuilder.java | 73 ++- .../api/service/DeltaTextDataProcessor.java | 49 ++ .../cdm/api/service/DescriptionBuilder.java | 45 +- .../service/INaturalLanguageGenerator.java | 6 +- .../INaturalLanguageTextDataProcessor.java | 5 +- .../service/IdentificationKeyGenerator.java | 421 +++++++++++++++++- ...roFormatCategoricalDescriptionBuilder.java | 60 +++ .../api/service/NaturalLanguageGenerator.java | 351 +++++++++------ 9 files changed, 800 insertions(+), 212 deletions(-) create mode 100644 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java create mode 100644 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java diff --git a/.gitattributes b/.gitattributes index bbb3b69e0e..8545626236 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1568,6 +1568,7 @@ cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/CommonServiceImpl.jav cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DatabaseServiceHibernateImpl.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultCategoricalDescriptionBuilder.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java -text +cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionServiceImpl.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DistributionNodeComparator.java -text @@ -1611,6 +1612,7 @@ cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyServ cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/LocationServiceImpl.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MarkerServiceImpl.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MediaServiceImpl.java -text +cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatQuantitativeDescriptionBuilder.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NameServiceImpl.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NamedAreaNodeComparator.java -text diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java index 282cced849..ad7134f9cb 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DefaultQuantitativeDescriptionBuilder.java @@ -14,26 +14,15 @@ import eu.etaxonomy.cdm.model.description.TextData; import eu.etaxonomy.cdm.model.description.TextFormat; public class DefaultQuantitativeDescriptionBuilder extends AbstractQuantitativeDescriptionBuilder { + @Override protected TextData doBuild(Map measures, MeasurementUnit mUnit, List languages){ StringBuilder QuantitativeDescription = new StringBuilder(); // this StringBuilder is used to concatenate the different words of the description before saving it in the TextData TextData textData = TextData.NewInstance(); // TextData that will contain the description and the language corresponding // booleans indicating whether a kind of value is present or not and the float that will eventually hold the value - boolean average = false; - float averagevalue = new Float(0); - boolean sd = false; - float sdvalue = new Float(0); - boolean min = false; - float minvalue = new Float(0); - boolean max = false; - float maxvalue = new Float(0); - boolean lowerb = false; - float lowerbvalue = new Float(0); - boolean upperb = false; - float upperbvalue = new Float(0); - String unit = "(unknown unit)"; + String unit = ""; if ((mUnit!=null)&&(mUnit.getLabel()!=null)){ unit = mUnit.getLabel(); } @@ -54,32 +43,23 @@ public class DefaultQuantitativeDescriptionBuilder extends AbstractQuantitativeD String space = " "; // should "space" be considered as a linking word and thus be stored in NaturalLanguageTerm.class ? // the booleans and floats are updated according to the presence or absence of values - if (measures.containsKey(StatisticalMeasure.AVERAGE())) { - average = true; - averagevalue = measures.get(StatisticalMeasure.AVERAGE()); - } - if(measures.containsKey(StatisticalMeasure.STANDARD_DEVIATION())) { - sd = true; - sdvalue = measures.get(StatisticalMeasure.STANDARD_DEVIATION()); - } - if (measures.containsKey(StatisticalMeasure.MIN())) { - min = true; - minvalue = measures.get(StatisticalMeasure.MIN()); - } - if (measures.containsKey(StatisticalMeasure.MAX())) { - max = true; - maxvalue = measures.get(StatisticalMeasure.MAX()); - } - if (measures.containsKey(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY())) { - lowerb = true; - lowerbvalue = measures.get(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY()); - } - if (measures.containsKey(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY())) { - upperb = true; - upperbvalue = measures.get(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY()); - } - - + + Boolean max, min, upperb, lowerb, average, sd; + + String averagevalue = getValue(measures,StatisticalMeasure.AVERAGE()); + if (averagevalue!=null) average=true; else average=false; + String sdvalue = getValue(measures,StatisticalMeasure.STANDARD_DEVIATION()); + if (sdvalue!=null) sd=true; else sd=false; + String minvalue = getValue(measures,StatisticalMeasure.MIN()); + if (minvalue!=null) min=true; else min=false; + String maxvalue = getValue(measures,StatisticalMeasure.MAX()); + if (maxvalue!=null) max=true; else max=false; + String lowerbvalue = getValue(measures,StatisticalMeasure.TYPICAL_LOWER_BOUNDARY()); + if (lowerbvalue!=null) lowerb=true; else lowerb=false; + String upperbvalue = getValue(measures,StatisticalMeasure.TYPICAL_UPPER_BOUNDARY()); + if (upperbvalue!=null) upperb=true; else upperb=false; + + // depending on the different associations of values, a sentence is built if (max && min) { QuantitativeDescription.append(space + from + space + minvalue + space + to + space + maxvalue + space + unit); @@ -120,6 +100,18 @@ public class DefaultQuantitativeDescriptionBuilder extends AbstractQuantitativeD return textData; } + private String getValue(Map measures, Object key) { + Float floatValue; + Integer intValue; + if(measures.containsKey(key)) { + floatValue = measures.get(key); + intValue=floatValue.intValue(); + if (floatValue.equals(intValue.floatValue())) return intValue.toString(); + else return floatValue.toString(); + } + else return null; + } + protected String buildFeature(Feature feature, boolean doItBetter){ if (feature==null || feature.getLabel()==null) return ""; else { @@ -134,7 +126,8 @@ public class DefaultQuantitativeDescriptionBuilder extends AbstractQuantitativeD } else{ String betterString = StringUtils.replaceChars(feature.getLabel(), "<>",""); // only remove the brackets - return StringUtils.substringBeforeLast(betterString," "); + return betterString; +// return StringUtils.substringBeforeLast(betterString," "); } } } diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java new file mode 100644 index 0000000000..65e00761e7 --- /dev/null +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DeltaTextDataProcessor.java @@ -0,0 +1,49 @@ +package eu.etaxonomy.cdm.api.service; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; + +import eu.etaxonomy.cdm.model.description.TextData; +import eu.etaxonomy.cdm.model.common.Language; + +import eu.etaxonomy.cdm.model.common.LanguageString; + +public class DeltaTextDataProcessor implements INaturalLanguageTextDataProcessor { + + @Override + public void process(TextData textData, TextData previousTextData) { + boolean doItBetter = false; + + Map oldMultiLanguageText = previousTextData.getMultilanguageText(); + Map multiLanguageText = textData.getMultilanguageText(); + + for (Language language : multiLanguageText.keySet()){ + LanguageString langString = multiLanguageText.get(language); + String oldText = langString.getText(); + + oldText = StringUtils.remove(oldText,oldMultiLanguageText.get(language).getText()); + + if (doItBetter) { //TODO remove the text between brackets + StringBuilder strbuilder = new StringBuilder(); + do { + strbuilder.append(StringUtils.substringBefore(oldText, "<")); + } + while (!(oldText=StringUtils.substringAfter(oldText, ">")).equals("")); + StringUtils.substringBeforeLast(strbuilder.toString()," "); + } + else{ + oldText=StringUtils.replaceChars(oldText,"<>",""); + } + + textData.removeText(language); + textData.putText(oldText,language); + + } + + } + +} diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java index 8a5da917e9..0d14a1e950 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/DescriptionBuilder.java @@ -3,12 +3,53 @@ package eu.etaxonomy.cdm.api.service; import java.util.List; import eu.etaxonomy.cdm.model.common.Language; +import eu.etaxonomy.cdm.model.common.Representation; import eu.etaxonomy.cdm.model.description.DescriptionElementBase; import eu.etaxonomy.cdm.model.description.Feature; import eu.etaxonomy.cdm.model.description.TextData; public abstract class DescriptionBuilder { + + protected String separator = ","; + private int option = 0; + + public void returnAbbreviatedLabels() { + option=1; + } + + public void returnTexts() { + option=2; + } + + public void returnLabels() { + option=0; + } + + public void setSeparator(String newSeparator) { + separator = newSeparator; + } + + public String getSeparator() { + return separator; + } + public abstract TextData build(T descriptionElement, List languages); - protected abstract String buildFeature(Feature feature, boolean doItBetter); -} + protected String getRightText(Representation representation){ + String result; + if (option==1){ + result = representation.getAbbreviatedLabel(); + if (result != null) return result; + } + else if (option==2){ + result = representation.getText(); + if (result != null) return result; + } + return representation.getLabel(); + } + + public TextData buildTextDataFeature(Feature feature, List languages){ + return TextData.NewInstance(getRightText(feature.getPreferredRepresentation(languages)),languages.get(0),null); + } + +} \ No newline at end of file diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageGenerator.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageGenerator.java index cbeaad7752..1d79a96a35 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageGenerator.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageGenerator.java @@ -26,5 +26,9 @@ public interface INaturalLanguageGenerator { public List generatePreferredNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, List languages); - public String generateStringNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, Language language); + public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description); + + public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description, Language language); + + public TextData generatePreferredSingleTextData(FeatureTree featureTree, TaxonDescription description, List languages); } diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageTextDataProcessor.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageTextDataProcessor.java index ad16ecd479..a5870f81e6 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageTextDataProcessor.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/INaturalLanguageTextDataProcessor.java @@ -19,13 +19,12 @@ import eu.etaxonomy.cdm.model.description.TextData; public interface INaturalLanguageTextDataProcessor { /** - * Applies some special proccessing to the text contained in the TextData or/and + * Applies some special processing to the text contained in the TextData or/and * to the Feature label/representation * * @param textData * @param previousTextData TODO - * @return the modified TextData */ - public TextData process(TextData textData, TextData previousTextData); + public void process(TextData textData, TextData previousTextData); } diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyGenerator.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyGenerator.java index 28c8a91f60..f28f0cbcd1 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyGenerator.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/IdentificationKeyGenerator.java @@ -14,8 +14,12 @@ import eu.etaxonomy.cdm.model.description.CategoricalData; import eu.etaxonomy.cdm.model.description.DescriptionElementBase; import eu.etaxonomy.cdm.model.description.Feature; import eu.etaxonomy.cdm.model.description.FeatureNode; +<<<<<<< .mine +import eu.etaxonomy.cdm.model.description.FeatureTree; +======= import eu.etaxonomy.cdm.model.description.FeatureTree; import eu.etaxonomy.cdm.model.description.KeyStatement; +>>>>>>> .r10927 import eu.etaxonomy.cdm.model.description.PolytomousKey; import eu.etaxonomy.cdm.model.description.PolytomousKeyNode; import eu.etaxonomy.cdm.model.description.QuantitativeData; @@ -32,10 +36,17 @@ public class IdentificationKeyGenerator { private PolytomousKey polytomousKey; // the Identification Key private List features; // the features used to generate the key private Set taxa; // the base of taxa + private FeatureTree dependenciesTree; + private Map> paths = new HashMap>(); // for statistics only + private boolean merge=true; + + private Map> iIdependencies = new HashMap>(); + private Map> oAIdependencies = new HashMap>(); + private boolean dependenciesON = true; private String before="<"; private String after=">"; - private String separator = ", "; + private String separator = " or "; /** * Sets the features used to generate the key @@ -55,6 +66,15 @@ public class IdentificationKeyGenerator { this.taxa = taxaSet; } + /** + * Sets the tree containing the dependencies between states and features + * + * @param tree + */ + public void setDependencies(FeatureTree tree){ + this.dependenciesTree = tree; + } + /** * Initializes the function buildBranches() with the starting parameters in order to build the key @@ -71,7 +91,7 @@ public class IdentificationKeyGenerator { private void loop(){ polytomousKey = polytomousKey.NewInstance(); PolytomousKeyNode root = polytomousKey.getRoot(); - buildBranches(root,features,taxa); + buildBranches(root,features,taxa,false,-1); } @@ -79,11 +99,18 @@ public class IdentificationKeyGenerator { * Creates the key and prints it */ public void makeandprint(){ +<<<<<<< .mine + if (dependenciesON && dependenciesTree!=null) checkDependencies(dependenciesTree.getRoot()); + Loop(); + List rootlist = new ArrayList(); +======= loop(); List rootlist = new ArrayList(); +>>>>>>> .r10927 rootlist.add(polytomousKey.getRoot()); String spaces = new String(); printTree(rootlist,spaces); + System.out.println(paths.toString()); } @@ -93,7 +120,12 @@ public class IdentificationKeyGenerator { * @param father the node considered * @param featuresLeft List of features that can be used at this point * @param taxaCovered the taxa left at this point (i.e. that verify the description corresponding to the path leading to this node) + * @param mybool */ +<<<<<<< .mine + private void buildBranches(FeatureNode father, List featuresLeft, Set taxaCovered, boolean mybool, int levelbis){ + int levelhere=levelbis+1; +======= private void buildBranches_old(FeatureNode father, List featuresLeft, Set taxaCovered){ // // this map stores the thresholds giving the best dichotomy of taxa for the corresponding feature supporting quantitative data // Map quantitativeFeaturesThresholds = new HashMap(); @@ -193,9 +225,51 @@ public class IdentificationKeyGenerator { // the feature is removed from the list of features available to build the next level of the tree featuresLeft.remove(winnerFeature); // this boolean indicates if the current father node has children or not (i.e. is a leaf or not) ; (a leaf has a "Question" element) +>>>>>>> .r10927 boolean childrenExist = false; - int i; +<<<<<<< .mine + Set innapplicables = new HashSet(); + Set applicables = new HashSet(); + + if (taxaCovered.size()>1) { + // this map stores the thresholds giving the best dichotomy of taxa for the corresponding feature supporting quantitative data + Map quantitativeFeaturesThresholds = new HashMap(); + // the scores of the different features are calculated, the thresholds in the same time + Map scoreMap = FeatureScores(featuresLeft, taxaCovered, quantitativeFeaturesThresholds); + // the feature with the best score becomes the one corresponding to the current node +// Feature winnerFeature = DefaultWinner(taxaCovered.size(), scoreMap); + Feature winnerFeature = LessStatesWinner(taxaCovered.size(), scoreMap, taxaCovered); + // the feature is removed from the list of features available to build the next level of the tree + featuresLeft.remove(winnerFeature); + // this boolean indicates if the current father node has children or not (i.e. is a leaf or not) ; (a leaf has a "Question" element) + int i; + /************** either the feature supports quantitative data... **************/ + // NB: in this version, "quantitative features" are dealt with in a dichotomous way + if (winnerFeature.isSupportsQuantitativeData()) { + // first, get the threshold + float threshold = quantitativeFeaturesThresholds.get(winnerFeature); + String sign; + StringBuilder unit= new StringBuilder(""); + // then determine which taxa are before and which are after this threshold (dichotomy) in order to create the children of the father node + List> quantitativeStates = determineQuantitativeStates(threshold,winnerFeature,taxaCovered,unit); + for (i=0;i<2;i++) { + Set newTaxaCovered = quantitativeStates.get(i); + if (i==0) sign = before; // the first element of the list corresponds to taxa before the threshold + else sign = after; // the second to those after + if (newTaxaCovered.size()>0 && !((newTaxaCovered.size()==taxaCovered.size()) && mybool)){ // if the taxa are discriminated compared to those of the father node, a child is created + childrenExist = true; + FeatureNode son = FeatureNode.NewInstance(); + son.setFeature(winnerFeature); + Representation question = new Representation(null, " " + sign + " " + threshold +unit,null, Language.DEFAULT()); // the question attribute is used to store the state of the feature + son.addQuestion(question); + father.addChild(son); + boolean newbool; + if (newTaxaCovered.size()==taxaCovered.size()) newbool = true; + else newbool = false; + buildBranches(son,featuresLeft, newTaxaCovered,newbool,levelhere); + } +======= /************** either the feature supports quantitative data... **************/ // NB: in this version, "quantitative features" are dealt with in a dichotomous way if (winnerFeature.isSupportsQuantitativeData()) { @@ -220,8 +294,40 @@ public class IdentificationKeyGenerator { son.setStatement(statement); father.addChild(son); buildBranches(son,featuresLeft, newTaxaCovered); +>>>>>>> .r10927 } } +<<<<<<< .mine + + /************** ...or it supports categorical data. **************/ + // "categorical features" may present several different states, each one of these might correspond to one child + List statesDone = new ArrayList(); + int numberOfStates; + if (winnerFeature.isSupportsCategoricalData()) { + for (TaxonDescription td : taxaCovered){ + // go through all the states possible for one feature for the taxa considered + DescriptionElementBase debConcerned = null; + for (DescriptionElementBase deb : td.getElements()) { + if (deb.getFeature().equals(winnerFeature)) debConcerned = deb; + } + // a map is created, the key being the set of taxa that present the state(s) stored in the corresponding value + if (debConcerned!=null) { + Map,List> taxonStatesMap = determineCategoricalStates(statesDone,(CategoricalData)debConcerned,winnerFeature,taxaCovered); + // if the merge option is ON, branches with the same discriminative power will be merged (see Vignes & Lebbes, 1989) + if (merge){ + // see below + Map> exclusions = new HashMap>(); + // maps the different states of the winnerFeature to the list of states "incompatible" with it + FeatureScoreAndMerge(winnerFeature,taxaCovered,exclusions); + + Integer best=null; + int length; + + // looks for the largest clique, i.e. the state with less exclusions + while (!exclusions.isEmpty()){ + List clique = returnBestClique(exclusions); + mergeBranches(clique,taxonStatesMap); +======= } /************** ...or it supports categorical data. **************/ @@ -249,17 +355,57 @@ public class IdentificationKeyGenerator { for (State st : listOfStates) { questionLabel.append(st.getLabel()); if (listOfStates.lastIndexOf(st)!=numberOfStates) questionLabel.append(separator); +>>>>>>> .r10927 } +<<<<<<< .mine +======= KeyStatement statement = KeyStatement.NewInstance(questionLabel.toString()); son.setStatement(statement); son.setFeature(winnerFeature); father.addChild(son); featuresLeft.remove(winnerFeature); // TODO was commented before, why ? buildBranches(son,featuresLeft, newTaxaCovered); +>>>>>>> .r10927 + } + if (taxonStatesMap!=null && !taxonStatesMap.isEmpty()) { + for (Map.Entry,List> e : taxonStatesMap.entrySet()){ + Set newTaxaCovered = e.getKey(); + List listOfStates = e.getValue(); + if ((newTaxaCovered.size()>0) && !((newTaxaCovered.size()==taxaCovered.size()) && mybool)){ // if the taxa are discriminated compared to those of the father node, a child is created + childrenExist = true; + FeatureNode son = FeatureNode.NewInstance(); + StringBuilder questionLabel = new StringBuilder(); + numberOfStates = listOfStates.size()-1; + for (State st : listOfStates) { + if (dependenciesON){ + if (iIdependencies.get(st)!= null) innapplicables.addAll(iIdependencies.get(st)); + if (oAIdependencies.get(st)!= null) applicables.addAll(oAIdependencies.get(st)); + for (Feature feature : innapplicables) featuresLeft.remove(feature); + for (Feature feature : applicables) featuresLeft.add(feature); + } + questionLabel.append(st.getLabel()); + if (listOfStates.lastIndexOf(st)!=numberOfStates) questionLabel.append(separator); + } + Representation question = new Representation(null, questionLabel.toString(),null, Language.DEFAULT()); + son.addQuestion(question); + son.setFeature(winnerFeature); + father.addChild(son); + featuresLeft.remove(winnerFeature); // TODO was commented before, why ? + boolean newbool; + if (newTaxaCovered.size()==taxaCovered.size()) newbool = true; + else newbool = false; + buildBranches(son,featuresLeft, newTaxaCovered,newbool,levelhere); + } + } } } } } + if (dependenciesON){ + for (Feature feature : innapplicables) featuresLeft.add(feature); + for (Feature feature : applicables) featuresLeft.remove(feature); + } + featuresLeft.add(winnerFeature); } if (! childrenExist){ KeyStatement fatherStatement = father.getStatement(); @@ -268,12 +414,92 @@ public class IdentificationKeyGenerator { String label = statementString + " --> " + taxaCovered.toString(); fatherStatement.putLabel(label, Language.DEFAULT()); } + // for (TaxonDescription td : taxaCovered){ + // if (paths.containsKey(td)) paths.get(td).add(levelhere); + // else { + // List pathLength = new ArrayList(); + // pathLength.add(levelhere); + // paths.put(td, pathLength); + // } + // } } - featuresLeft.add(winnerFeature); } + private void mergeBranches(List clique, Map,List> taxonStatesMap){ + int i = 1; + boolean stateFound; + Map.Entry,List> firstPair=null; + List> tdToDelete = new ArrayList>(); + if (clique.size()>1){ + Iterator it1 = taxonStatesMap.entrySet().iterator(); + while (it1.hasNext()){ + Map.Entry,List> pair = (Map.Entry)it1.next(); + Iterator stateIterator = clique.iterator(); + stateFound=false; + while(stateIterator.hasNext() && stateFound!=true) { + State state = stateIterator.next(); + if (pair.getValue().contains(state)) { + stateFound=true; + } + } + if (stateFound==true){ + if (firstPair==null){ + firstPair=pair; + } + else { + firstPair.getKey().addAll(pair.getKey()); + firstPair.getValue().addAll(pair.getValue()); + tdToDelete.add(pair.getKey()); +// taxonStatesMap.remove(pair.getKey()); //remove(pair); + } + } + } + for (Set td : tdToDelete){ + taxonStatesMap.remove(td); + } + } + } + +<<<<<<< .mine + private List returnBestClique (Map> exclusions){ + int best=-1;; + int length; + List clique = new ArrayList(); + // looks for the largest clique, i.e. the state with less exclusions + + State bestState=null; + for (Iterator it1 = exclusions.entrySet().iterator() ; it1.hasNext();){ + Map.Entry> pair = (Map.Entry)it1.next(); + length = pair.getValue().size(); + if ((best==-1) || length>> it0 = exclusions.entrySet().iterator() ; it0.hasNext();){ + Map.Entry> pair = (Map.Entry)it0.next(); + bool = true; + for (State state : clique) { + if (pair.getValue().contains(state)) bool = false; + } + if (bool){ + clique.add(pair.getKey()); + //exclusions.remove(pair.getKey()); + } + } + for (State state : clique) { + exclusions.remove(state); + } + return clique; + } + +======= +>>>>>>> .r10927 /** * fills a map of the sets of taxa (key) presenting the different states (value) for the given feature. * @@ -297,10 +523,6 @@ public class IdentificationKeyGenerator { if(!statesDone.contains(featureState)){ statesDone.add(featureState); - StateData sd = new StateData(); - sd.setState(featureState); - //((CategoricalData)debsDone.get(0)).addState(sd);// A VOIR - Set newTaxaCovered = whichTaxa(feature,featureState,taxaCovered); List newStates = childrenStatesMap.get(newTaxaCovered); if (newStates==null) { @@ -334,10 +556,77 @@ public class IdentificationKeyGenerator { return newCoveredTaxa; } +<<<<<<< .mine + //change names ; merge with Default (Default takes the first one of the list) + private Feature LessStatesWinner(int nTaxa, Map scores, Set taxaCovered){ + if (nTaxa==1) return null; + float meanScore = DefaultMeanScore(nTaxa); + float bestScore = nTaxa*nTaxa; + List bestFeatures = new ArrayList(); + Feature bestFeature = null; + Iterator it = scores.entrySet().iterator(); + float newScore; + while (it.hasNext()){ + Map.Entry pair = (Map.Entry)it.next(); + if (pair.getValue()!=null){ + newScore = Math.abs((Float)pair.getValue()-meanScore); + if (newScore < bestScore){ + bestFeatures.clear(); + bestFeatures.add((Feature)pair.getKey()); + bestScore = newScore; + } + else if (newScore==bestScore){ + bestFeatures.add((Feature)pair.getKey()); + } + } + } + if (bestFeatures.size()==1) { + return bestFeatures.get(0); + } + else { + int lessStates=-1; + int numberOfDifferentStates=-1; + for (Feature feature : bestFeatures){ + if (feature.isSupportsCategoricalData()){ + Set differentStates = new HashSet(); + for (TaxonDescription td : taxaCovered){ + Set elements = td.getElements(); + for (DescriptionElementBase deb : elements){ + if (deb.isInstanceOf(CategoricalData.class)) { + CategoricalData catdat = (CategoricalData)deb; + if (catdat.getFeature().equals(feature)) { + List stateDatas = catdat.getStates(); + for (StateData sd : stateDatas) { + differentStates.add(sd.getState()); + } + } + } + } + } + numberOfDifferentStates=differentStates.size(); + } + else if (feature.isSupportsQuantitativeData()){ + numberOfDifferentStates=2; + } + if (lessStates==-1 || numberOfDifferentStates scores){ + if (nTaxa==1) return null; + float meanScore = DefaultMeanScore(nTaxa); + float bestScore = nTaxa*nTaxa; +======= //change names private Feature defaultWinner(int nTaxons, Map scores){ float meanScore = defaultMeanScore(nTaxons); float bestScore = nTaxons*nTaxons; +>>>>>>> .r10927 Feature feature = null; Iterator it = scores.entrySet().iterator(); float newScore; @@ -351,9 +640,6 @@ public class IdentificationKeyGenerator { } } } - if (!(feature.getLabel()==null)){ -// System.out.println(feature.getLabel() + bestScore); - } return feature; } @@ -371,7 +657,11 @@ public class IdentificationKeyGenerator { Map scoreMap = new HashMap(); for (Feature feature : featuresLeft){ if (feature.isSupportsCategoricalData()) { +<<<<<<< .mine + scoreMap.put(feature, categoricalFeatureScore(feature,coveredTaxa)); +======= scoreMap.put(feature, featureScore(feature,coveredTaxa)); +>>>>>>> .r10927 } if (feature.isSupportsQuantitativeData()){ scoreMap.put(feature, quantitativeFeatureScore(feature,coveredTaxa, quantitativeFeaturesThresholds)); @@ -380,7 +670,7 @@ public class IdentificationKeyGenerator { return scoreMap; } - private List> determineQuantitativeStates (Float threshold, Feature feature, Set taxa){ + private List> determineQuantitativeStates (Float threshold, Feature feature, Set taxa, StringBuilder unit){ List> list = new ArrayList>(); Set taxaBefore = new HashSet(); Set taxaAfter = new HashSet(); @@ -392,14 +682,17 @@ public class IdentificationKeyGenerator { if (deb.getFeature().equals(feature)) { if (deb.isInstanceOf(QuantitativeData.class)) { QuantitativeData qd = (QuantitativeData)deb; + if (unit.toString().equals("") && qd.getUnit()!=null && qd.getUnit().getLabel()!=null){ + unit.append(" " + qd.getUnit().getLabel()); + } Set values = qd.getStatisticalValues(); for (StatisticalMeasurementValue smv : values){ StatisticalMeasure type = smv.getType(); // DONT FORGET sample size, MEAN etc - if (type.equals(StatisticalMeasure.MAX()) || type.equals(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY())) { - if (smv.getValue()>=threshold) taxaAfter.add(td); + if (type.equals(StatisticalMeasure.MAX()) || type.equals(StatisticalMeasure.TYPICAL_UPPER_BOUNDARY()) || type.equals(StatisticalMeasure.AVERAGE())) { + if (smv.getValue()>threshold) taxaAfter.add(td); } - if (type.equals(StatisticalMeasure.MIN()) || type.equals(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY())) { + if (type.equals(StatisticalMeasure.MIN()) || type.equals(StatisticalMeasure.TYPICAL_LOWER_BOUNDARY()) || type.equals(StatisticalMeasure.AVERAGE())) { if (smv.getValue()<=threshold) taxaBefore.add(td); } } @@ -407,6 +700,7 @@ public class IdentificationKeyGenerator { } } } +// if (unit==null) unit=new String(""); return list; } @@ -444,6 +738,13 @@ public class IdentificationKeyGenerator { lowerboundary = smv.getValue(); lowerboundarypresent=true; } + // TODO improve + if (type.equals(StatisticalMeasure.AVERAGE()) && upperboundarypresent==false && lowerboundarypresent==false) { + lowerboundary = smv.getValue(); + upperboundary = lowerboundary; + lowerboundarypresent=true; + upperboundarypresent=true; + } } if (lowerboundarypresent && upperboundarypresent) { allValues.add(lowerboundary); @@ -466,7 +767,7 @@ public class IdentificationKeyGenerator { taxaAfter=0; for (j=0;j=threshold) taxaAfter++; + if (allValues.get(j*2)>threshold) taxaAfter++; } difference = Math.abs(taxaBefore-taxaAfter); if (difference coveredTaxa){ +======= private float featureScore(Feature feature, Set coveredTaxa){ +>>>>>>> .r10927 int i,j; float score =0; + float power=0; TaxonDescription[] coveredTaxaArray = coveredTaxa.toArray(new TaxonDescription[coveredTaxa.size()]); // I did not figure a better way to do this for (i=0 ; i elements1 = coveredTaxaArray[i].getElements(); @@ -499,13 +804,83 @@ public class IdentificationKeyGenerator { for (DescriptionElementBase deb : elements2){ if (deb.getFeature().equals(feature)) deb2 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature } +<<<<<<< .mine + power = DefaultPower(deb1,deb2); + score = score + power; +======= score = score + defaultPower(deb1,deb2); +>>>>>>> .r10927 + } + } + return score; + } + +<<<<<<< .mine + private void checkDependencies(FeatureNode node){ + if (node.getOnlyApplicableIf()!=null){ + Set addToOAI = node.getOnlyApplicableIf(); + for (State state : addToOAI){ + if (oAIdependencies.containsKey(state)) oAIdependencies.put(state, new HashSet()); + oAIdependencies.get(state).add(node.getFeature()); + } + } + if (node.getInapplicableIf()!=null){ + Set addToiI = node.getInapplicableIf(); + for (State state : addToiI){ + if (iIdependencies.containsKey(state)) iIdependencies.put(state, new HashSet()); + iIdependencies.get(state).add(node.getFeature()); + } + } + if (node.getChildren()!=null) { + for (FeatureNode fn : node.getChildren()){ + checkDependencies(fn); + } + } + } + + private float FeatureScoreAndMerge(Feature feature, Set coveredTaxa, Map> exclusions){ + int i,j; + float score =0; + float power=0; + TaxonDescription[] coveredTaxaArray = coveredTaxa.toArray(new TaxonDescription[coveredTaxa.size()]); // I did not figure a better way to do this + for (i=0 ; i elements1 = coveredTaxaArray[i].getElements(); + DescriptionElementBase deb1 = null; + for (DescriptionElementBase deb : elements1){ + if (deb.getFeature().equals(feature)) deb1 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature + } + for (j=i+1 ; j< coveredTaxaArray.length ; j++){ + Set elements2 = coveredTaxaArray[j].getElements(); + DescriptionElementBase deb2 = null; + for (DescriptionElementBase deb : elements2){ + if (deb.getFeature().equals(feature)) deb2 = deb; // finds the DescriptionElementBase corresponding to the concerned Feature + } + power = DefaultPower(deb1,deb2); + score = score + power; + if (power>0) // if there is no state in common between deb1 and deb2 + { + CategoricalData cat1 = (CategoricalData)deb1; + CategoricalData cat2 = (CategoricalData)deb2; + for (StateData statedata1 : cat1.getStates()){ + State state1 = statedata1.getState(); + if (!exclusions.containsKey(state1)) exclusions.put(state1, new HashSet()); + for (StateData statedata2 : cat2.getStates()){ + State state2 = statedata2.getState(); + if (!exclusions.containsKey(state2)) exclusions.put(state2, new HashSet()); + exclusions.get(state1).add(state2); + exclusions.get(state2).add(state1); + } + } + } } } return score; } + private float DefaultPower(DescriptionElementBase deb1, DescriptionElementBase deb2){ +======= private float defaultPower(DescriptionElementBase deb1, DescriptionElementBase deb2){ +>>>>>>> .r10927 if (deb1==null || deb2==null) { return -1; //what if the two taxa don't have this feature in common ? } @@ -529,7 +904,7 @@ public class IdentificationKeyGenerator { } // modifiers not taken into account for this default power } - // one point each time two taxa have at least a state in common for a given feature + // one point each time two taxa can be discriminated for a given feature if (bool) return 0; else return 1; } @@ -539,14 +914,24 @@ public class IdentificationKeyGenerator { level++; int levelcopy = level; int j=1; + String delimiter; + String equals = " = "; + String quantitative = ""; String newspaces = spaces.concat("\t"); for (PolytomousKeyNode polytomousKeyNode : polytomousKeyNodes){ if (polytomousKeyNode.getQuestion() != null) { String state = null; +<<<<<<< .mine + if (fnode.getQuestion(Language.DEFAULT())!=null) state = fnode.getQuestion(Language.DEFAULT()).getLabel(); + if (fnode.getFeature().isSupportsQuantitativeData()) delimiter = quantitative; + else delimiter = equals; + System.out.println(newspaces + levelcopy + " : " + j + " " + fnode.getFeature().getLabel() + delimiter + state); +======= if (polytomousKeyNode.getStatement().getLabel(Language.DEFAULT() ) != null){ state = polytomousKeyNode.getStatement().getLabelText(Language.DEFAULT()); } System.out.println(newspaces + levelcopy + " : " + j + " " + polytomousKeyNode.getQuestion().getLabelText(Language.DEFAULT()) + " = " + state); +>>>>>>> .r10927 j++; } else { // TODO never read ? diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java new file mode 100644 index 0000000000..ff5a3cfb3a --- /dev/null +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/MicroFormatCategoricalDescriptionBuilder.java @@ -0,0 +1,60 @@ +package eu.etaxonomy.cdm.api.service; + +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; + +import eu.etaxonomy.cdm.model.common.Language; +import eu.etaxonomy.cdm.model.description.Feature; +import eu.etaxonomy.cdm.model.description.Modifier; +import eu.etaxonomy.cdm.model.description.State; +import eu.etaxonomy.cdm.model.description.StateData; +import eu.etaxonomy.cdm.model.description.TextData; + +public class MicroFormatCategoricalDescriptionBuilder extends AbstractCategoricalDescriptionBuilder{ + + private String spanEnd = ""; + + protected TextData doBuild(List states, List languages){ + TextData textData = TextData.NewInstance();// TextData that will contain the description and the language corresponding + StringBuilder CategoricalDescription = new StringBuilder(); + Language language = null; + for (Iterator sd = states.iterator() ; sd.hasNext() ;){ + StateData stateData = sd.next(); + State s = stateData.getState(); + Set modifiers = stateData.getModifiers(); // the states and their according modifiers are simply written one after the other + for (Iterator mod = modifiers.iterator() ; mod.hasNext() ;){ + Modifier modifier = mod.next(); + CategoricalDescription.append(" " + spanClass("modifier") + modifier.getPreferredRepresentation(languages).getLabel() + spanEnd); + } + CategoricalDescription.append(" " + spanClass("state") + s.getPreferredRepresentation(languages).getLabel() + spanEnd); + if (sd.hasNext()) CategoricalDescription.append(','); + if (language==null) { + language = s.getPreferredRepresentation(languages).getLanguage(); // TODO What if there are different languages ? + } + } + if (language==null) { + language = Language.DEFAULT(); + } + textData.putText(CategoricalDescription.toString(), language); + + return textData; + } + + protected String buildFeature(Feature feature, boolean doItBetter){ + if (feature==null || feature.getLabel()==null) return ""; + else { + if (doItBetter) { + String betterString = StringUtils.substringBefore(feature.getLabel(), "<"); + return (spanClass("feature") + StringUtils.removeEnd(betterString, " ") + spanEnd); + } + else return (spanClass("feature") + feature.getLabel() + spanEnd); + } + } + + private String spanClass(String classString){ + return(""); + } +} diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java index 0c9cb75655..25e643715c 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java @@ -1,13 +1,11 @@ package eu.etaxonomy.cdm.api.service; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.springframework.stereotype.Component; @@ -20,20 +18,78 @@ import eu.etaxonomy.cdm.model.description.FeatureTree; import eu.etaxonomy.cdm.model.description.QuantitativeData; import eu.etaxonomy.cdm.model.description.TaxonDescription; import eu.etaxonomy.cdm.model.description.TextData; +import eu.etaxonomy.cdm.model.description.TextFormat; import eu.etaxonomy.cdm.model.common.Annotation; import eu.etaxonomy.cdm.model.common.AnnotationType; import eu.etaxonomy.cdm.model.common.Language; + +/** + * Generator of natural language descriptions from TaxonDescriptions. + * + * @author m.venin + * @created 13.04.2010 + * @version 1.0 + */ @Component public class NaturalLanguageGenerator implements INaturalLanguageGenerator { + private String firstSeparator = ","; + private String secondSeparator = "."; + private List levels = new ArrayList(); + private DescriptionBuilder quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder(); private DescriptionBuilder categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder(); - - private String previousFeatureName; + + private TextData previousTextData; private Map elementProcessors; + private Set applicableElementProcessors = new HashSet(); + + /** + * Change the first separator used by generateSingleTextData. By default ",". + * + * @param separator + */ + public void setFirstSeparator(String separator){ + firstSeparator=separator; + } + + public String getFirstSeparator(){ + return firstSeparator; + } + + /** + * Change the second separator used by generateSingleTextData. By default ".". + * + * @param separator + */ + public void setSecondSeparator(String separator){ + secondSeparator=separator; + } + + public String getSecondSeparator(){ + return secondSeparator; + } + + /** + * @param quantitativeDescriptionBuilder + */ + public void setQuantitativeDescriptionBuilder(DescriptionBuilder quantitativeDescriptionBuilder){ + this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder; + } + + /** + * @param categoricalDescriptionBuilder + */ + public void setCategoricalDescriptionBuilder(DescriptionBuilder categoricalDescriptionBuilder){ + this.categoricalDescriptionBuilder = categoricalDescriptionBuilder; + } + + /** + * @return the element processors of this generator + */ public Map getElementProcessors() { return elementProcessors; } @@ -49,29 +105,37 @@ public class NaturalLanguageGenerator implements INaturalLanguageGenerator { Map elementProcessors) { this.elementProcessors = elementProcessors; } - - private Set applicableElementProcessors = new HashSet(); /** - * @param annotations + * Looks for technical annotations, if one matches a regular expression of the element processors + * the associated processor is added to the applicable element processors which will then be applied + * when generating the description. + * + * @param annotations the set of annotations of the description */ private void initNaturalLanguageDescriptionElementProcessors(Set annotations) { if(annotations != null){ for(Annotation annotation : annotations){ if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){ + if (elementProcessors!=null){ for(String regex : elementProcessors.keySet()){ if(annotation.getText().matches(regex)){ applicableElementProcessors.add(elementProcessors.get(regex)); } } } + } } } } + /** - * @param textData + * Applies the list of applicable processors to a TextData. + * + * @param textData the TextData to be modified + * @param previousTextData the TextData corresponding to the feature of the previous level in the tree */ private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){ for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){ @@ -79,201 +143,192 @@ public class NaturalLanguageGenerator implements INaturalLanguageGenerator { } } + /** + * The most simple function to generate a description. The language used is the default one. + * + * @param featureTree the FeatureTree holding the order in which features and their states must be printed + * @param description the TaxonDescription with all the data * + * @return a list of TextData, each one being a basic element of the natural language description */ public List generateNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description) { - List languages = new ArrayList(); - languages.add(Language.DEFAULT()); - return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages); + return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT()); } + /** + * Generate a description in a specified language. * + * @param featureTree the FeatureTree holding the order in which features and their states must be printed + * @param description the TaxonDescription with all the data + * @param language the language in which the description has to be printed + * + * @return a list of TextData, each one being a basic element of the natural language description */ - public List generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List languages) { - return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages); - } - - public List generateNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, Language language) { List languages = new ArrayList(); languages.add(language); - initNaturalLanguageDescriptionElementProcessors(description.getAnnotations()); - - return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages); + return generatePreferredNaturalLanguageDescription(featureTree,description,languages); } - - - /** recursive function that goes through a tree containing the order in which the description has to be generated, - * if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description. - * - * @param children - * @param parent - * @param description - * @param language The language in which the description has to be written - * @return - */ - private List buildBranchesDescr(List children, FeatureNode parent, TaxonDescription description, List languages) { - List listTextData = new ArrayList(); ; - if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states) - Feature fref = parent.getFeature(); - for (Iterator ifn = children.iterator() ; ifn.hasNext() ;){ - FeatureNode fn = ifn.next(); - listTextData.addAll(buildBranchesDescr(fn.getChildren(),fn,description, languages)); - } - } - else { //once a leaf is reached - Feature fref = parent.getFeature(); - if (fref!=null) { // needs a better algorithm - int k=0; - Set elements = description.getElements(); - for (Iterator deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription - DescriptionElementBase descriptionElement = deb.next(); - TextData textData; - if (descriptionElement.getFeature().equals(fref)){ // if one matches the corresponding feature associated to this leaf - if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData - CategoricalData categoricalData = (CategoricalData) descriptionElement; - //textData = buildCategoricalDescr(categoricalData, language); - textData = categoricalDescriptionBuilder.build(categoricalData, languages); - //textData.putText(fref.getLabel(), Language.DEFAULT()); - TextData featureName = TextData.NewInstance(fref.getLabel(), Language.DEFAULT(), null); - listTextData.add(featureName); // if you want to print the name of the feature (Should it be an option ?) - listTextData.add(textData); - } - if (descriptionElement instanceof QuantitativeData) { // if this description is a QuantitativeData, generate the according TextData - QuantitativeData quantitativeData = (QuantitativeData) descriptionElement; - textData = quantitativeDescriptionBuilder.build(quantitativeData, languages); - TextData featureName = TextData.NewInstance(fref.getLabel(), Language.DEFAULT(), null); - listTextData.add(featureName); // if you want to print the name of the feature - listTextData.add(textData); - } - } - } - } - } - return listTextData; - } - /** + * Generate a description with a specified list of preferred languages. * + * @param featureTree the FeatureTree holding the order in which features and their states must be printed + * @param description the TaxonDescription with all the data + * @param languages the ordered list of languages preferred for printing the description * - * @param quantitativeDescriptionBuilder + * @return a list of TextData, each one being a basic element of the natural language description */ - public void setQuantitativeDescriptionBuilder(DescriptionBuilder quantitativeDescriptionBuilder){ - this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder; + public List generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List languages) { + initNaturalLanguageDescriptionElementProcessors(description.getAnnotations()); + return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0); } /** + * Generate a description as a single paragraph in a TextData. * + * @param featureTree the FeatureTree holding the order in which features and their states must be printed + * @param description the TaxonDescription with all the data * - * @param categoricalDescriptionBuilder + * @return a TextData in the default language. */ - public void setCategoricalDescriptionBuilder(DescriptionBuilder categoricalDescriptionBuilder){ - this.categoricalDescriptionBuilder = categoricalDescriptionBuilder; + public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description) { + return generateSingleTextData(featureTree,description,Language.DEFAULT()); } - /** - * @param featureTree - * @param description - * @param language - * @return + * Generate a description as a single paragraph in a TextData. + * + * @param featureTree the FeatureTree holding the order in which features and their states must be printed + * @param description the TaxonDescription with all the data + * @param language the language in which the description has to be printed + * + * @return a TextData in the specified language. */ - public String generateStringNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, Language language) { + public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description, Language language) { List languages = new ArrayList(); languages.add(language); - return buildString(featureTree.getRootChildren(), featureTree.getRoot(), description, languages).toString(); + return generatePreferredSingleTextData(featureTree,description,languages); } /** - * recursive function that goes through a tree containing the order in which - * the description has to be generated, if an element of this tree matches - * one of the TaxonDescription, a DescriptionBuilder is called which returns - * a TextData with the corresponding description. - *

- * Also applies the folowing formatting rules which are special for data coming from Delta, DeltaAccess, DiversityDescriptions: + * Generate a description with a specified list of preferred languages. * - *

    - *
  • (1.A) if( doSkipTextInBrackets) : Label Text in brackets is always skipped the remaining text string I the TEXT_TO_DISPLAY. The text may contain multiple substrings tagged with the brackets. A tagged substring may also occur in the middle of the whole string.
  • - *
  • (1.B) else : just remove the brackets - *
  • (2) : If the TEXT_TO_DISPLAY is equal the TEXT_TO_DISPLAY of the previous element output of this text is suppressed.
  • - *
+ * @param featureTree the FeatureTree holding the order in which features and their states must be printed + * @param description the TaxonDescription with all the data + * @param languages the ordered list of languages preferred for printing the description * - * @param children - * @param parent - * @param description - * @param language - * The language in which the description has to be written - * @return + * @return a TextData using the languages (in the given order of preference) */ - private StringBuilder buildString(List children, FeatureNode parent, TaxonDescription description, List languages) { - StringBuilder stringbuilder = new StringBuilder(); + public TextData generatePreferredSingleTextData(FeatureTree featureTree, TaxonDescription description, List languages) { + + initNaturalLanguageDescriptionElementProcessors(description.getAnnotations()); + + List texts = buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0); + StringBuilder sb = new StringBuilder(); + int i = 0,j; + boolean startSentence = false, firstOne = true; + Integer level; + levels.clear(); + for (j=0 ; j1) sb.append(asString.substring(0,1).toUpperCase() + asString.substring(1)); + } + i++; + } + else if (level.equals(0)) { + if (startSentence) sb.append(texts.get(i).getText(Language.DEFAULT())); + else sb.append(firstSeparator + texts.get(i).getText(Language.DEFAULT())); + startSentence=false; + i++; + } + else if (!level.equals(0) && !level.equals(-1)){ + if (!firstOne && levels.get(j-1).equals(0)){ + if (i buildBranchesDescr(List children, FeatureNode parent, TaxonDescription description, List languages, int floor) { + List listTextData = new ArrayList(); + floor++; // counter to know the current level in the tree + if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states) + levels.add(new Integer(floor)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List + Feature feature = parent.getFeature(); + TextData featureName; + if (feature!=null && feature.getLabel()!=null){ // if a node is associated to a feature + featureName = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages); + levels.add(new Integer(-1)); // it is indicated by a '-1' after its level + listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list + } + else featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached) + for (Iterator ifn = children.iterator() ; ifn.hasNext() ;){ + previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree FeatureNode fn = ifn.next(); - StringBuilder tempsb = buildString(fn.getChildren(),fn,description, languages); - if (tempsb.length()>1) stringbuilder.append(tempsb.deleteCharAt(tempsb.length()-1)); -// if (tempsb.length()>1) stringbuilder.append(tempsb); + listTextData.addAll(buildBranchesDescr(fn.getChildren(),fn,description, languages, floor)); } - stringbuilder.append('.'); } else { //once a leaf is reached - Feature fref = parent.getFeature(); - if (fref!=null) { // needs a better algorithm - int k=0; + Feature feature = parent.getFeature(); + if (feature!=null && (feature.isSupportsQuantitativeData() || feature.isSupportsCategoricalData())) { Set elements = description.getElements(); for (Iterator deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription DescriptionElementBase descriptionElement = deb.next(); - TextData textData; - if (descriptionElement.getFeature().equals(fref)){ // if one matches the corresponding feature associated to this leaf + if (descriptionElement.getFeature().equals(feature)){ // if one matches the corresponding feature associated to this leaf + if (descriptionElement instanceof CategoricalData || descriptionElement instanceof QuantitativeData){ + TextData featureTextData; + TextData statesTextData; if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData CategoricalData categoricalData = (CategoricalData) descriptionElement; - //textData = buildCategoricalDescr(categoricalData, language); - textData = categoricalDescriptionBuilder.build(categoricalData, languages); - //textData.putText(fref.getLabel(), Language.DEFAULT()); - String featureName = StringUtils.substringBefore(fref.getLabel(), "<"); - if (previousFeatureName==null){ - previousFeatureName = featureName; - String featureString = categoricalDescriptionBuilder.buildFeature(fref,true); - stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1)); - } - else if (!featureName.contains(previousFeatureName)) { - stringbuilder.append(". "); - previousFeatureName = featureName; - String featureString = categoricalDescriptionBuilder.buildFeature(fref,true); - stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1)); // if you want to print the name of the feature (Should it be an option ?) - } - stringbuilder.append(textData.getText(Language.DEFAULT())); - stringbuilder.append(','); + statesTextData = categoricalDescriptionBuilder.build(categoricalData, languages); + featureTextData = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages); } - if (descriptionElement instanceof QuantitativeData) { // if this description is a QuantitativeData, generate the according TextData + else { // if this description is a QuantitativeData, generate the according TextData QuantitativeData quantitativeData = (QuantitativeData) descriptionElement; - textData = quantitativeDescriptionBuilder.build(quantitativeData, languages); - String featureName = StringUtils.substringBefore(fref.getLabel(), "<"); - if (previousFeatureName==null){ - previousFeatureName = featureName; - String featureString = quantitativeDescriptionBuilder.buildFeature(fref,true); - stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1)); - } - else if (!featureName.contains(previousFeatureName)) { - stringbuilder.append(". "); - previousFeatureName = featureName; - String featureString = quantitativeDescriptionBuilder.buildFeature(fref,true); - stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1)); // if you want to print the name of the feature (Should it be an option ?) - } - stringbuilder.append(textData.getText(Language.DEFAULT())); - stringbuilder.append(','); + statesTextData = quantitativeDescriptionBuilder.build(quantitativeData, languages); + featureTextData = quantitativeDescriptionBuilder.buildTextDataFeature(feature, languages); + } + applyNaturalLanguageDescriptionElementProcessors(featureTextData, previousTextData); + levels.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree + listTextData.add(featureTextData); + levels.add(new Integer(floor)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature + listTextData.add(statesTextData); } } } } } - return stringbuilder; - } - + return listTextData; + } } -- 2.34.1