1 package eu
.etaxonomy
.cdm
.api
.service
;
3 import java
.util
.ArrayList
;
4 import java
.util
.HashSet
;
5 import java
.util
.Iterator
;
10 import org
.apache
.commons
.lang
.StringUtils
;
11 import org
.apache
.log4j
.Logger
;
12 import org
.springframework
.stereotype
.Component
;
14 import eu
.etaxonomy
.cdm
.model
.description
.CategoricalData
;
15 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
16 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
17 import eu
.etaxonomy
.cdm
.model
.description
.FeatureNode
;
18 import eu
.etaxonomy
.cdm
.model
.description
.FeatureTree
;
19 import eu
.etaxonomy
.cdm
.model
.description
.QuantitativeData
;
20 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
21 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
22 import eu
.etaxonomy
.cdm
.model
.description
.TextFormat
;
23 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
24 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
25 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
29 * Generator of natural language descriptions from TaxonDescriptions.
36 public class NaturalLanguageGenerator
implements INaturalLanguageGenerator
{
37 @SuppressWarnings("unused")
38 private static final Logger logger
= Logger
.getLogger(NaturalLanguageGenerator
.class);
40 private String firstSeparator
= ",";
41 private String secondSeparator
= ".";
42 private List
<Integer
> levels
= new ArrayList
<Integer
>();
44 private DescriptionBuilder
<QuantitativeData
> quantitativeDescriptionBuilder
= new DefaultQuantitativeDescriptionBuilder();
45 private DescriptionBuilder
<CategoricalData
> categoricalDescriptionBuilder
= new DefaultCategoricalDescriptionBuilder();
47 private TextData previousTextData
;
49 DeltaTextDataProcessor deltaTextDataProcessor
= new DeltaTextDataProcessor();
51 private Map
<String
, INaturalLanguageTextDataProcessor
> elementProcessors
;
53 private Set
<INaturalLanguageTextDataProcessor
> applicableElementProcessors
= new HashSet
<INaturalLanguageTextDataProcessor
>();
56 * Change the first separator used by generateSingleTextData. By default ",".
60 public void setFirstSeparator(String separator
){
61 firstSeparator
=separator
;
64 public String
getFirstSeparator(){
65 return firstSeparator
;
69 * Change the second separator used by generateSingleTextData. By default ".".
73 public void setSecondSeparator(String separator
){
74 secondSeparator
=separator
;
77 public String
getSecondSeparator(){
78 return secondSeparator
;
82 * @param quantitativeDescriptionBuilder
84 public void setQuantitativeDescriptionBuilder(DescriptionBuilder
<QuantitativeData
> quantitativeDescriptionBuilder
){
85 this.quantitativeDescriptionBuilder
= quantitativeDescriptionBuilder
;
89 * @param categoricalDescriptionBuilder
91 public void setCategoricalDescriptionBuilder(DescriptionBuilder
<CategoricalData
> categoricalDescriptionBuilder
){
92 this.categoricalDescriptionBuilder
= categoricalDescriptionBuilder
;
96 * @return the element processors of this generator
98 public Map
<String
, INaturalLanguageTextDataProcessor
> getElementProcessors() {
99 return elementProcessors
;
103 * The keys of the elementProcessors map are regular expressions which are
104 * being used to identify the those Descriptions to which the mapped
105 * NaturalLanguageTextDataProcessor is applicable.
107 * @param elementProcessors
109 public void setElementProcessors(
110 Map
<String
, INaturalLanguageTextDataProcessor
> elementProcessors
) {
111 this.elementProcessors
= elementProcessors
;
115 * Looks for technical annotations, if one matches a regular expression of the element processors
116 * the associated processor is added to the applicable element processors which will then be applied
117 * when generating the description.
119 * @param annotations the set of annotations of the description
121 private void initNaturalLanguageDescriptionElementProcessors(Set
<Annotation
> annotations
) {
123 if(annotations
!= null){
124 for(Annotation annotation
: annotations
){
125 if(annotation
.getAnnotationType().equals(AnnotationType
.TECHNICAL())){
126 if (elementProcessors
!=null){
127 for(String regex
: elementProcessors
.keySet()){
128 if(annotation
.getText().matches(regex
)){
129 applicableElementProcessors
.add(elementProcessors
.get(regex
));
140 * Applies the list of applicable processors to a TextData.
142 * @param textData the TextData to be modified
143 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
145 private void applyNaturalLanguageDescriptionElementProcessors(TextData textData
, TextData previousTextData
){
146 for(INaturalLanguageTextDataProcessor processor
: applicableElementProcessors
){
147 processor
.process(textData
, previousTextData
);
153 * The most simple function to generate a description. The language used is the default one.
155 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
156 * @param description the TaxonDescription with all the data
158 * @return a list of TextData, each one being a basic element of the natural language description
160 public List
<TextData
> generateNaturalLanguageDescription(FeatureTree featureTree
,TaxonDescription description
) {
161 return generateNaturalLanguageDescription(featureTree
,description
,Language
.DEFAULT());
167 * Generate a description in a specified language.
169 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
170 * @param description the TaxonDescription with all the data
171 * @param language the language in which the description has to be printed
173 * @return a list of TextData, each one being a basic element of the natural language description
175 public List
<TextData
> generateNaturalLanguageDescription(FeatureTree featureTree
, TaxonDescription description
, Language language
) {
176 List
<Language
> languages
= new ArrayList
<Language
>();
177 languages
.add(language
);
178 initNaturalLanguageDescriptionElementProcessors(description
.getAnnotations());
179 return generatePreferredNaturalLanguageDescription(featureTree
,description
,languages
);
183 * Generate a description with a specified list of preferred languages.
185 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
186 * @param description the TaxonDescription with all the data
187 * @param languages the ordered list of languages preferred for printing the description
189 * @return a list of TextData, each one being a basic element of the natural language description
191 public List
<TextData
> generatePreferredNaturalLanguageDescription(FeatureTree featureTree
,TaxonDescription description
, List
<Language
> languages
) {
192 initNaturalLanguageDescriptionElementProcessors(description
.getAnnotations());
193 return buildBranchesDescr(featureTree
.getRootChildren(), featureTree
.getRoot(), description
, languages
,0);
197 * Generate a description as a single paragraph in a TextData.
199 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
200 * @param description the TaxonDescription with all the data
202 * @return a TextData in the default language.
204 public TextData
generateSingleTextData(FeatureTree featureTree
, TaxonDescription description
) {
205 return generateSingleTextData(featureTree
,description
,Language
.DEFAULT());
209 * Generate a description as a single paragraph in a TextData.
211 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
212 * @param description the TaxonDescription with all the data
213 * @param language the language in which the description has to be printed
215 * @return a TextData in the specified language.
217 public TextData
generateSingleTextData(FeatureTree featureTree
, TaxonDescription description
, Language language
) {
218 List
<Language
> languages
= new ArrayList
<Language
>();
219 languages
.add(language
);
220 return generatePreferredSingleTextData(featureTree
,description
,languages
);
224 * Generate a description with a specified list of preferred languages.
226 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
227 * @param description the TaxonDescription with all the data
228 * @param languages the ordered list of languages preferred for printing the description
230 * @return a TextData using the languages (in the given order of preference)
// Builds the whole description as one paragraph: the level markers recorded in
// `levels` while the raw TextData list is generated drive how the individual
// texts are joined with firstSeparator / secondSeparator.
// NOTE(review): every getText(...) call below and the returned TextData use
// Language.DEFAULT(); the `languages` argument is only passed on to
// generatePreferredNaturalLanguageDescription - confirm this is intended.
// NOTE(review): the embedded original line numbers skip 243, 246-247, 250-252,
// 256-259 and 262-265, so some statements of the loop are not visible in this
// excerpt; the comments here only describe what is shown.
232 public TextData
generatePreferredSingleTextData(FeatureTree featureTree
, TaxonDescription description
, List
<Language
> languages
) {
233 levels
.clear(); // before the start, the table containing the levels of each node must be cleared
234 // Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility
235 List
<TextData
> texts
= generatePreferredNaturalLanguageDescription(featureTree
,description
, languages
);// first get the description as a raw list of TextData
237 StringBuilder descriptionStringBuilder
= new StringBuilder(); // the StringBuilder used to generate the description
238 int i
= 0,j
,level
; // i is used to store the index of the TextData to use
239 boolean startSentence
= false, firstOne
= true;
// j walks the level markers; i is the position in `texts`
241 for (j
=0 ; j
<levels
.size() ; j
++){
242 level
= levels
.get(j
);
244 if ((j
+1)<levels
.size() && levels
.get(j
+1).equals(0)){ // if this node is the direct father of a leaf
// a new sentence starts: second separator, a blank, then the text with its first character upper-cased
245 descriptionStringBuilder
.append(secondSeparator
+ " ");
248 String asString
= texts
.get(i
).getText(Language
.DEFAULT()).toString();
249 if (asString
.length()>1) descriptionStringBuilder
.append(asString
.substring(0,1).toUpperCase() + asString
.substring(1));
253 else if (level
==0) { // if this node is a leaf
// the first leaf of a sentence is appended as is, later ones are preceded by the first separator
254 if (startSentence
) descriptionStringBuilder
.append(texts
.get(i
).getText(Language
.DEFAULT()));
255 else descriptionStringBuilder
.append(firstSeparator
+ texts
.get(i
).getText(Language
.DEFAULT()));
260 if (!firstOne
&& levels
.get(j
-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf
261 if (i
<texts
.size()) descriptionStringBuilder
.append(texts
.get(i
).getText(Language
.DEFAULT()));
// close the paragraph with the second separator
266 descriptionStringBuilder
.append(secondSeparator
);
267 String returnString
= descriptionStringBuilder
.toString();
// NOTE(review): as shown, search and replacement are the same single blank, which makes this call a no-op;
// presumably the search string was a double blank before the whitespace of this excerpt was collapsed - confirm
268 returnString
= StringUtils
.replace(returnString
, " ", " ");
// drop a leading "<secondSeparator> " (the same string that is appended in front of a new sentence above)
269 returnString
= StringUtils
.removeStart(returnString
, secondSeparator
+ " ");
270 return TextData
.NewInstance(returnString
,Language
.DEFAULT(),TextFormat
.NewInstance("", "Text", ""));
275 /** recursive function that goes through a tree containing the order in which the description has to be generated,
276 * if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
278 * @param children the children of the feature node considered
279 * @param parent the feature node considered
280 * @param description the TaxonDescription element for which we want a natural language output
281 * @param language The language in which the description has to be written
282 * @param floor integer to keep track of the level in the tree
283 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
285 private List
<TextData
> buildBranchesDescr(List
<FeatureNode
> children
, FeatureNode parent
, TaxonDescription description
, List
<Language
> languages
, int floor
) {
286 List
<TextData
> listTextData
= new ArrayList
<TextData
>();
287 floor
++; // counter to know the current level in the tree
289 if (!parent
.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
290 levels
.add(new Integer(floor
)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
291 Feature feature
= parent
.getFeature();
292 TextData featureName
;
293 if (feature
!=null && feature
.getLabel()!=null){ // if a node is associated to a feature
294 featureName
= categoricalDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
295 levels
.add(new Integer(-1)); // it is indicated by a '-1' after its level
296 listTextData
.add(featureName
); // the TextData representing the name of the feature is concatenated to the list
298 else featureName
= new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
300 for (Iterator
<FeatureNode
> ifn
= children
.iterator() ; ifn
.hasNext() ;){
301 previousTextData
= featureName
; // this allows to keep track of the name of the feature one level up in the tree
302 FeatureNode fn
= ifn
.next();
303 listTextData
.addAll(buildBranchesDescr(fn
.getChildren(),fn
,description
, languages
, floor
));
306 else { //once a leaf is reached
307 Feature feature
= parent
.getFeature();
308 if (feature
!=null && (feature
.isSupportsQuantitativeData() || feature
.isSupportsCategoricalData())) {
309 Set
<DescriptionElementBase
> elements
= description
.getElements();
310 for (Iterator
<DescriptionElementBase
> deb
= elements
.iterator() ; deb
.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
311 DescriptionElementBase descriptionElement
= deb
.next();
312 if (descriptionElement
.getFeature().equals(feature
)){ // if one matches the corresponding feature associated to this leaf
313 if (descriptionElement
instanceof CategoricalData
|| descriptionElement
instanceof QuantitativeData
){
314 TextData featureTextData
;
315 TextData statesTextData
;
316 if (descriptionElement
instanceof CategoricalData
) { // if this description is a CategoricalData, generate the according TextData
317 CategoricalData categoricalData
= (CategoricalData
) descriptionElement
;
318 statesTextData
= categoricalDescriptionBuilder
.build(categoricalData
, languages
);
319 featureTextData
= categoricalDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
321 else { // if this description is a QuantitativeData, generate the according TextData
322 QuantitativeData quantitativeData
= (QuantitativeData
) descriptionElement
;
323 statesTextData
= quantitativeDescriptionBuilder
.build(quantitativeData
, languages
);
324 featureTextData
= quantitativeDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
326 applyNaturalLanguageDescriptionElementProcessors(featureTextData
, previousTextData
);
327 levels
.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
328 listTextData
.add(featureTextData
);
329 levels
.add(new Integer(floor
)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
330 listTextData
.add(statesTextData
);