1 package eu
.etaxonomy
.cdm
.api
.service
;
3 import java
.util
.ArrayList
;
4 import java
.util
.HashSet
;
5 import java
.util
.Iterator
;
10 import org
.apache
.commons
.lang
.StringUtils
;
11 import org
.apache
.log4j
.Logger
;
12 import org
.springframework
.stereotype
.Component
;
14 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
15 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
16 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
17 import eu
.etaxonomy
.cdm
.model
.description
.CategoricalData
;
18 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
19 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
20 import eu
.etaxonomy
.cdm
.model
.description
.QuantitativeData
;
21 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
22 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
23 import eu
.etaxonomy
.cdm
.model
.description
.TextFormat
;
24 import eu
.etaxonomy
.cdm
.model
.term
.TermTree
;
25 import eu
.etaxonomy
.cdm
.model
.term
.TermNode
;
/**
 * Generator of natural language descriptions from TaxonDescriptions.
 */
35 public class NaturalLanguageGenerator
implements INaturalLanguageGenerator
{
36 @SuppressWarnings("unused")
37 private static final Logger logger
= Logger
.getLogger(NaturalLanguageGenerator
.class);
39 private String firstSeparator
= ",";
40 private String secondSeparator
= ".";
41 private List
<Integer
> levels
= new ArrayList
<Integer
>();
43 private DescriptionBuilder
<QuantitativeData
> quantitativeDescriptionBuilder
= new DefaultQuantitativeDescriptionBuilder();
44 private DescriptionBuilder
<CategoricalData
> categoricalDescriptionBuilder
= new DefaultCategoricalDescriptionBuilder();
46 private TextData previousTextData
;
48 DeltaTextDataProcessor deltaTextDataProcessor
= new DeltaTextDataProcessor();
50 private Map
<String
, INaturalLanguageTextDataProcessor
> elementProcessors
;
52 private Set
<INaturalLanguageTextDataProcessor
> applicableElementProcessors
= new HashSet
<INaturalLanguageTextDataProcessor
>();
55 * Change the first separator used by generateSingleTextData. By default ",".
59 public void setFirstSeparator(String separator
){
60 firstSeparator
=separator
;
63 public String
getFirstSeparator(){
64 return firstSeparator
;
68 * Change the second separator used by generateSingleTextData. By default ".".
72 public void setSecondSeparator(String separator
){
73 secondSeparator
=separator
;
76 public String
getSecondSeparator(){
77 return secondSeparator
;
81 * @param quantitativeDescriptionBuilder
83 public void setQuantitativeDescriptionBuilder(DescriptionBuilder
<QuantitativeData
> quantitativeDescriptionBuilder
){
84 this.quantitativeDescriptionBuilder
= quantitativeDescriptionBuilder
;
88 * @param categoricalDescriptionBuilder
90 public void setCategoricalDescriptionBuilder(DescriptionBuilder
<CategoricalData
> categoricalDescriptionBuilder
){
91 this.categoricalDescriptionBuilder
= categoricalDescriptionBuilder
;
95 * @return the element processors of this generator
97 public Map
<String
, INaturalLanguageTextDataProcessor
> getElementProcessors() {
98 return elementProcessors
;
102 * The keys of the elementProcessors map are regular expressions which are
103 * being used to identify the those Descriptions to which the mapped
104 * NaturalLanguageTextDataProcessor is applicable.
106 * @param elementProcessors
108 public void setElementProcessors(
109 Map
<String
, INaturalLanguageTextDataProcessor
> elementProcessors
) {
110 this.elementProcessors
= elementProcessors
;
114 * Looks for technical annotations, if one matches a regular expression of the element processors
115 * the associated processor is added to the applicable element processors which will then be applied
116 * when generating the description.
118 * @param annotations the set of annotations of the description
120 private void initNaturalLanguageDescriptionElementProcessors(Set
<Annotation
> annotations
) {
122 if(annotations
!= null){
123 for(Annotation annotation
: annotations
){
124 if(annotation
.getAnnotationType().equals(AnnotationType
.TECHNICAL())){
125 if (elementProcessors
!=null){
126 for(String regex
: elementProcessors
.keySet()){
127 if(annotation
.getText().matches(regex
)){
128 applicableElementProcessors
.add(elementProcessors
.get(regex
));
139 * Applies the list of applicable processors to a TextData.
141 * @param textData the TextData to be modified
142 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
144 private void applyNaturalLanguageDescriptionElementProcessors(TextData textData
, TextData previousTextData
){
145 for(INaturalLanguageTextDataProcessor processor
: applicableElementProcessors
){
146 processor
.process(textData
, previousTextData
);
152 * The most simple function to generate a description. The language used is the default one.
154 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
155 * @param description the TaxonDescription with all the data
157 * @return a list of TextData, each one being a basic element of the natural language description
160 public List
<TextData
> generateNaturalLanguageDescription(TermTree featureTree
,TaxonDescription description
) {
161 return generateNaturalLanguageDescription(featureTree
,description
,Language
.DEFAULT());
167 * Generate a description in a specified language.
169 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
170 * @param description the TaxonDescription with all the data
171 * @param language the language in which the description has to be printed
173 * @return a list of TextData, each one being a basic element of the natural language description
176 public List
<TextData
> generateNaturalLanguageDescription(TermTree featureTree
, TaxonDescription description
, Language language
) {
177 List
<Language
> languages
= new ArrayList
<Language
>();
178 languages
.add(language
);
179 initNaturalLanguageDescriptionElementProcessors(description
.getAnnotations());
180 return generatePreferredNaturalLanguageDescription(featureTree
,description
,languages
);
184 * Generate a description with a specified list of preferred languages.
186 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
187 * @param description the TaxonDescription with all the data
188 * @param languages the ordered list of languages preferred for printing the description
190 * @return a list of TextData, each one being a basic element of the natural language description
193 public List
<TextData
> generatePreferredNaturalLanguageDescription(TermTree featureTree
,TaxonDescription description
, List
<Language
> languages
) {
194 initNaturalLanguageDescriptionElementProcessors(description
.getAnnotations());
195 return buildBranchesDescr(featureTree
.getRootChildren(), featureTree
.getRoot(), description
, languages
,0);
199 * Generate a description as a single paragraph in a TextData.
201 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
202 * @param description the TaxonDescription with all the data
204 * @return a TextData in the default language.
207 public TextData
generateSingleTextData(TermTree featureTree
, TaxonDescription description
) {
208 return generateSingleTextData(featureTree
,description
,Language
.DEFAULT());
212 * Generate a description as a single paragraph in a TextData.
214 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
215 * @param description the TaxonDescription with all the data
216 * @param language the language in which the description has to be printed
218 * @return a TextData in the specified language.
221 public TextData
generateSingleTextData(TermTree featureTree
, TaxonDescription description
, Language language
) {
222 List
<Language
> languages
= new ArrayList
<Language
>();
223 languages
.add(language
);
224 return generatePreferredSingleTextData(featureTree
,description
,languages
);
/**
 * Generate a description with a specified list of preferred languages.
 *
 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
 * @param description the TaxonDescription with all the data
 * @param languages the ordered list of languages preferred for printing the description
 *
 * @return a TextData using the languages (in the given order of preference)
 */
// Builds a single TextData by concatenating the texts produced by
// generatePreferredNaturalLanguageDescription, using the 'levels' list filled during
// that call to decide where separators and capitalised sentence starts go.
// NOTE(review): statement lines appear to be missing from this copy of the method. The
// embedded line numbers skip several values inside the loop, the 'else if' below has no
// visible matching 'if', and no visible statement advances 'i' or changes 'startSentence'
// or 'firstOne'. Restore the complete method from the original file before changing it.
237 public TextData
generatePreferredSingleTextData(TermTree featureTree
, TaxonDescription description
, List
<Language
> languages
) {
238 levels
.clear(); // before the start, the table containing the levels of each node must be cleared
239 // Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility
240 List
<TextData
> texts
= generatePreferredNaturalLanguageDescription(featureTree
,description
, languages
);// first get the description as a raw list of TextData
242 StringBuilder descriptionStringBuilder
= new StringBuilder(); // the StringBuilder used to generate the description
243 int i
= 0,j
,level
; // i is used to store the index of the TextData to use
244 boolean startSentence
= false, firstOne
= true;
// walk the level markers in the order in which they were recorded
246 for (j
=0 ; j
<levels
.size() ; j
++){
247 level
= levels
.get(j
);
// the next marker is 0, i.e. a leaf follows: start a new sentence with the second separator
249 if ((j
+1)<levels
.size() && levels
.get(j
+1).equals(0)){ // if this node is the direct father of a leaf
250 descriptionStringBuilder
.append(secondSeparator
+ " ");
// NOTE(review): the text is read for Language.DEFAULT() here and below; the 'languages'
// argument is only passed on to the list generation above - confirm this is intended
253 String asString
= texts
.get(i
).getText(Language
.DEFAULT()).toString();
// append the text with its first character upper-cased (texts of length <= 1 are not appended here)
254 if (asString
.length()>1) {
255 descriptionStringBuilder
.append(asString
.substring(0,1).toUpperCase() + asString
.substring(1));
260 else if (level
==0) { // if this node is a leaf
// two appends are visible for a leaf: the text alone, and the text prefixed with the first
// separator. NOTE(review): the condition choosing between them is not visible here
262 descriptionStringBuilder
.append(texts
.get(i
).getText(Language
.DEFAULT()));
264 descriptionStringBuilder
.append(firstSeparator
+ texts
.get(i
).getText(Language
.DEFAULT()));
270 if (!firstOne
&& levels
.get(j
-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf
// guard against running past the end of the generated texts
271 if (i
<texts
.size()) {
272 descriptionStringBuilder
.append(texts
.get(i
).getText(Language
.DEFAULT()));
// close the paragraph with the second separator
278 descriptionStringBuilder
.append(secondSeparator
);
279 String returnString
= descriptionStringBuilder
.toString();
// NOTE(review): as shown, the search string and the replacement are both a single space,
// which makes this call a no-op; presumably the search string was a double space before
// whitespace was collapsed - confirm against the original file
280 returnString
= StringUtils
.replace(returnString
, " ", " ");
// drop the separator plus space that the sentence-start branch puts at the very beginning
281 returnString
= StringUtils
.removeStart(returnString
, secondSeparator
+ " ");
// the result is always created for Language.DEFAULT(), with a TextFormat built from ("", "Text", "")
282 return TextData
.NewInstance(returnString
,Language
.DEFAULT(),TextFormat
.NewInstance("", "Text", ""));
/**
 * Recursive function that goes through a tree containing the order in which the description has to be generated;
 * if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
 *
 * @param children the children of the feature node considered
 * @param parent the feature node considered
 * @param description the TaxonDescription element for which we want a natural language output
 * @param languages the languages in which the description has to be written
 * @param floor integer to keep track of the level in the tree
 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
 */
297 private List
<TextData
> buildBranchesDescr(List
<TermNode
> children
, TermNode
<Feature
> parent
, TaxonDescription description
, List
<Language
> languages
, int floor
) {
298 List
<TextData
> listTextData
= new ArrayList
<TextData
>();
299 floor
++; // counter to know the current level in the tree
301 if (!parent
.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
302 levels
.add(new Integer(floor
)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
303 Feature feature
= parent
.getTerm();
304 TextData featureName
;
305 if (feature
!=null && feature
.getLabel()!=null){ // if a node is associated to a feature
306 featureName
= categoricalDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
307 levels
.add(new Integer(-1)); // it is indicated by a '-1' after its level
308 listTextData
.add(featureName
); // the TextData representing the name of the feature is concatenated to the list
311 featureName
= new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
314 for (Iterator
<TermNode
> ifn
= children
.iterator() ; ifn
.hasNext() ;){
315 previousTextData
= featureName
; // this allows to keep track of the name of the feature one level up in the tree
316 TermNode fn
= ifn
.next();
317 listTextData
.addAll(buildBranchesDescr(fn
.getChildNodes(),fn
,description
, languages
, floor
));
320 else { //once a leaf is reached
321 Feature feature
= parent
.getTerm();
322 if (feature
!=null && (feature
.isSupportsQuantitativeData() || feature
.isSupportsCategoricalData())) {
323 Set
<DescriptionElementBase
> elements
= description
.getElements();
324 for (Iterator
<DescriptionElementBase
> deb
= elements
.iterator() ; deb
.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
325 DescriptionElementBase descriptionElement
= deb
.next();
326 if (descriptionElement
.getFeature().equals(feature
)){ // if one matches the corresponding feature associated to this leaf
327 if (descriptionElement
instanceof CategoricalData
|| descriptionElement
instanceof QuantitativeData
){
328 TextData featureTextData
;
329 TextData statesTextData
;
330 if (descriptionElement
instanceof CategoricalData
) { // if this description is a CategoricalData, generate the according TextData
331 CategoricalData categoricalData
= (CategoricalData
) descriptionElement
;
332 statesTextData
= categoricalDescriptionBuilder
.build(categoricalData
, languages
);
333 featureTextData
= categoricalDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
335 else { // if this description is a QuantitativeData, generate the according TextData
336 QuantitativeData quantitativeData
= (QuantitativeData
) descriptionElement
;
337 statesTextData
= quantitativeDescriptionBuilder
.build(quantitativeData
, languages
);
338 featureTextData
= quantitativeDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
340 applyNaturalLanguageDescriptionElementProcessors(featureTextData
, previousTextData
);
341 levels
.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
342 listTextData
.add(featureTextData
);
343 levels
.add(new Integer(floor
)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
344 listTextData
.add(statesTextData
);