1 package eu
.etaxonomy
.cdm
.api
.service
;
3 import java
.util
.ArrayList
;
4 import java
.util
.HashSet
;
5 import java
.util
.Iterator
;
10 import org
.apache
.commons
.lang
.StringUtils
;
11 import org
.springframework
.stereotype
.Component
;
13 import eu
.etaxonomy
.cdm
.model
.description
.CategoricalData
;
14 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
15 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
16 import eu
.etaxonomy
.cdm
.model
.description
.FeatureNode
;
17 import eu
.etaxonomy
.cdm
.model
.description
.FeatureTree
;
18 import eu
.etaxonomy
.cdm
.model
.description
.QuantitativeData
;
19 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
20 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
21 import eu
.etaxonomy
.cdm
.model
.description
.TextFormat
;
22 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
23 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
24 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
28 * Generator of natural language descriptions from TaxonDescriptions.
35 public class NaturalLanguageGenerator
implements INaturalLanguageGenerator
{
// Separator placed in front of a leaf feature's text when that text does not start a sentence
// in the single-paragraph output; "," by default.
37 private String firstSeparator
= ",";
// Separator appended at the end of the single-paragraph output and, followed by a space,
// between sentences; "." by default.
38 private String secondSeparator
= ".";
// Markers recorded by buildBranchesDescr while walking the feature tree: the depth of a node,
// -1 after the depth of a node that has a labelled feature, and 0 for a leaf feature.
// generatePreferredSingleTextData reads them back to structure the paragraph.
39 private List
<Integer
> levels
= new ArrayList
<Integer
>();
// Builder used for QuantitativeData elements; replaceable through its setter.
41 private DescriptionBuilder
<QuantitativeData
> quantitativeDescriptionBuilder
= new DefaultQuantitativeDescriptionBuilder();
// Builder used for CategoricalData elements and for feature names of non-leaf nodes;
// replaceable through its setter.
42 private DescriptionBuilder
<CategoricalData
> categoricalDescriptionBuilder
= new DefaultCategoricalDescriptionBuilder();
// TextData holding the name of the feature one level up in the tree; assigned by
// buildBranchesDescr and handed to the element processors.
44 private TextData previousTextData
;
// Configured processors keyed by a regular expression; has no initial value, so it is null
// until setElementProcessors is called.
46 private Map
<String
, INaturalLanguageTextDataProcessor
> elementProcessors
;
// Processors whose regular expression matched a technical annotation of a description.
// NOTE(review): nothing in the visible code ever removes entries from this set - confirm
// whether it is meant to accumulate across descriptions.
48 private Set
<INaturalLanguageTextDataProcessor
> applicableElementProcessors
= new HashSet
<INaturalLanguageTextDataProcessor
>();
51 * Change the first separator used by generateSingleTextData. By default ",".
55 public void setFirstSeparator(String separator
){
56 firstSeparator
=separator
;
59 public String
getFirstSeparator(){
60 return firstSeparator
;
64 * Change the second separator used by generateSingleTextData. By default ".".
68 public void setSecondSeparator(String separator
){
69 secondSeparator
=separator
;
72 public String
getSecondSeparator(){
73 return secondSeparator
;
77 * @param quantitativeDescriptionBuilder
79 public void setQuantitativeDescriptionBuilder(DescriptionBuilder
<QuantitativeData
> quantitativeDescriptionBuilder
){
80 this.quantitativeDescriptionBuilder
= quantitativeDescriptionBuilder
;
84 * @param categoricalDescriptionBuilder
86 public void setCategoricalDescriptionBuilder(DescriptionBuilder
<CategoricalData
> categoricalDescriptionBuilder
){
87 this.categoricalDescriptionBuilder
= categoricalDescriptionBuilder
;
91 * @return the element processors of this generator
93 public Map
<String
, INaturalLanguageTextDataProcessor
> getElementProcessors() {
94 return elementProcessors
;
98 * The keys of the elementProcessors map are regular expressions which are
99 * being used to identify the those Descriptions to which the mapped
100 * NaturalLanguageTextDataProcessor is applicable.
102 * @param elementProcessors
104 public void setElementProcessors(
105 Map
<String
, INaturalLanguageTextDataProcessor
> elementProcessors
) {
106 this.elementProcessors
= elementProcessors
;
110 * Looks for technical annotations, if one matches a regular expression of the element processors
111 * the associated processor is added to the applicable element processors which will then be applied
112 * when generating the description.
114 * @param annotations the set of annotations of the description
116 private void initNaturalLanguageDescriptionElementProcessors(Set
<Annotation
> annotations
) {
118 if(annotations
!= null){
119 for(Annotation annotation
: annotations
){
120 if(annotation
.getAnnotationType().equals(AnnotationType
.TECHNICAL())){
121 if (elementProcessors
!=null){
122 for(String regex
: elementProcessors
.keySet()){
123 if(annotation
.getText().matches(regex
)){
124 applicableElementProcessors
.add(elementProcessors
.get(regex
));
135 * Applies the list of applicable processors to a TextData.
137 * @param textData the TextData to be modified
138 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
140 private void applyNaturalLanguageDescriptionElementProcessors(TextData textData
, TextData previousTextData
){
141 for(INaturalLanguageTextDataProcessor processor
: applicableElementProcessors
){
142 processor
.process(textData
, previousTextData
);
148 * The most simple function to generate a description. The language used is the default one.
150 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
151 * @param description the TaxonDescription with all the data
153 * @return a list of TextData, each one being a basic element of the natural language description
155 public List
<TextData
> generateNaturalLanguageDescription(FeatureTree featureTree
,TaxonDescription description
) {
156 return generateNaturalLanguageDescription(featureTree
,description
,Language
.DEFAULT());
162 * Generate a description in a specified language.
164 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
165 * @param description the TaxonDescription with all the data
166 * @param language the language in which the description has to be printed
168 * @return a list of TextData, each one being a basic element of the natural language description
170 public List
<TextData
> generateNaturalLanguageDescription(FeatureTree featureTree
, TaxonDescription description
, Language language
) {
171 List
<Language
> languages
= new ArrayList
<Language
>();
172 languages
.add(language
);
173 initNaturalLanguageDescriptionElementProcessors(description
.getAnnotations());
174 return generatePreferredNaturalLanguageDescription(featureTree
,description
,languages
);
178 * Generate a description with a specified list of preferred languages.
180 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
181 * @param description the TaxonDescription with all the data
182 * @param languages the ordered list of languages preferred for printing the description
184 * @return a list of TextData, each one being a basic element of the natural language description
186 public List
<TextData
> generatePreferredNaturalLanguageDescription(FeatureTree featureTree
,TaxonDescription description
, List
<Language
> languages
) {
187 initNaturalLanguageDescriptionElementProcessors(description
.getAnnotations());
188 return buildBranchesDescr(featureTree
.getRootChildren(), featureTree
.getRoot(), description
, languages
,0);
192 * Generate a description as a single paragraph in a TextData.
194 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
195 * @param description the TaxonDescription with all the data
197 * @return a TextData in the default language.
199 public TextData
generateSingleTextData(FeatureTree featureTree
, TaxonDescription description
) {
200 return generateSingleTextData(featureTree
,description
,Language
.DEFAULT());
204 * Generate a description as a single paragraph in a TextData.
206 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
207 * @param description the TaxonDescription with all the data
208 * @param language the language in which the description has to be printed
210 * @return a TextData in the specified language.
212 public TextData
generateSingleTextData(FeatureTree featureTree
, TaxonDescription description
, Language language
) {
213 List
<Language
> languages
= new ArrayList
<Language
>();
214 languages
.add(language
);
215 return generatePreferredSingleTextData(featureTree
,description
,languages
);
219 * Generate a description with a specified list of preferred languages.
221 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
222 * @param description the TaxonDescription with all the data
223 * @param languages the ordered list of languages preferred for printing the description
225 * @return a TextData using the languages (in the given order of preference)
// Builds the description as one paragraph: the TextData list produced by buildBranchesDescr
// is concatenated into a StringBuilder, guided by the markers stored in the `levels` field.
// NOTE(review): the declarations of i, j and level, the updates of i, startSentence and
// firstOne, and the closing braces are not visible in this excerpt - confirm the exact
// nesting against the full file.
227 public TextData
generatePreferredSingleTextData(FeatureTree featureTree
, TaxonDescription description
, List
<Language
> languages
) {
// Select the processors applicable to this description's annotations.
229 initNaturalLanguageDescriptionElementProcessors(description
.getAnnotations());
// Walk the feature tree from its root (floor 0) to obtain the individual text elements.
231 List
<TextData
> texts
= buildBranchesDescr(featureTree
.getRootChildren(), featureTree
.getRoot(), description
, languages
,0);
232 StringBuilder sb
= new StringBuilder();
234 boolean startSentence
= false, firstOne
= true;
// One pass over the recorded markers; i indexes into `texts`.
237 for (j
=0 ; j
<levels
.size() ; j
++){
238 level
= levels
.get(j
);
// -1 is the marker buildBranchesDescr adds for a non-leaf node that has a labelled feature.
239 if (level
.equals(-1)){
// Only handled when the next marker is 0, i.e. a leaf feature follows.
240 if ((j
+1)<levels
.size() && levels
.get(j
+1).equals(0)){
// Close the previous sentence before starting a new one.
241 if (!firstOne
) sb
.append(secondSeparator
+ " ");
// NOTE(review): the text is always read in Language.DEFAULT(), not from `languages`.
244 String asString
= texts
.get(i
).getText(Language
.DEFAULT()).toString();
// Upper-case the first character; texts of length 0 or 1 are not appended at all.
245 if (asString
.length()>1) sb
.append(asString
.substring(0,1).toUpperCase() + asString
.substring(1));
// 0 is the marker for a leaf feature.
249 else if (level
.equals(0)) {
250 if (startSentence
) sb
.append(texts
.get(i
).getText(Language
.DEFAULT()));
// Not at the start of a sentence: put firstSeparator in front of the text.
251 else sb
.append(firstSeparator
+ texts
.get(i
).getText(Language
.DEFAULT()));
// Any other marker is a depth in the tree.
255 else if (!level
.equals(0) && !level
.equals(-1)){
// Only a depth directly preceded by a 0 marker contributes text.
256 if (!firstOne
&& levels
.get(j
-1).equals(0)){
257 if (i
<texts
.size()) sb
.append(texts
.get(i
).getText(Language
.DEFAULT()));
262 sb
.append(secondSeparator
);
263 String returnString
= sb
.toString();
// NOTE(review): as shown, the search string and the replacement are identical, which makes
// this call a no-op - whitespace inside the literal may have been lost; confirm.
264 returnString
= StringUtils
.replace(returnString
, " ", " ");
265 returnString
= StringUtils
.removeStart(returnString
, secondSeparator
);
// NOTE(review): the returned TextData is built from sb.toString() again, so returnString
// (including the removeStart result above) is never used - confirm whether returnString
// was meant to be passed here.
266 return TextData
.NewInstance(StringUtils
.replace(sb
.toString(), " ", " "),Language
.DEFAULT(),TextFormat
.NewInstance("", "Text", ""));
271 /** recursive function that goes through a tree containing the order in which the description has to be generated,
272 * if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
274 * @param children the children of the feature node considered
275 * @param parent the feature node considered
276 * @param description the TaxonDescription element for which we want a natural language output
277 * @param languages the ordered list of languages preferred for writing the description
278 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
280 private List
<TextData
> buildBranchesDescr(List
<FeatureNode
> children
, FeatureNode parent
, TaxonDescription description
, List
<Language
> languages
, int floor
) {
281 List
<TextData
> listTextData
= new ArrayList
<TextData
>();
282 floor
++; // counter to know the current level in the tree
284 if (!parent
.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
285 levels
.add(new Integer(floor
)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
286 Feature feature
= parent
.getFeature();
287 TextData featureName
;
288 if (feature
!=null && feature
.getLabel()!=null){ // if a node is associated to a feature
289 featureName
= categoricalDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
290 levels
.add(new Integer(-1)); // it is indicated by a '-1' after its level
291 listTextData
.add(featureName
); // the TextData representing the name of the feature is concatenated to the list
293 else featureName
= new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
295 for (Iterator
<FeatureNode
> ifn
= children
.iterator() ; ifn
.hasNext() ;){
296 previousTextData
= featureName
; // this allows to keep track of the name of the feature one level up in the tree
297 FeatureNode fn
= ifn
.next();
298 listTextData
.addAll(buildBranchesDescr(fn
.getChildren(),fn
,description
, languages
, floor
));
301 else { //once a leaf is reached
302 Feature feature
= parent
.getFeature();
303 if (feature
!=null && (feature
.isSupportsQuantitativeData() || feature
.isSupportsCategoricalData())) {
304 Set
<DescriptionElementBase
> elements
= description
.getElements();
305 for (Iterator
<DescriptionElementBase
> deb
= elements
.iterator() ; deb
.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
306 DescriptionElementBase descriptionElement
= deb
.next();
307 if (descriptionElement
.getFeature().equals(feature
)){ // if one matches the corresponding feature associated to this leaf
308 if (descriptionElement
instanceof CategoricalData
|| descriptionElement
instanceof QuantitativeData
){
309 TextData featureTextData
;
310 TextData statesTextData
;
311 if (descriptionElement
instanceof CategoricalData
) { // if this description is a CategoricalData, generate the according TextData
312 CategoricalData categoricalData
= (CategoricalData
) descriptionElement
;
313 statesTextData
= categoricalDescriptionBuilder
.build(categoricalData
, languages
);
314 featureTextData
= categoricalDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
316 else { // if this description is a QuantitativeData, generate the according TextData
317 QuantitativeData quantitativeData
= (QuantitativeData
) descriptionElement
;
318 statesTextData
= quantitativeDescriptionBuilder
.build(quantitativeData
, languages
);
319 featureTextData
= quantitativeDescriptionBuilder
.buildTextDataFeature(feature
, languages
);
321 applyNaturalLanguageDescriptionElementProcessors(featureTextData
, previousTextData
);
322 levels
.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
323 listTextData
.add(featureTextData
);
324 levels
.add(new Integer(floor
)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
325 listTextData
.add(statesTextData
);