Corrected the inheritance of service configurators
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / NaturalLanguageGenerator.java
1 package eu.etaxonomy.cdm.api.service;
2
3 import java.util.ArrayList;
4 import java.util.HashSet;
5 import java.util.Iterator;
6 import java.util.List;
7 import java.util.Map;
8 import java.util.Set;
9
10 import org.apache.commons.lang.StringUtils;
11 import org.apache.log4j.Logger;
12 import org.springframework.stereotype.Component;
13
14 import eu.etaxonomy.cdm.model.description.CategoricalData;
15 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
16 import eu.etaxonomy.cdm.model.description.Feature;
17 import eu.etaxonomy.cdm.model.description.FeatureNode;
18 import eu.etaxonomy.cdm.model.description.FeatureTree;
19 import eu.etaxonomy.cdm.model.description.QuantitativeData;
20 import eu.etaxonomy.cdm.model.description.TaxonDescription;
21 import eu.etaxonomy.cdm.model.description.TextData;
22 import eu.etaxonomy.cdm.model.description.TextFormat;
23 import eu.etaxonomy.cdm.model.common.Annotation;
24 import eu.etaxonomy.cdm.model.common.AnnotationType;
25 import eu.etaxonomy.cdm.model.common.Language;
26
27
28 /**
29 * Generator of natural language descriptions from TaxonDescriptions.
30 *
31 * @author m.venin
32 * @created 13.04.2010
33 * @version 1.0
34 */
35 @Component
36 public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
37 @SuppressWarnings("unused")
38 private static final Logger logger = Logger.getLogger(NaturalLanguageGenerator.class);
39
40 private String firstSeparator = ",";
41 private String secondSeparator = ".";
42 private List<Integer> levels = new ArrayList<Integer>();
43
44 private DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder();
45 private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder();
46
47 private TextData previousTextData;
48
49 private Map<String, INaturalLanguageTextDataProcessor> elementProcessors;
50
51 private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
52
53 /**
54 * Change the first separator used by generateSingleTextData. By default ",".
55 *
56 * @param separator
57 */
58 public void setFirstSeparator(String separator){
59 firstSeparator=separator;
60 }
61
62 public String getFirstSeparator(){
63 return firstSeparator;
64 }
65
66 /**
67 * Change the second separator used by generateSingleTextData. By default ".".
68 *
69 * @param separator
70 */
71 public void setSecondSeparator(String separator){
72 secondSeparator=separator;
73 }
74
75 public String getSecondSeparator(){
76 return secondSeparator;
77 }
78
79 /**
80 * @param quantitativeDescriptionBuilder
81 */
82 public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
83 this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
84 }
85
86 /**
87 * @param categoricalDescriptionBuilder
88 */
89 public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
90 this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
91 }
92
93 /**
94 * @return the element processors of this generator
95 */
96 public Map<String, INaturalLanguageTextDataProcessor> getElementProcessors() {
97 return elementProcessors;
98 }
99
100 /**
101 * The keys of the elementProcessors map are regular expressions which are
102 * being used to identify the those Descriptions to which the mapped
103 * NaturalLanguageTextDataProcessor is applicable.
104 *
105 * @param elementProcessors
106 */
107 public void setElementProcessors(
108 Map<String, INaturalLanguageTextDataProcessor> elementProcessors) {
109 this.elementProcessors = elementProcessors;
110 }
111
112 /**
113 * Looks for technical annotations, if one matches a regular expression of the element processors
114 * the associated processor is added to the applicable element processors which will then be applied
115 * when generating the description.
116 *
117 * @param annotations the set of annotations of the description
118 */
119 private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) {
120
121 if(annotations != null){
122 for(Annotation annotation : annotations){
123 if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){
124 if (elementProcessors!=null){
125 for(String regex : elementProcessors.keySet()){
126 if(annotation.getText().matches(regex)){
127 applicableElementProcessors.add(elementProcessors.get(regex));
128 }
129 }
130 }
131 }
132 }
133 }
134 }
135
136
137 /**
138 * Applies the list of applicable processors to a TextData.
139 *
140 * @param textData the TextData to be modified
141 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
142 */
143 private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){
144 for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){
145 processor.process(textData, previousTextData);
146 }
147 }
148
149
150 /**
151 * The most simple function to generate a description. The language used is the default one.
152 *
153 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
154 * @param description the TaxonDescription with all the data
155 *
156 * @return a list of TextData, each one being a basic element of the natural language description
157 */
158 public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description) {
159 return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT());
160 }
161
162
163
164 /**
165 * Generate a description in a specified language.
166 *
167 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
168 * @param description the TaxonDescription with all the data
169 * @param language the language in which the description has to be printed
170 *
171 * @return a list of TextData, each one being a basic element of the natural language description
172 */
173 public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, Language language) {
174 List<Language> languages = new ArrayList<Language>();
175 languages.add(language);
176 initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
177 return generatePreferredNaturalLanguageDescription(featureTree,description,languages);
178 }
179
180 /**
181 * Generate a description with a specified list of preferred languages.
182 *
183 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
184 * @param description the TaxonDescription with all the data
185 * @param languages the ordered list of languages preferred for printing the description
186 *
187 * @return a list of TextData, each one being a basic element of the natural language description
188 */
189 public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List<Language> languages) {
190 initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
191 return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0);
192 }
193
194 /**
195 * Generate a description as a single paragraph in a TextData.
196 *
197 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
198 * @param description the TaxonDescription with all the data
199 *
200 * @return a TextData in the default language.
201 */
202 public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description) {
203 return generateSingleTextData(featureTree,description,Language.DEFAULT());
204 }
205
206 /**
207 * Generate a description as a single paragraph in a TextData.
208 *
209 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
210 * @param description the TaxonDescription with all the data
211 * @param language the language in which the description has to be printed
212 *
213 * @return a TextData in the specified language.
214 */
215 public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description, Language language) {
216 List<Language> languages = new ArrayList<Language>();
217 languages.add(language);
218 return generatePreferredSingleTextData(featureTree,description,languages);
219 }
220
221 /**
222 * Generate a description with a specified list of preferred languages.
223 *
224 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
225 * @param description the TaxonDescription with all the data
226 * @param languages the ordered list of languages preferred for printing the description
227 *
228 * @return a TextData using the languages (in the given order of preference)
229 */
230 public TextData generatePreferredSingleTextData(FeatureTree featureTree, TaxonDescription description, List<Language> languages) {
231 levels.clear(); // before the start, the table containing the levels of each node must be cleared
232 // Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility
233 List<TextData> texts = generatePreferredNaturalLanguageDescription(featureTree,description, languages);// first get the description as a raw list of TextData
234
235 StringBuilder descriptionStringBuilder = new StringBuilder(); // the StringBuilder used to generate the description
236 int i = 0,j,level; // i is used to store the index of the TextData to use
237 boolean startSentence = false, firstOne = true;
238
239 for (j=0 ; j<levels.size() ; j++){
240 level = levels.get(j);
241 if (level==-1){
242 if ((j+1)<levels.size() && levels.get(j+1).equals(0)){ // if this node is the direct father of a leaf
243 descriptionStringBuilder.append(secondSeparator + " ");
244 startSentence=true;
245 firstOne=false;
246 String asString = texts.get(i).getText(Language.DEFAULT()).toString();
247 if (asString.length()>1) descriptionStringBuilder.append(asString.substring(0,1).toUpperCase() + asString.substring(1));
248 }
249 i++;
250 }
251 else if (level==0) { // if this node is a leaf
252 if (startSentence) descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
253 else descriptionStringBuilder.append(firstSeparator + texts.get(i).getText(Language.DEFAULT()));
254 startSentence=false;
255 i++;
256 }
257 else {
258 if (!firstOne && levels.get(j-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf
259 if (i<texts.size()) descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
260 i++;
261 }
262 }
263 }
264 descriptionStringBuilder.append(secondSeparator);
265 String returnString = descriptionStringBuilder.toString();
266 returnString = StringUtils.replace(returnString, " ", " ");
267 returnString = StringUtils.removeStart(returnString, secondSeparator + " ");
268 return TextData.NewInstance(returnString,Language.DEFAULT(),TextFormat.NewInstance("", "Text", ""));
269 }
270
271
272
273 /** recursive function that goes through a tree containing the order in which the description has to be generated,
274 * if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
275 *
276 * @param children the children of the feature node considered
277 * @param parent the feature node considered
278 * @param description the TaxonDescription element for which we want a natural language output
279 * @param language The language in which the description has to be written
280 * @param floor integer to keep track of the level in the tree
281 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
282 */
283 private List<TextData> buildBranchesDescr(List<FeatureNode> children, FeatureNode parent, TaxonDescription description, List<Language> languages, int floor) {
284 List<TextData> listTextData = new ArrayList<TextData>();
285 floor++; // counter to know the current level in the tree
286
287 if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
288 levels.add(new Integer(floor)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
289 Feature feature = parent.getFeature();
290 TextData featureName;
291 if (feature!=null && feature.getLabel()!=null){ // if a node is associated to a feature
292 featureName = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
293 levels.add(new Integer(-1)); // it is indicated by a '-1' after its level
294 listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list
295 }
296 else featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
297
298 for (Iterator<FeatureNode> ifn = children.iterator() ; ifn.hasNext() ;){
299 previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree
300 FeatureNode fn = ifn.next();
301 listTextData.addAll(buildBranchesDescr(fn.getChildren(),fn,description, languages, floor));
302 }
303 }
304 else { //once a leaf is reached
305 Feature feature = parent.getFeature();
306 if (feature!=null && (feature.isSupportsQuantitativeData() || feature.isSupportsCategoricalData())) {
307 Set<DescriptionElementBase> elements = description.getElements();
308 for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
309 DescriptionElementBase descriptionElement = deb.next();
310 if (descriptionElement.getFeature().equals(feature)){ // if one matches the corresponding feature associated to this leaf
311 if (descriptionElement instanceof CategoricalData || descriptionElement instanceof QuantitativeData){
312 TextData featureTextData;
313 TextData statesTextData;
314 if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
315 CategoricalData categoricalData = (CategoricalData) descriptionElement;
316 statesTextData = categoricalDescriptionBuilder.build(categoricalData, languages);
317 featureTextData = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
318 }
319 else { // if this description is a QuantitativeData, generate the according TextData
320 QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
321 statesTextData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
322 featureTextData = quantitativeDescriptionBuilder.buildTextDataFeature(feature, languages);
323 }
324 applyNaturalLanguageDescriptionElementProcessors(featureTextData, previousTextData);
325 levels.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
326 listTextData.add(featureTextData);
327 levels.add(new Integer(floor)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
328 listTextData.add(statesTextData);
329 }
330 }
331 }
332 }
333 }
334 return listTextData;
335 }
336
337 }