latest changes for findByIdentifier service
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / NaturalLanguageGenerator.java
1 package eu.etaxonomy.cdm.api.service;
2
3 import java.util.ArrayList;
4 import java.util.HashSet;
5 import java.util.Iterator;
6 import java.util.List;
7 import java.util.Map;
8 import java.util.Set;
9
10 import org.apache.commons.lang.StringUtils;
11 import org.apache.log4j.Logger;
12 import org.springframework.stereotype.Component;
13
14 import eu.etaxonomy.cdm.model.description.CategoricalData;
15 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
16 import eu.etaxonomy.cdm.model.description.Feature;
17 import eu.etaxonomy.cdm.model.description.FeatureNode;
18 import eu.etaxonomy.cdm.model.description.FeatureTree;
19 import eu.etaxonomy.cdm.model.description.QuantitativeData;
20 import eu.etaxonomy.cdm.model.description.TaxonDescription;
21 import eu.etaxonomy.cdm.model.description.TextData;
22 import eu.etaxonomy.cdm.model.description.TextFormat;
23 import eu.etaxonomy.cdm.model.common.Annotation;
24 import eu.etaxonomy.cdm.model.common.AnnotationType;
25 import eu.etaxonomy.cdm.model.common.Language;
26
27
28 /**
29 * Generator of natural language descriptions from TaxonDescriptions.
30 *
31 * @author m.venin
32 * @created 13.04.2010
33 * @version 1.0
34 */
35 @Component
36 public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
37 @SuppressWarnings("unused")
38 private static final Logger logger = Logger.getLogger(NaturalLanguageGenerator.class);
39
40 private String firstSeparator = ",";
41 private String secondSeparator = ".";
42 private List<Integer> levels = new ArrayList<Integer>();
43
44 private DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder();
45 private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder();
46
47 private TextData previousTextData;
48
49 DeltaTextDataProcessor deltaTextDataProcessor = new DeltaTextDataProcessor();
50
51 private Map<String, INaturalLanguageTextDataProcessor> elementProcessors;
52
53 private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
54
55 /**
56 * Change the first separator used by generateSingleTextData. By default ",".
57 *
58 * @param separator
59 */
60 public void setFirstSeparator(String separator){
61 firstSeparator=separator;
62 }
63
64 public String getFirstSeparator(){
65 return firstSeparator;
66 }
67
68 /**
69 * Change the second separator used by generateSingleTextData. By default ".".
70 *
71 * @param separator
72 */
73 public void setSecondSeparator(String separator){
74 secondSeparator=separator;
75 }
76
77 public String getSecondSeparator(){
78 return secondSeparator;
79 }
80
81 /**
82 * @param quantitativeDescriptionBuilder
83 */
84 public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
85 this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
86 }
87
88 /**
89 * @param categoricalDescriptionBuilder
90 */
91 public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
92 this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
93 }
94
95 /**
96 * @return the element processors of this generator
97 */
98 public Map<String, INaturalLanguageTextDataProcessor> getElementProcessors() {
99 return elementProcessors;
100 }
101
102 /**
103 * The keys of the elementProcessors map are regular expressions which are
104 * being used to identify the those Descriptions to which the mapped
105 * NaturalLanguageTextDataProcessor is applicable.
106 *
107 * @param elementProcessors
108 */
109 public void setElementProcessors(
110 Map<String, INaturalLanguageTextDataProcessor> elementProcessors) {
111 this.elementProcessors = elementProcessors;
112 }
113
114 /**
115 * Looks for technical annotations, if one matches a regular expression of the element processors
116 * the associated processor is added to the applicable element processors which will then be applied
117 * when generating the description.
118 *
119 * @param annotations the set of annotations of the description
120 */
121 private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) {
122
123 if(annotations != null){
124 for(Annotation annotation : annotations){
125 if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){
126 if (elementProcessors!=null){
127 for(String regex : elementProcessors.keySet()){
128 if(annotation.getText().matches(regex)){
129 applicableElementProcessors.add(elementProcessors.get(regex));
130 }
131 }
132 }
133 }
134 }
135 }
136 }
137
138
139 /**
140 * Applies the list of applicable processors to a TextData.
141 *
142 * @param textData the TextData to be modified
143 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
144 */
145 private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){
146 for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){
147 processor.process(textData, previousTextData);
148 }
149 }
150
151
152 /**
153 * The most simple function to generate a description. The language used is the default one.
154 *
155 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
156 * @param description the TaxonDescription with all the data
157 *
158 * @return a list of TextData, each one being a basic element of the natural language description
159 */
160 public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description) {
161 return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT());
162 }
163
164
165
166 /**
167 * Generate a description in a specified language.
168 *
169 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
170 * @param description the TaxonDescription with all the data
171 * @param language the language in which the description has to be printed
172 *
173 * @return a list of TextData, each one being a basic element of the natural language description
174 */
175 public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, Language language) {
176 List<Language> languages = new ArrayList<Language>();
177 languages.add(language);
178 initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
179 return generatePreferredNaturalLanguageDescription(featureTree,description,languages);
180 }
181
182 /**
183 * Generate a description with a specified list of preferred languages.
184 *
185 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
186 * @param description the TaxonDescription with all the data
187 * @param languages the ordered list of languages preferred for printing the description
188 *
189 * @return a list of TextData, each one being a basic element of the natural language description
190 */
191 public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List<Language> languages) {
192 initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
193 return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0);
194 }
195
196 /**
197 * Generate a description as a single paragraph in a TextData.
198 *
199 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
200 * @param description the TaxonDescription with all the data
201 *
202 * @return a TextData in the default language.
203 */
204 public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description) {
205 return generateSingleTextData(featureTree,description,Language.DEFAULT());
206 }
207
208 /**
209 * Generate a description as a single paragraph in a TextData.
210 *
211 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
212 * @param description the TaxonDescription with all the data
213 * @param language the language in which the description has to be printed
214 *
215 * @return a TextData in the specified language.
216 */
217 public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description, Language language) {
218 List<Language> languages = new ArrayList<Language>();
219 languages.add(language);
220 return generatePreferredSingleTextData(featureTree,description,languages);
221 }
222
223 /**
224 * Generate a description with a specified list of preferred languages.
225 *
226 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
227 * @param description the TaxonDescription with all the data
228 * @param languages the ordered list of languages preferred for printing the description
229 *
230 * @return a TextData using the languages (in the given order of preference)
231 */
232 public TextData generatePreferredSingleTextData(FeatureTree featureTree, TaxonDescription description, List<Language> languages) {
233 levels.clear(); // before the start, the table containing the levels of each node must be cleared
234 // Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility
235 List<TextData> texts = generatePreferredNaturalLanguageDescription(featureTree,description, languages);// first get the description as a raw list of TextData
236
237 StringBuilder descriptionStringBuilder = new StringBuilder(); // the StringBuilder used to generate the description
238 int i = 0,j,level; // i is used to store the index of the TextData to use
239 boolean startSentence = false, firstOne = true;
240
241 for (j=0 ; j<levels.size() ; j++){
242 level = levels.get(j);
243 if (level==-1){
244 if ((j+1)<levels.size() && levels.get(j+1).equals(0)){ // if this node is the direct father of a leaf
245 descriptionStringBuilder.append(secondSeparator + " ");
246 startSentence=true;
247 firstOne=false;
248 String asString = texts.get(i).getText(Language.DEFAULT()).toString();
249 if (asString.length()>1) descriptionStringBuilder.append(asString.substring(0,1).toUpperCase() + asString.substring(1));
250 }
251 i++;
252 }
253 else if (level==0) { // if this node is a leaf
254 if (startSentence) descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
255 else descriptionStringBuilder.append(firstSeparator + texts.get(i).getText(Language.DEFAULT()));
256 startSentence=false;
257 i++;
258 }
259 else {
260 if (!firstOne && levels.get(j-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf
261 if (i<texts.size()) descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
262 i++;
263 }
264 }
265 }
266 descriptionStringBuilder.append(secondSeparator);
267 String returnString = descriptionStringBuilder.toString();
268 returnString = StringUtils.replace(returnString, " ", " ");
269 returnString = StringUtils.removeStart(returnString, secondSeparator + " ");
270 return TextData.NewInstance(returnString,Language.DEFAULT(),TextFormat.NewInstance("", "Text", ""));
271 }
272
273
274
275 /** recursive function that goes through a tree containing the order in which the description has to be generated,
276 * if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
277 *
278 * @param children the children of the feature node considered
279 * @param parent the feature node considered
280 * @param description the TaxonDescription element for which we want a natural language output
281 * @param language The language in which the description has to be written
282 * @param floor integer to keep track of the level in the tree
283 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
284 */
285 private List<TextData> buildBranchesDescr(List<FeatureNode> children, FeatureNode parent, TaxonDescription description, List<Language> languages, int floor) {
286 List<TextData> listTextData = new ArrayList<TextData>();
287 floor++; // counter to know the current level in the tree
288
289 if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
290 levels.add(new Integer(floor)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
291 Feature feature = parent.getFeature();
292 TextData featureName;
293 if (feature!=null && feature.getLabel()!=null){ // if a node is associated to a feature
294 featureName = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
295 levels.add(new Integer(-1)); // it is indicated by a '-1' after its level
296 listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list
297 }
298 else featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
299
300 for (Iterator<FeatureNode> ifn = children.iterator() ; ifn.hasNext() ;){
301 previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree
302 FeatureNode fn = ifn.next();
303 listTextData.addAll(buildBranchesDescr(fn.getChildNodes(),fn,description, languages, floor));
304 }
305 }
306 else { //once a leaf is reached
307 Feature feature = parent.getFeature();
308 if (feature!=null && (feature.isSupportsQuantitativeData() || feature.isSupportsCategoricalData())) {
309 Set<DescriptionElementBase> elements = description.getElements();
310 for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
311 DescriptionElementBase descriptionElement = deb.next();
312 if (descriptionElement.getFeature().equals(feature)){ // if one matches the corresponding feature associated to this leaf
313 if (descriptionElement instanceof CategoricalData || descriptionElement instanceof QuantitativeData){
314 TextData featureTextData;
315 TextData statesTextData;
316 if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
317 CategoricalData categoricalData = (CategoricalData) descriptionElement;
318 statesTextData = categoricalDescriptionBuilder.build(categoricalData, languages);
319 featureTextData = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
320 }
321 else { // if this description is a QuantitativeData, generate the according TextData
322 QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
323 statesTextData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
324 featureTextData = quantitativeDescriptionBuilder.buildTextDataFeature(feature, languages);
325 }
326 applyNaturalLanguageDescriptionElementProcessors(featureTextData, previousTextData);
327 levels.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
328 listTextData.add(featureTextData);
329 levels.add(new Integer(floor)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
330 listTextData.add(statesTextData);
331 }
332 }
333 }
334 }
335 }
336 return listTextData;
337 }
338
339 }