minor
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / NaturalLanguageGenerator.java
1 package eu.etaxonomy.cdm.api.service;
2
3 import java.util.ArrayList;
4 import java.util.HashMap;
5 import java.util.HashSet;
6 import java.util.Iterator;
7 import java.util.List;
8 import java.util.Map;
9 import java.util.Set;
10 import java.util.regex.Pattern;
11
12 import org.apache.commons.lang.StringUtils;
13 import org.springframework.stereotype.Component;
14
15 import eu.etaxonomy.cdm.model.description.CategoricalData;
16 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
17 import eu.etaxonomy.cdm.model.description.Feature;
18 import eu.etaxonomy.cdm.model.description.FeatureNode;
19 import eu.etaxonomy.cdm.model.description.FeatureTree;
20 import eu.etaxonomy.cdm.model.description.QuantitativeData;
21 import eu.etaxonomy.cdm.model.description.TaxonDescription;
22 import eu.etaxonomy.cdm.model.description.TextData;
23 import eu.etaxonomy.cdm.model.common.Annotation;
24 import eu.etaxonomy.cdm.model.common.AnnotationType;
25 import eu.etaxonomy.cdm.model.common.Language;
26
27 @Component
28 public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
29
30 private DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder();
31 private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder();
32
33 private String previousFeatureName;
34
35 private Map<String, INaturalLanguageTextDataProcessor> elementProcessors;
36
37 public Map<String, INaturalLanguageTextDataProcessor> getElementProcessors() {
38 return elementProcessors;
39 }
40
41 /**
42 * The keys of the elementProcessors map are regular expressions which are
43 * being used to identify the those Descriptions to which the mapped
44 * NaturalLanguageTextDataProcessor is applicable.
45 *
46 * @param elementProcessors
47 */
48 public void setElementProcessors(
49 Map<String, INaturalLanguageTextDataProcessor> elementProcessors) {
50 this.elementProcessors = elementProcessors;
51 }
52
53 private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
54
55 /**
56 * @param annotations
57 */
58 private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) {
59
60 if(annotations != null){
61 for(Annotation annotation : annotations){
62 if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){
63 for(String regex : elementProcessors.keySet()){
64 if(annotation.getText().matches(regex)){
65 applicableElementProcessors.add(elementProcessors.get(regex));
66 }
67 }
68 }
69 }
70 }
71 }
72
73 /**
74 * @param textData
75 */
76 private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){
77 for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){
78 processor.process(textData, previousTextData);
79 }
80 }
81
82 /**
83 *
84 */
85 public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description) {
86 List<Language> languages = new ArrayList<Language>();
87 languages.add(Language.DEFAULT());
88 return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages);
89 }
90
91
92 /**
93 *
94 */
95 public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List<Language> languages) {
96 return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages);
97 }
98
99
100 public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, Language language) {
101 List<Language> languages = new ArrayList<Language>();
102 languages.add(language);
103
104 initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
105
106 return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages);
107 }
108
109
110
111 /** recursive function that goes through a tree containing the order in which the description has to be generated,
112 * if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
113 *
114 * @param children
115 * @param parent
116 * @param description
117 * @param language The language in which the description has to be written
118 * @return
119 */
120 private List<TextData> buildBranchesDescr(List<FeatureNode> children, FeatureNode parent, TaxonDescription description, List<Language> languages) {
121 List<TextData> listTextData = new ArrayList<TextData>(); ;
122 if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
123 Feature fref = parent.getFeature();
124 for (Iterator<FeatureNode> ifn = children.iterator() ; ifn.hasNext() ;){
125 FeatureNode fn = ifn.next();
126 listTextData.addAll(buildBranchesDescr(fn.getChildren(),fn,description, languages));
127 }
128 }
129 else { //once a leaf is reached
130 Feature fref = parent.getFeature();
131 if (fref!=null) { // needs a better algorithm
132 int k=0;
133 Set<DescriptionElementBase> elements = description.getElements();
134 for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
135 DescriptionElementBase descriptionElement = deb.next();
136 TextData textData;
137 if (descriptionElement.getFeature().equals(fref)){ // if one matches the corresponding feature associated to this leaf
138 if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
139 CategoricalData categoricalData = (CategoricalData) descriptionElement;
140 //textData = buildCategoricalDescr(categoricalData, language);
141 textData = categoricalDescriptionBuilder.build(categoricalData, languages);
142 //textData.putText(fref.getLabel(), Language.DEFAULT());
143 TextData featureName = TextData.NewInstance(fref.getLabel(), Language.DEFAULT(), null);
144 listTextData.add(featureName); // if you want to print the name of the feature (Should it be an option ?)
145 listTextData.add(textData);
146 }
147 if (descriptionElement instanceof QuantitativeData) { // if this description is a QuantitativeData, generate the according TextData
148 QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
149 textData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
150 TextData featureName = TextData.NewInstance(fref.getLabel(), Language.DEFAULT(), null);
151 listTextData.add(featureName); // if you want to print the name of the feature
152 listTextData.add(textData);
153 }
154 }
155 }
156 }
157 }
158 return listTextData;
159 }
160
161 /**
162 *
163 *
164 * @param quantitativeDescriptionBuilder
165 */
166 public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
167 this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
168 }
169
170 /**
171 *
172 *
173 * @param categoricalDescriptionBuilder
174 */
175 public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
176 this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
177 }
178
179
180 /**
181 * @param featureTree
182 * @param description
183 * @param language
184 * @return
185 */
186 public String generateStringNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, Language language) {
187 List<Language> languages = new ArrayList<Language>();
188 languages.add(language);
189 return buildString(featureTree.getRootChildren(), featureTree.getRoot(), description, languages).toString();
190 }
191
192 /**
193 * recursive function that goes through a tree containing the order in which
194 * the description has to be generated, if an element of this tree matches
195 * one of the TaxonDescription, a DescriptionBuilder is called which returns
196 * a TextData with the corresponding description.
197 * <p>
198 * Also applies the folowing formatting rules which are special for data coming from Delta, DeltaAccess, DiversityDescriptions:
199 *
200 * <ul>
201 * <li><b>(1.A) if( doSkipTextInBrackets) : </b>Label Text in brackets is always skipped the remaining text string I the TEXT_TO_DISPLAY. The text may contain multiple substrings tagged with the brackets. A tagged substring may also occur in the middle of the whole string.</li>
202 * <li><b>(1.B) else : </b>just remove the brackets
203 * <li><b>(2) : </b> If the TEXT_TO_DISPLAY is equal the TEXT_TO_DISPLAY of the previous element output of this text is suppressed.</li>
204 * </ul>
205 *
206 * @param children
207 * @param parent
208 * @param description
209 * @param language
210 * The language in which the description has to be written
211 * @return
212 */
213 private StringBuilder buildString(List<FeatureNode> children, FeatureNode parent, TaxonDescription description, List<Language> languages) {
214 StringBuilder stringbuilder = new StringBuilder();
215 if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
216 for (Iterator<FeatureNode> ifn = children.iterator() ; ifn.hasNext() ;){
217 FeatureNode fn = ifn.next();
218 StringBuilder tempsb = buildString(fn.getChildren(),fn,description, languages);
219 if (tempsb.length()>1) stringbuilder.append(tempsb.deleteCharAt(tempsb.length()-1));
220 // if (tempsb.length()>1) stringbuilder.append(tempsb);
221 }
222 stringbuilder.append('.');
223 }
224 else { //once a leaf is reached
225 Feature fref = parent.getFeature();
226 if (fref!=null) { // needs a better algorithm
227 int k=0;
228 Set<DescriptionElementBase> elements = description.getElements();
229 for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
230 DescriptionElementBase descriptionElement = deb.next();
231 TextData textData;
232 if (descriptionElement.getFeature().equals(fref)){ // if one matches the corresponding feature associated to this leaf
233 if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
234 CategoricalData categoricalData = (CategoricalData) descriptionElement;
235 //textData = buildCategoricalDescr(categoricalData, language);
236 textData = categoricalDescriptionBuilder.build(categoricalData, languages);
237 //textData.putText(fref.getLabel(), Language.DEFAULT());
238 String featureName = StringUtils.substringBefore(fref.getLabel(), "<");
239 if (previousFeatureName==null){
240 previousFeatureName = featureName;
241 String featureString = categoricalDescriptionBuilder.buildFeature(fref,true);
242 stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1));
243 }
244 else if (!featureName.contains(previousFeatureName)) {
245 stringbuilder.append(". ");
246 previousFeatureName = featureName;
247 String featureString = categoricalDescriptionBuilder.buildFeature(fref,true);
248 stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1)); // if you want to print the name of the feature (Should it be an option ?)
249 }
250 stringbuilder.append(textData.getText(Language.DEFAULT()));
251 stringbuilder.append(',');
252 }
253 if (descriptionElement instanceof QuantitativeData) { // if this description is a QuantitativeData, generate the according TextData
254 QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
255 textData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
256 String featureName = StringUtils.substringBefore(fref.getLabel(), "<");
257 if (previousFeatureName==null){
258 previousFeatureName = featureName;
259 String featureString = quantitativeDescriptionBuilder.buildFeature(fref,true);
260 stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1));
261 }
262 else if (!featureName.contains(previousFeatureName)) {
263 stringbuilder.append(". ");
264 previousFeatureName = featureName;
265 String featureString = quantitativeDescriptionBuilder.buildFeature(fref,true);
266 stringbuilder.append(featureString.substring(0,1).toUpperCase() + featureString.substring(1)); // if you want to print the name of the feature (Should it be an option ?)
267 }
268 stringbuilder.append(textData.getText(Language.DEFAULT()));
269 stringbuilder.append(',');
270 }
271 }
272 }
273 }
274 }
275 return stringbuilder;
276 }
277
278
279 }