Revision c495b39b
Added by Katja Luther about 5 years ago
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NaturalLanguageGenerator.java | ||
---|---|---|
11 | 11 |
import org.apache.log4j.Logger; |
12 | 12 |
import org.springframework.stereotype.Component; |
13 | 13 |
|
14 |
import eu.etaxonomy.cdm.model.common.Annotation; |
|
15 |
import eu.etaxonomy.cdm.model.common.AnnotationType; |
|
16 |
import eu.etaxonomy.cdm.model.common.Language; |
|
14 | 17 |
import eu.etaxonomy.cdm.model.description.CategoricalData; |
18 |
import eu.etaxonomy.cdm.model.description.DescriptionBase; |
|
15 | 19 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase; |
16 | 20 |
import eu.etaxonomy.cdm.model.description.Feature; |
17 | 21 |
import eu.etaxonomy.cdm.model.description.FeatureNode; |
18 | 22 |
import eu.etaxonomy.cdm.model.description.FeatureTree; |
19 | 23 |
import eu.etaxonomy.cdm.model.description.QuantitativeData; |
20 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
|
21 | 24 |
import eu.etaxonomy.cdm.model.description.TextData; |
22 | 25 |
import eu.etaxonomy.cdm.model.description.TextFormat; |
23 |
import eu.etaxonomy.cdm.model.common.Annotation; |
|
24 |
import eu.etaxonomy.cdm.model.common.AnnotationType; |
|
25 |
import eu.etaxonomy.cdm.model.common.Language; |
|
26 | 26 |
|
27 | 27 |
|
28 | 28 |
/** |
29 | 29 |
* Generator of natural language descriptions from TaxonDescriptions. |
30 |
*
|
|
30 |
* |
|
31 | 31 |
* @author m.venin |
32 | 32 |
* @since 13.04.2010 |
33 | 33 |
* @version 1.0 |
... | ... | |
45 | 45 |
private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder(); |
46 | 46 |
|
47 | 47 |
private TextData previousTextData; |
48 |
|
|
48 |
|
|
49 | 49 |
DeltaTextDataProcessor deltaTextDataProcessor = new DeltaTextDataProcessor(); |
50 | 50 |
|
51 | 51 |
private Map<String, INaturalLanguageTextDataProcessor> elementProcessors; |
... | ... | |
54 | 54 |
|
55 | 55 |
/** |
56 | 56 |
* Change the first separator used by generateSingleTextData. By default ",". |
57 |
*
|
|
57 |
* |
|
58 | 58 |
* @param separator |
59 | 59 |
*/ |
60 | 60 |
public void setFirstSeparator(String separator){ |
... | ... | |
67 | 67 |
|
68 | 68 |
/** |
69 | 69 |
* Change the second separator used by generateSingleTextData. By default ".". |
70 |
*
|
|
70 |
* |
|
71 | 71 |
* @param separator |
72 | 72 |
*/ |
73 | 73 |
public void setSecondSeparator(String separator){ |
... | ... | |
103 | 103 |
* The keys of the elementProcessors map are regular expressions which are |
104 | 104 |
* being used to identify those Descriptions to which the mapped |
105 | 105 |
* NaturalLanguageTextDataProcessor is applicable. |
106 |
*
|
|
106 |
* |
|
107 | 107 |
* @param elementProcessors |
108 | 108 |
*/ |
109 | 109 |
public void setElementProcessors( |
... | ... | |
115 | 115 |
* Looks for technical annotations, if one matches a regular expression of the element processors |
116 | 116 |
* the associated processor is added to the applicable element processors which will then be applied |
117 | 117 |
* when generating the description. |
118 |
*
|
|
118 |
* |
|
119 | 119 |
* @param annotations the set of annotations of the description |
120 | 120 |
*/ |
121 | 121 |
private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) { |
... | ... | |
138 | 138 |
|
139 | 139 |
/** |
140 | 140 |
* Applies the list of applicable processors to a TextData. |
141 |
*
|
|
141 |
* |
|
142 | 142 |
* @param textData the TextData to be modified |
143 | 143 |
* @param previousTextData the TextData corresponding to the feature of the previous level in the tree |
144 | 144 |
*/ |
... | ... | |
151 | 151 |
|
152 | 152 |
/** |
153 | 153 |
* The most simple function to generate a description. The language used is the default one. |
154 |
*
|
|
154 |
* |
|
155 | 155 |
* @param featureTree the FeatureTree holding the order in which features and their states must be printed |
156 | 156 |
* @param description the TaxonDescription with all the data |
157 |
*
|
|
157 |
* |
|
158 | 158 |
* @return a list of TextData, each one being a basic element of the natural language description |
159 | 159 |
*/ |
160 |
public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description) {
|
|
160 |
public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree, DescriptionBase description) {
|
|
161 | 161 |
return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT()); |
162 | 162 |
} |
163 | 163 |
|
... | ... | |
165 | 165 |
|
166 | 166 |
/** |
167 | 167 |
* Generate a description in a specified language. |
168 |
*
|
|
168 |
* |
|
169 | 169 |
* @param featureTree the FeatureTree holding the order in which features and their states must be printed |
170 | 170 |
* @param description the TaxonDescription with all the data |
171 | 171 |
* @param language the language in which the description has to be printed |
172 |
*
|
|
172 |
* |
|
173 | 173 |
* @return a list of TextData, each one being a basic element of the natural language description |
174 | 174 |
*/ |
175 |
public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description, Language language) {
|
|
175 |
public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree, DescriptionBase description, Language language) {
|
|
176 | 176 |
List<Language> languages = new ArrayList<Language>(); |
177 | 177 |
languages.add(language); |
178 | 178 |
initNaturalLanguageDescriptionElementProcessors(description.getAnnotations()); |
... | ... | |
181 | 181 |
|
182 | 182 |
/** |
183 | 183 |
* Generate a description with a specified list of preferred languages. |
184 |
*
|
|
184 |
* |
|
185 | 185 |
* @param featureTree the FeatureTree holding the order in which features and their states must be printed |
186 | 186 |
* @param description the TaxonDescription with all the data |
187 | 187 |
* @param languages the ordered list of languages preferred for printing the description |
188 |
*
|
|
188 |
* |
|
189 | 189 |
* @return a list of TextData, each one being a basic element of the natural language description |
190 | 190 |
*/ |
191 |
public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List<Language> languages) {
|
|
191 |
public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree,DescriptionBase description, List<Language> languages) {
|
|
192 | 192 |
initNaturalLanguageDescriptionElementProcessors(description.getAnnotations()); |
193 | 193 |
return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0); |
194 | 194 |
} |
195 | 195 |
|
196 | 196 |
/** |
197 | 197 |
* Generate a description as a single paragraph in a TextData. |
198 |
*
|
|
198 |
* |
|
199 | 199 |
* @param featureTree the FeatureTree holding the order in which features and their states must be printed |
200 | 200 |
* @param description the TaxonDescription with all the data |
201 |
*
|
|
201 |
* |
|
202 | 202 |
* @return a TextData in the default language. |
203 | 203 |
*/ |
204 |
public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description) {
|
|
204 |
public TextData generateSingleTextData(FeatureTree featureTree, DescriptionBase description) {
|
|
205 | 205 |
return generateSingleTextData(featureTree,description,Language.DEFAULT()); |
206 | 206 |
} |
207 | 207 |
|
208 | 208 |
/** |
209 | 209 |
* Generate a description as a single paragraph in a TextData. |
210 |
*
|
|
210 |
* |
|
211 | 211 |
* @param featureTree the FeatureTree holding the order in which features and their states must be printed |
212 | 212 |
* @param description the TaxonDescription with all the data |
213 | 213 |
* @param language the language in which the description has to be printed |
214 |
*
|
|
214 |
* |
|
215 | 215 |
* @return a TextData in the specified language. |
216 | 216 |
*/ |
217 |
public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description, Language language) {
|
|
217 |
public TextData generateSingleTextData(FeatureTree featureTree, DescriptionBase description, Language language) {
|
|
218 | 218 |
List<Language> languages = new ArrayList<Language>(); |
219 | 219 |
languages.add(language); |
220 | 220 |
return generatePreferredSingleTextData(featureTree,description,languages); |
... | ... | |
222 | 222 |
|
223 | 223 |
/** |
224 | 224 |
* Generate a description with a specified list of preferred languages. |
225 |
*
|
|
225 |
* |
|
226 | 226 |
* @param featureTree the FeatureTree holding the order in which features and their states must be printed |
227 | 227 |
* @param description the TaxonDescription with all the data |
228 | 228 |
* @param languages the ordered list of languages preferred for printing the description |
229 |
*
|
|
229 |
* |
|
230 | 230 |
* @return a TextData using the languages (in the given order of preference) |
231 | 231 |
*/ |
232 |
public TextData generatePreferredSingleTextData(FeatureTree featureTree, TaxonDescription description, List<Language> languages) {
|
|
232 |
public TextData generatePreferredSingleTextData(FeatureTree featureTree, DescriptionBase description, List<Language> languages) {
|
|
233 | 233 |
levels.clear(); // before the start, the table containing the levels of each node must be cleared |
234 | 234 |
// Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility |
235 | 235 |
List<TextData> texts = generatePreferredNaturalLanguageDescription(featureTree,description, languages);// first get the description as a raw list of TextData |
... | ... | |
246 | 246 |
startSentence=true; |
247 | 247 |
firstOne=false; |
248 | 248 |
String asString = texts.get(i).getText(Language.DEFAULT()).toString(); |
249 |
if (asString.length()>1) descriptionStringBuilder.append(asString.substring(0,1).toUpperCase() + asString.substring(1)); |
|
249 |
if (asString.length()>1) { |
|
250 |
descriptionStringBuilder.append(asString.substring(0,1).toUpperCase() + asString.substring(1)); |
|
251 |
} |
|
250 | 252 |
} |
251 | 253 |
i++; |
252 | 254 |
} |
253 | 255 |
else if (level==0) { // if this node is a leaf |
254 |
if (startSentence) descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT())); |
|
255 |
else descriptionStringBuilder.append(firstSeparator + texts.get(i).getText(Language.DEFAULT())); |
|
256 |
if (startSentence) { |
|
257 |
descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT())); |
|
258 |
} else { |
|
259 |
descriptionStringBuilder.append(firstSeparator + texts.get(i).getText(Language.DEFAULT())); |
|
260 |
} |
|
256 | 261 |
startSentence=false; |
257 | 262 |
i++; |
258 | 263 |
} |
259 | 264 |
else { |
260 | 265 |
if (!firstOne && levels.get(j-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf |
261 |
if (i<texts.size()) descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT())); |
|
266 |
if (i<texts.size()) { |
|
267 |
descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT())); |
|
268 |
} |
|
262 | 269 |
i++; |
263 | 270 |
} |
264 | 271 |
} |
... | ... | |
274 | 281 |
|
275 | 282 |
/** recursive function that goes through a tree containing the order in which the description has to be generated, |
276 | 283 |
* if an element of this tree matches an element of the description, a DescriptionBuilder is called which returns a TextData with the corresponding description. |
277 |
*
|
|
284 |
* |
|
278 | 285 |
* @param children the children of the feature node considered |
279 | 286 |
* @param parent the feature node considered |
280 | 287 |
* @param description the TaxonDescription element for which we want a natural language output |
... | ... | |
282 | 289 |
* @param floor integer to keep track of the level in the tree |
283 | 290 |
* @return a list of TextData elements containing the part of description corresponding to the feature node considered |
284 | 291 |
*/ |
285 |
private List<TextData> buildBranchesDescr(List<FeatureNode> children, FeatureNode parent, TaxonDescription description, List<Language> languages, int floor) {
|
|
292 |
private List<TextData> buildBranchesDescr(List<FeatureNode> children, FeatureNode parent, DescriptionBase description, List<Language> languages, int floor) {
|
|
286 | 293 |
List<TextData> listTextData = new ArrayList<TextData>(); |
287 | 294 |
floor++; // counter to know the current level in the tree |
288 | 295 |
|
... | ... | |
295 | 302 |
levels.add(new Integer(-1)); // it is indicated by a '-1' after its level |
296 | 303 |
listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list |
297 | 304 |
} |
298 |
else featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached) |
|
305 |
else { |
|
306 |
featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached) |
|
307 |
} |
|
299 | 308 |
|
300 | 309 |
for (Iterator<FeatureNode> ifn = children.iterator() ; ifn.hasNext() ;){ |
301 | 310 |
previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree |
... | ... | |
334 | 343 |
} |
335 | 344 |
} |
336 | 345 |
return listTextData; |
337 |
}
|
|
346 |
} |
|
338 | 347 |
|
339 | 348 |
} |
Also available in: Unified diff
ref #8124: adapt NaturalLanguageGenerator to usage in nameFacts