Project

General

Profile

Download (15 KB) Statistics
| Branch: | Tag: | Revision:
1
package eu.etaxonomy.cdm.api.service;
2

    
3
import java.util.ArrayList;
4
import java.util.HashSet;
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.Map;
8
import java.util.Set;
9

    
10
import org.apache.commons.lang.StringUtils;
11
import org.apache.log4j.Logger;
12
import org.springframework.stereotype.Component;
13

    
14
import eu.etaxonomy.cdm.model.common.Annotation;
15
import eu.etaxonomy.cdm.model.common.AnnotationType;
16
import eu.etaxonomy.cdm.model.common.Language;
17
import eu.etaxonomy.cdm.model.description.CategoricalData;
18
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
19
import eu.etaxonomy.cdm.model.description.Feature;
20
import eu.etaxonomy.cdm.model.description.QuantitativeData;
21
import eu.etaxonomy.cdm.model.description.TaxonDescription;
22
import eu.etaxonomy.cdm.model.description.TextData;
23
import eu.etaxonomy.cdm.model.description.TextFormat;
24
import eu.etaxonomy.cdm.model.term.TermTree;
25
import eu.etaxonomy.cdm.model.term.TermTreeNode;
26

    
27

    
28
/**
29
 * Generator of natural language descriptions from TaxonDescriptions.
30
 *
31
 * @author m.venin
32
 * @since 13.04.2010
33
 */
34
@Component
35
public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
36
	@SuppressWarnings("unused")
37
	private static final Logger logger = Logger.getLogger(NaturalLanguageGenerator.class);
38

    
39
	private String firstSeparator = ",";
40
	private String secondSeparator = ".";
41
	private List<Integer> levels = new ArrayList<Integer>();
42

    
43
	private DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder();
44
	private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder();
45

    
46
	private TextData previousTextData;
47

    
48
	DeltaTextDataProcessor deltaTextDataProcessor = new DeltaTextDataProcessor();
49

    
50
	private Map<String, INaturalLanguageTextDataProcessor> elementProcessors;
51

    
52
	private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
53

    
54
	/**
55
	 * Change the first separator used by generateSingleTextData. By default ",".
56
	 *
57
	 * @param separator
58
	 */
59
	public void setFirstSeparator(String separator){
60
		firstSeparator=separator;
61
	}
62

    
63
	public String getFirstSeparator(){
64
		return firstSeparator;
65
	}
66

    
67
	/**
68
	 * Change the second separator used by generateSingleTextData. By default ".".
69
	 *
70
	 * @param separator
71
	 */
72
	public void setSecondSeparator(String separator){
73
		secondSeparator=separator;
74
	}
75

    
76
	public String getSecondSeparator(){
77
		return secondSeparator;
78
	}
79

    
80
	/**
81
	 * @param quantitativeDescriptionBuilder
82
	 */
83
	public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
84
		this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
85
	}
86

    
87
	/**
88
	 * @param categoricalDescriptionBuilder
89
	 */
90
	public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
91
		this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
92
	}
93

    
94
	/**
95
	 * @return the element processors of this generator
96
	 */
97
	public Map<String, INaturalLanguageTextDataProcessor> getElementProcessors() {
98
		return elementProcessors;
99
	}
100

    
101
	/**
102
	 * The keys of the elementProcessors map are regular expressions which are
103
	 * being used to identify the those Descriptions to which the mapped
104
	 * NaturalLanguageTextDataProcessor is applicable.
105
	 *
106
	 * @param elementProcessors
107
	 */
108
	public void setElementProcessors(
109
			Map<String, INaturalLanguageTextDataProcessor> elementProcessors) {
110
		this.elementProcessors = elementProcessors;
111
	}
112

    
113
	/**
114
	 * Looks for technical annotations, if one matches a regular expression of the element processors
115
	 * the associated processor is added to the applicable element processors which will then be applied
116
	 * when generating the description.
117
	 *
118
	 * @param annotations the set of annotations of the description
119
	 */
120
	private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) {
121

    
122
		if(annotations != null){
123
			for(Annotation annotation : annotations){
124
				if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){
125
					if (elementProcessors!=null){
126
						for(String regex : elementProcessors.keySet()){
127
							if(annotation.getText().matches(regex)){
128
								applicableElementProcessors.add(elementProcessors.get(regex));
129
							}
130
						}
131
					}
132
				}
133
			}
134
		}
135
	}
136

    
137

    
138
	/**
139
	 * Applies the list of applicable processors to a TextData.
140
	 *
141
	 * @param textData the TextData to be modified
142
	 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
143
	 */
144
	private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){
145
		for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){
146
			processor.process(textData, previousTextData);
147
		}
148
	}
149

    
150

    
151
	/**
152
	 * The most simple function to generate a description. The language used is the default one.
153
	 *
154
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
155
	 * @param description the TaxonDescription with all the data
156
	 *
157
	 * @return a list of TextData, each one being a basic element of the natural language description
158
	 */
159
	@Override
160
    public List<TextData> generateNaturalLanguageDescription(TermTree featureTree,TaxonDescription description) {
161
		return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT());
162
	}
163

    
164

    
165

    
166
	/**
167
	 * Generate a description in a specified language.
168
	 *
169
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
170
	 * @param description the TaxonDescription with all the data
171
	 * @param language the language in which the description has to be printed
172
	 *
173
	 * @return a list of TextData, each one being a basic element of the natural language description
174
	 */
175
	@Override
176
    public List<TextData> generateNaturalLanguageDescription(TermTree featureTree, TaxonDescription description,	Language language) {
177
		List<Language> languages = new ArrayList<Language>();
178
		languages.add(language);
179
		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
180
		return generatePreferredNaturalLanguageDescription(featureTree,description,languages);
181
	}
182

    
183
	/**
184
	 * Generate a description with a specified list of preferred languages.
185
	 *
186
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
187
	 * @param description the TaxonDescription with all the data
188
	 * @param languages the ordered list of languages preferred for printing the description
189
	 *
190
	 * @return a list of TextData, each one being a basic element of the natural language description
191
	 */
192
	@Override
193
    public List<TextData> generatePreferredNaturalLanguageDescription(TermTree featureTree,TaxonDescription description, List<Language> languages) {
194
		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
195
		return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0);
196
	}
197

    
198
	/**
199
	 * Generate a description as a single paragraph in a TextData.
200
	 *
201
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
202
	 * @param description the TaxonDescription with all the data
203
	 *
204
	 * @return a TextData in the default language.
205
	 */
206
	@Override
207
    public TextData generateSingleTextData(TermTree featureTree, TaxonDescription description) {
208
		return generateSingleTextData(featureTree,description,Language.DEFAULT());
209
	}
210

    
211
	/**
212
	 * Generate a description as a single paragraph in a TextData.
213
	 *
214
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
215
	 * @param description the TaxonDescription with all the data
216
	 * @param language the language in which the description has to be printed
217
	 *
218
	 * @return a TextData in the specified language.
219
	 */
220
	@Override
221
    public TextData generateSingleTextData(TermTree featureTree, TaxonDescription description, Language language) {
222
		List<Language> languages = new ArrayList<Language>();
223
		languages.add(language);
224
		return generatePreferredSingleTextData(featureTree,description,languages);
225
	}
226

    
227
	/**
228
	 * Generate a description with a specified list of preferred languages.
229
	 *
230
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
231
	 * @param description the TaxonDescription with all the data
232
	 * @param languages the ordered list of languages preferred for printing the description
233
	 *
234
	 * @return a TextData using the languages (in the given order of preference)
235
	 */
236
	@Override
237
    public TextData generatePreferredSingleTextData(TermTree featureTree, TaxonDescription description, List<Language> languages) {
238
		levels.clear(); // before the start, the table containing the levels of each node must be cleared
239
		// Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility
240
		List<TextData> texts = generatePreferredNaturalLanguageDescription(featureTree,description, languages);// first get the description as a raw list of TextData
241

    
242
		StringBuilder descriptionStringBuilder = new StringBuilder(); // the StringBuilder used to generate the description
243
		int i = 0,j,level; // i is used to store the index of the TextData to use
244
		boolean startSentence = false, firstOne = true;
245

    
246
		for (j=0 ; j<levels.size() ; j++){
247
			level = levels.get(j);
248
			if (level==-1){
249
				if ((j+1)<levels.size() && levels.get(j+1).equals(0)){ // if this node is the direct father of a leaf
250
					descriptionStringBuilder.append(secondSeparator + " ");
251
					startSentence=true;
252
					firstOne=false;
253
					String asString = texts.get(i).getText(Language.DEFAULT()).toString();
254
					if (asString.length()>1) {
255
                        descriptionStringBuilder.append(asString.substring(0,1).toUpperCase() + asString.substring(1));
256
                    }
257
				}
258
				i++;
259
			}
260
			else if (level==0) { // if this node is a leaf
261
				if (startSentence) {
262
                    descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
263
                } else {
264
                    descriptionStringBuilder.append(firstSeparator + texts.get(i).getText(Language.DEFAULT()));
265
                }
266
				startSentence=false;
267
				i++;
268
			}
269
			else {
270
				if (!firstOne && levels.get(j-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf
271
					if (i<texts.size()) {
272
                        descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
273
                    }
274
					i++;
275
				}
276
			}
277
		}
278
		descriptionStringBuilder.append(secondSeparator);
279
		String returnString = descriptionStringBuilder.toString();
280
		returnString = StringUtils.replace(returnString, "  ", " ");
281
		returnString = StringUtils.removeStart(returnString, secondSeparator + " ");
282
		return TextData.NewInstance(returnString,Language.DEFAULT(),TextFormat.NewInstance("", "Text", ""));
283
	}
284

    
285

    
286

    
287
	/** recursive function that goes through a tree containing the order in which the description has to be generated,
288
	 *  if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
289
	 *
290
	 * @param children the children of the feature node considered
291
	 * @param parent the feature node considered
292
	 * @param description the TaxonDescription element for which we want a natural language output
293
	 * @param language The language in which the description has to be written
294
	 * @param floor integer to keep track of the level in the tree
295
	 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
296
	 */
297
	private List<TextData> buildBranchesDescr(List<TermTreeNode> children, TermTreeNode<Feature> parent, TaxonDescription description, List<Language> languages, int floor) {
298
		List<TextData> listTextData = new ArrayList<TextData>();
299
		floor++; // counter to know the current level in the tree
300

    
301
		if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
302
			levels.add(new Integer(floor)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
303
			Feature feature = parent.getTerm();
304
			TextData featureName;
305
			if (feature!=null && feature.getLabel()!=null){ // if a node is associated to a feature
306
				featureName = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
307
				levels.add(new Integer(-1)); // it is indicated by a '-1' after its level
308
				listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list
309
			}
310
            else {
311
                featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
312
            }
313

    
314
			for (Iterator<TermTreeNode> ifn = children.iterator() ; ifn.hasNext() ;){
315
				previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree
316
				TermTreeNode fn = ifn.next();
317
				listTextData.addAll(buildBranchesDescr(fn.getChildNodes(),fn,description, languages, floor));
318
			}
319
		}
320
		else { //once a leaf is reached
321
			Feature feature = parent.getTerm();
322
			if (feature!=null && (feature.isSupportsQuantitativeData() || feature.isSupportsCategoricalData())) {
323
				Set<DescriptionElementBase> elements = description.getElements();
324
				for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
325
					DescriptionElementBase descriptionElement = deb.next();
326
					if (descriptionElement.getFeature().equals(feature)){ // if one matches the corresponding feature associated to this leaf
327
						if (descriptionElement instanceof CategoricalData || descriptionElement instanceof QuantitativeData){
328
							TextData featureTextData;
329
							TextData statesTextData;
330
							if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
331
								CategoricalData categoricalData = (CategoricalData) descriptionElement;
332
								statesTextData = categoricalDescriptionBuilder.build(categoricalData, languages);
333
								featureTextData = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
334
							}
335
							else { // if this description is a QuantitativeData, generate the according TextData
336
								QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
337
								statesTextData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
338
								featureTextData = quantitativeDescriptionBuilder.buildTextDataFeature(feature, languages);
339
							}
340
							applyNaturalLanguageDescriptionElementProcessors(featureTextData, previousTextData);
341
							levels.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
342
							listTextData.add(featureTextData);
343
							levels.add(new Integer(floor)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
344
							listTextData.add(statesTextData);
345
						}
346
					}
347
				}
348
			}
349
		}
350
		return listTextData;
351
	}
352

    
353
}
(80-80/103)