Project

General

Profile

Download (15.1 KB) Statistics
| Branch: | Tag: | Revision:
1
package eu.etaxonomy.cdm.api.service;
2

    
3
import java.util.ArrayList;
4
import java.util.HashSet;
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.Map;
8
import java.util.Set;
9

    
10
import org.apache.commons.lang.StringUtils;
11
import org.apache.log4j.Logger;
12
import org.springframework.stereotype.Component;
13

    
14
import eu.etaxonomy.cdm.model.common.Annotation;
15
import eu.etaxonomy.cdm.model.common.AnnotationType;
16
import eu.etaxonomy.cdm.model.common.Language;
17
import eu.etaxonomy.cdm.model.description.CategoricalData;
18
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
19
import eu.etaxonomy.cdm.model.description.Feature;
20
import eu.etaxonomy.cdm.model.description.FeatureNode;
21
import eu.etaxonomy.cdm.model.description.FeatureTree;
22
import eu.etaxonomy.cdm.model.description.QuantitativeData;
23
import eu.etaxonomy.cdm.model.description.TaxonDescription;
24
import eu.etaxonomy.cdm.model.description.TextData;
25
import eu.etaxonomy.cdm.model.description.TextFormat;
26

    
27

    
28
/**
29
 * Generator of natural language descriptions from TaxonDescriptions.
30
 *
31
 * @author m.venin
32
 * @since 13.04.2010
33
 * @version 1.0
34
 */
35
@Component
36
public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
37
	@SuppressWarnings("unused")
38
	private static final Logger logger = Logger.getLogger(NaturalLanguageGenerator.class);
39

    
40
	private String firstSeparator = ",";
41
	private String secondSeparator = ".";
42
	private List<Integer> levels = new ArrayList<Integer>();
43

    
44
	private DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder();
45
	private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder();
46

    
47
	private TextData previousTextData;
48

    
49
	DeltaTextDataProcessor deltaTextDataProcessor = new DeltaTextDataProcessor();
50

    
51
	private Map<String, INaturalLanguageTextDataProcessor> elementProcessors;
52

    
53
	private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
54

    
55
	/**
56
	 * Change the first separator used by generateSingleTextData. By default ",".
57
	 *
58
	 * @param separator
59
	 */
60
	public void setFirstSeparator(String separator){
61
		firstSeparator=separator;
62
	}
63

    
64
	public String getFirstSeparator(){
65
		return firstSeparator;
66
	}
67

    
68
	/**
69
	 * Change the second separator used by generateSingleTextData. By default ".".
70
	 *
71
	 * @param separator
72
	 */
73
	public void setSecondSeparator(String separator){
74
		secondSeparator=separator;
75
	}
76

    
77
	public String getSecondSeparator(){
78
		return secondSeparator;
79
	}
80

    
81
	/**
82
	 * @param quantitativeDescriptionBuilder
83
	 */
84
	public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
85
		this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
86
	}
87

    
88
	/**
89
	 * @param categoricalDescriptionBuilder
90
	 */
91
	public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
92
		this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
93
	}
94

    
95
	/**
96
	 * @return the element processors of this generator
97
	 */
98
	public Map<String, INaturalLanguageTextDataProcessor> getElementProcessors() {
99
		return elementProcessors;
100
	}
101

    
102
	/**
103
	 * The keys of the elementProcessors map are regular expressions which are
104
	 * being used to identify the those Descriptions to which the mapped
105
	 * NaturalLanguageTextDataProcessor is applicable.
106
	 *
107
	 * @param elementProcessors
108
	 */
109
	public void setElementProcessors(
110
			Map<String, INaturalLanguageTextDataProcessor> elementProcessors) {
111
		this.elementProcessors = elementProcessors;
112
	}
113

    
114
	/**
115
	 * Looks for technical annotations, if one matches a regular expression of the element processors
116
	 * the associated processor is added to the applicable element processors which will then be applied
117
	 * when generating the description.
118
	 *
119
	 * @param annotations the set of annotations of the description
120
	 */
121
	private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) {
122

    
123
		if(annotations != null){
124
			for(Annotation annotation : annotations){
125
				if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){
126
					if (elementProcessors!=null){
127
						for(String regex : elementProcessors.keySet()){
128
							if(annotation.getText().matches(regex)){
129
								applicableElementProcessors.add(elementProcessors.get(regex));
130
							}
131
						}
132
					}
133
				}
134
			}
135
		}
136
	}
137

    
138

    
139
	/**
140
	 * Applies the list of applicable processors to a TextData.
141
	 *
142
	 * @param textData the TextData to be modified
143
	 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
144
	 */
145
	private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){
146
		for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){
147
			processor.process(textData, previousTextData);
148
		}
149
	}
150

    
151

    
152
	/**
153
	 * The most simple function to generate a description. The language used is the default one.
154
	 *
155
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
156
	 * @param description the TaxonDescription with all the data
157
	 *
158
	 * @return a list of TextData, each one being a basic element of the natural language description
159
	 */
160
	@Override
161
    public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description) {
162
		return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT());
163
	}
164

    
165

    
166

    
167
	/**
168
	 * Generate a description in a specified language.
169
	 *
170
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
171
	 * @param description the TaxonDescription with all the data
172
	 * @param language the language in which the description has to be printed
173
	 *
174
	 * @return a list of TextData, each one being a basic element of the natural language description
175
	 */
176
	@Override
177
    public List<TextData> generateNaturalLanguageDescription(FeatureTree featureTree, TaxonDescription description,	Language language) {
178
		List<Language> languages = new ArrayList<Language>();
179
		languages.add(language);
180
		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
181
		return generatePreferredNaturalLanguageDescription(featureTree,description,languages);
182
	}
183

    
184
	/**
185
	 * Generate a description with a specified list of preferred languages.
186
	 *
187
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
188
	 * @param description the TaxonDescription with all the data
189
	 * @param languages the ordered list of languages preferred for printing the description
190
	 *
191
	 * @return a list of TextData, each one being a basic element of the natural language description
192
	 */
193
	@Override
194
    public List<TextData> generatePreferredNaturalLanguageDescription(FeatureTree featureTree,TaxonDescription description, List<Language> languages) {
195
		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
196
		return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0);
197
	}
198

    
199
	/**
200
	 * Generate a description as a single paragraph in a TextData.
201
	 *
202
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
203
	 * @param description the TaxonDescription with all the data
204
	 *
205
	 * @return a TextData in the default language.
206
	 */
207
	@Override
208
    public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description) {
209
		return generateSingleTextData(featureTree,description,Language.DEFAULT());
210
	}
211

    
212
	/**
213
	 * Generate a description as a single paragraph in a TextData.
214
	 *
215
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
216
	 * @param description the TaxonDescription with all the data
217
	 * @param language the language in which the description has to be printed
218
	 *
219
	 * @return a TextData in the specified language.
220
	 */
221
	@Override
222
    public TextData generateSingleTextData(FeatureTree featureTree, TaxonDescription description, Language language) {
223
		List<Language> languages = new ArrayList<Language>();
224
		languages.add(language);
225
		return generatePreferredSingleTextData(featureTree,description,languages);
226
	}
227

    
228
	/**
229
	 * Generate a description with a specified list of preferred languages.
230
	 *
231
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
232
	 * @param description the TaxonDescription with all the data
233
	 * @param languages the ordered list of languages preferred for printing the description
234
	 *
235
	 * @return a TextData using the languages (in the given order of preference)
236
	 */
237
	@Override
238
    public TextData generatePreferredSingleTextData(FeatureTree featureTree, TaxonDescription description, List<Language> languages) {
239
		levels.clear(); // before the start, the table containing the levels of each node must be cleared
240
		// Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility
241
		List<TextData> texts = generatePreferredNaturalLanguageDescription(featureTree,description, languages);// first get the description as a raw list of TextData
242

    
243
		StringBuilder descriptionStringBuilder = new StringBuilder(); // the StringBuilder used to generate the description
244
		int i = 0,j,level; // i is used to store the index of the TextData to use
245
		boolean startSentence = false, firstOne = true;
246

    
247
		for (j=0 ; j<levels.size() ; j++){
248
			level = levels.get(j);
249
			if (level==-1){
250
				if ((j+1)<levels.size() && levels.get(j+1).equals(0)){ // if this node is the direct father of a leaf
251
					descriptionStringBuilder.append(secondSeparator + " ");
252
					startSentence=true;
253
					firstOne=false;
254
					String asString = texts.get(i).getText(Language.DEFAULT()).toString();
255
					if (asString.length()>1) {
256
                        descriptionStringBuilder.append(asString.substring(0,1).toUpperCase() + asString.substring(1));
257
                    }
258
				}
259
				i++;
260
			}
261
			else if (level==0) { // if this node is a leaf
262
				if (startSentence) {
263
                    descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
264
                } else {
265
                    descriptionStringBuilder.append(firstSeparator + texts.get(i).getText(Language.DEFAULT()));
266
                }
267
				startSentence=false;
268
				i++;
269
			}
270
			else {
271
				if (!firstOne && levels.get(j-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf
272
					if (i<texts.size()) {
273
                        descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
274
                    }
275
					i++;
276
				}
277
			}
278
		}
279
		descriptionStringBuilder.append(secondSeparator);
280
		String returnString = descriptionStringBuilder.toString();
281
		returnString = StringUtils.replace(returnString, "  ", " ");
282
		returnString = StringUtils.removeStart(returnString, secondSeparator + " ");
283
		return TextData.NewInstance(returnString,Language.DEFAULT(),TextFormat.NewInstance("", "Text", ""));
284
	}
285

    
286

    
287

    
288
	/** recursive function that goes through a tree containing the order in which the description has to be generated,
289
	 *  if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
290
	 *
291
	 * @param children the children of the feature node considered
292
	 * @param parent the feature node considered
293
	 * @param description the TaxonDescription element for which we want a natural language output
294
	 * @param language The language in which the description has to be written
295
	 * @param floor integer to keep track of the level in the tree
296
	 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
297
	 */
298
	private List<TextData> buildBranchesDescr(List<FeatureNode> children, FeatureNode<Feature> parent, TaxonDescription description, List<Language> languages, int floor) {
299
		List<TextData> listTextData = new ArrayList<TextData>();
300
		floor++; // counter to know the current level in the tree
301

    
302
		if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
303
			levels.add(new Integer(floor)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
304
			Feature feature = parent.getTerm();
305
			TextData featureName;
306
			if (feature!=null && feature.getLabel()!=null){ // if a node is associated to a feature
307
				featureName = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
308
				levels.add(new Integer(-1)); // it is indicated by a '-1' after its level
309
				listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list
310
			}
311
            else {
312
                featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
313
            }
314

    
315
			for (Iterator<FeatureNode> ifn = children.iterator() ; ifn.hasNext() ;){
316
				previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree
317
				FeatureNode fn = ifn.next();
318
				listTextData.addAll(buildBranchesDescr(fn.getChildNodes(),fn,description, languages, floor));
319
			}
320
		}
321
		else { //once a leaf is reached
322
			Feature feature = parent.getTerm();
323
			if (feature!=null && (feature.isSupportsQuantitativeData() || feature.isSupportsCategoricalData())) {
324
				Set<DescriptionElementBase> elements = description.getElements();
325
				for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
326
					DescriptionElementBase descriptionElement = deb.next();
327
					if (descriptionElement.getFeature().equals(feature)){ // if one matches the corresponding feature associated to this leaf
328
						if (descriptionElement instanceof CategoricalData || descriptionElement instanceof QuantitativeData){
329
							TextData featureTextData;
330
							TextData statesTextData;
331
							if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
332
								CategoricalData categoricalData = (CategoricalData) descriptionElement;
333
								statesTextData = categoricalDescriptionBuilder.build(categoricalData, languages);
334
								featureTextData = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
335
							}
336
							else { // if this description is a QuantitativeData, generate the according TextData
337
								QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
338
								statesTextData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
339
								featureTextData = quantitativeDescriptionBuilder.buildTextDataFeature(feature, languages);
340
							}
341
							applyNaturalLanguageDescriptionElementProcessors(featureTextData, previousTextData);
342
							levels.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
343
							listTextData.add(featureTextData);
344
							levels.add(new Integer(floor)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
345
							listTextData.add(statesTextData);
346
						}
347
					}
348
				}
349
			}
350
		}
351
		return listTextData;
352
	}
353

    
354
}
(80-80/103)