Project

General

Profile

Download (15.3 KB) Statistics
| Branch: | Tag: | Revision:
1
package eu.etaxonomy.cdm.api.service;
2

    
3
import java.util.ArrayList;
4
import java.util.HashSet;
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.Map;
8
import java.util.Set;
9

    
10
import org.apache.commons.lang.StringUtils;
11
import org.apache.log4j.Logger;
12
import org.springframework.stereotype.Component;
13

    
14
import eu.etaxonomy.cdm.format.description.DefaultCategoricalDescriptionBuilder;
15
import eu.etaxonomy.cdm.format.description.DefaultQuantitativeDescriptionBuilder;
16
import eu.etaxonomy.cdm.format.description.DescriptionBuilder;
17
import eu.etaxonomy.cdm.model.common.Annotation;
18
import eu.etaxonomy.cdm.model.common.AnnotationType;
19
import eu.etaxonomy.cdm.model.common.Language;
20
import eu.etaxonomy.cdm.model.description.CategoricalData;
21
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
22
import eu.etaxonomy.cdm.model.description.Feature;
23
import eu.etaxonomy.cdm.model.description.QuantitativeData;
24
import eu.etaxonomy.cdm.model.description.TaxonDescription;
25
import eu.etaxonomy.cdm.model.description.TextData;
26
import eu.etaxonomy.cdm.model.description.TextFormat;
27
import eu.etaxonomy.cdm.model.term.TermTree;
28
import eu.etaxonomy.cdm.model.term.TermNode;
29

    
30

    
31
/**
32
 * Generator of natural language descriptions from TaxonDescriptions.
33
 *
34
 * @author m.venin
35
 * @since 13.04.2010
36
 */
37
@Component
38
public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
39
	@SuppressWarnings("unused")
40
	private static final Logger logger = Logger.getLogger(NaturalLanguageGenerator.class);
41

    
42
	private String firstSeparator = ",";
43
	private String secondSeparator = ".";
44
	private List<Integer> levels = new ArrayList<Integer>();
45

    
46
	private DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder();
47
	private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder();
48

    
49
	private TextData previousTextData;
50

    
51
	DeltaTextDataProcessor deltaTextDataProcessor = new DeltaTextDataProcessor();
52

    
53
	private Map<String, INaturalLanguageTextDataProcessor> elementProcessors;
54

    
55
	private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
56

    
57
	/**
58
	 * Change the first separator used by generateSingleTextData. By default ",".
59
	 *
60
	 * @param separator
61
	 */
62
	public void setFirstSeparator(String separator){
63
		firstSeparator=separator;
64
	}
65

    
66
	public String getFirstSeparator(){
67
		return firstSeparator;
68
	}
69

    
70
	/**
71
	 * Change the second separator used by generateSingleTextData. By default ".".
72
	 *
73
	 * @param separator
74
	 */
75
	public void setSecondSeparator(String separator){
76
		secondSeparator=separator;
77
	}
78

    
79
	public String getSecondSeparator(){
80
		return secondSeparator;
81
	}
82

    
83
	/**
84
	 * @param quantitativeDescriptionBuilder
85
	 */
86
	public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
87
		this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
88
	}
89

    
90
	/**
91
	 * @param categoricalDescriptionBuilder
92
	 */
93
	public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
94
		this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
95
	}
96

    
97
	/**
98
	 * @return the element processors of this generator
99
	 */
100
	public Map<String, INaturalLanguageTextDataProcessor> getElementProcessors() {
101
		return elementProcessors;
102
	}
103

    
104
	/**
105
	 * The keys of the elementProcessors map are regular expressions which are
106
	 * being used to identify the those Descriptions to which the mapped
107
	 * NaturalLanguageTextDataProcessor is applicable.
108
	 *
109
	 * @param elementProcessors
110
	 */
111
	public void setElementProcessors(
112
			Map<String, INaturalLanguageTextDataProcessor> elementProcessors) {
113
		this.elementProcessors = elementProcessors;
114
	}
115

    
116
	/**
117
	 * Looks for technical annotations, if one matches a regular expression of the element processors
118
	 * the associated processor is added to the applicable element processors which will then be applied
119
	 * when generating the description.
120
	 *
121
	 * @param annotations the set of annotations of the description
122
	 */
123
	private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) {
124

    
125
		if(annotations != null){
126
			for(Annotation annotation : annotations){
127
				if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){
128
					if (elementProcessors!=null){
129
						for(String regex : elementProcessors.keySet()){
130
							if(annotation.getText().matches(regex)){
131
								applicableElementProcessors.add(elementProcessors.get(regex));
132
							}
133
						}
134
					}
135
				}
136
			}
137
		}
138
	}
139

    
140

    
141
	/**
142
	 * Applies the list of applicable processors to a TextData.
143
	 *
144
	 * @param textData the TextData to be modified
145
	 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
146
	 */
147
	private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){
148
		for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){
149
			processor.process(textData, previousTextData);
150
		}
151
	}
152

    
153

    
154
	/**
155
	 * The most simple function to generate a description. The language used is the default one.
156
	 *
157
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
158
	 * @param description the TaxonDescription with all the data
159
	 *
160
	 * @return a list of TextData, each one being a basic element of the natural language description
161
	 */
162
	@Override
163
    public List<TextData> generateNaturalLanguageDescription(TermTree featureTree,TaxonDescription description) {
164
		return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT());
165
	}
166

    
167

    
168

    
169
	/**
170
	 * Generate a description in a specified language.
171
	 *
172
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
173
	 * @param description the TaxonDescription with all the data
174
	 * @param language the language in which the description has to be printed
175
	 *
176
	 * @return a list of TextData, each one being a basic element of the natural language description
177
	 */
178
	@Override
179
    public List<TextData> generateNaturalLanguageDescription(TermTree featureTree, TaxonDescription description,	Language language) {
180
		List<Language> languages = new ArrayList<Language>();
181
		languages.add(language);
182
		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
183
		return generatePreferredNaturalLanguageDescription(featureTree,description,languages);
184
	}
185

    
186
	/**
187
	 * Generate a description with a specified list of preferred languages.
188
	 *
189
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
190
	 * @param description the TaxonDescription with all the data
191
	 * @param languages the ordered list of languages preferred for printing the description
192
	 *
193
	 * @return a list of TextData, each one being a basic element of the natural language description
194
	 */
195
	@Override
196
    public List<TextData> generatePreferredNaturalLanguageDescription(TermTree featureTree,TaxonDescription description, List<Language> languages) {
197
		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
198
		return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0);
199
	}
200

    
201
	/**
202
	 * Generate a description as a single paragraph in a TextData.
203
	 *
204
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
205
	 * @param description the TaxonDescription with all the data
206
	 *
207
	 * @return a TextData in the default language.
208
	 */
209
	@Override
210
    public TextData generateSingleTextData(TermTree featureTree, TaxonDescription description) {
211
		return generateSingleTextData(featureTree,description,Language.DEFAULT());
212
	}
213

    
214
	/**
215
	 * Generate a description as a single paragraph in a TextData.
216
	 *
217
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
218
	 * @param description the TaxonDescription with all the data
219
	 * @param language the language in which the description has to be printed
220
	 *
221
	 * @return a TextData in the specified language.
222
	 */
223
	@Override
224
    public TextData generateSingleTextData(TermTree featureTree, TaxonDescription description, Language language) {
225
		List<Language> languages = new ArrayList<Language>();
226
		languages.add(language);
227
		return generatePreferredSingleTextData(featureTree,description,languages);
228
	}
229

    
230
	/**
231
	 * Generate a description with a specified list of preferred languages.
232
	 *
233
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
234
	 * @param description the TaxonDescription with all the data
235
	 * @param languages the ordered list of languages preferred for printing the description
236
	 *
237
	 * @return a TextData using the languages (in the given order of preference)
238
	 */
239
	@Override
240
    public TextData generatePreferredSingleTextData(TermTree featureTree, TaxonDescription description, List<Language> languages) {
241
		levels.clear(); // before the start, the table containing the levels of each node must be cleared
242
		// Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility
243
		List<TextData> texts = generatePreferredNaturalLanguageDescription(featureTree,description, languages);// first get the description as a raw list of TextData
244

    
245
		StringBuilder descriptionStringBuilder = new StringBuilder(); // the StringBuilder used to generate the description
246
		int i = 0,j,level; // i is used to store the index of the TextData to use
247
		boolean startSentence = false, firstOne = true;
248

    
249
		for (j=0 ; j<levels.size() ; j++){
250
			level = levels.get(j);
251
			if (level==-1){
252
				if ((j+1)<levels.size() && levels.get(j+1).equals(0)){ // if this node is the direct father of a leaf
253
					descriptionStringBuilder.append(secondSeparator + " ");
254
					startSentence=true;
255
					firstOne=false;
256
					String asString = texts.get(i).getText(Language.DEFAULT()).toString();
257
					if (asString.length()>1) {
258
                        descriptionStringBuilder.append(asString.substring(0,1).toUpperCase() + asString.substring(1));
259
                    }
260
				}
261
				i++;
262
			}
263
			else if (level==0) { // if this node is a leaf
264
				if (startSentence) {
265
                    descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
266
                } else {
267
                    descriptionStringBuilder.append(firstSeparator + texts.get(i).getText(Language.DEFAULT()));
268
                }
269
				startSentence=false;
270
				i++;
271
			}
272
			else {
273
				if (!firstOne && levels.get(j-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf
274
					if (i<texts.size()) {
275
                        descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
276
                    }
277
					i++;
278
				}
279
			}
280
		}
281
		descriptionStringBuilder.append(secondSeparator);
282
		String returnString = descriptionStringBuilder.toString();
283
		returnString = StringUtils.replace(returnString, "  ", " ");
284
		returnString = StringUtils.removeStart(returnString, secondSeparator + " ");
285
		return TextData.NewInstance(returnString,Language.DEFAULT(),TextFormat.NewInstance("", "Text", ""));
286
	}
287

    
288

    
289

    
290
	/** recursive function that goes through a tree containing the order in which the description has to be generated,
291
	 *  if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
292
	 *
293
	 * @param children the children of the feature node considered
294
	 * @param parent the feature node considered
295
	 * @param description the TaxonDescription element for which we want a natural language output
296
	 * @param language The language in which the description has to be written
297
	 * @param floor integer to keep track of the level in the tree
298
	 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
299
	 */
300
	private List<TextData> buildBranchesDescr(List<TermNode> children, TermNode<Feature> parent, TaxonDescription description, List<Language> languages, int floor) {
301
		List<TextData> listTextData = new ArrayList<TextData>();
302
		floor++; // counter to know the current level in the tree
303

    
304
		if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
305
			levels.add(new Integer(floor)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
306
			Feature feature = parent.getTerm();
307
			TextData featureName;
308
			if (feature!=null && feature.getLabel()!=null){ // if a node is associated to a feature
309
				featureName = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
310
				levels.add(new Integer(-1)); // it is indicated by a '-1' after its level
311
				listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list
312
			}
313
            else {
314
                featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
315
            }
316

    
317
			for (Iterator<TermNode> ifn = children.iterator() ; ifn.hasNext() ;){
318
				previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree
319
				TermNode fn = ifn.next();
320
				listTextData.addAll(buildBranchesDescr(fn.getChildNodes(),fn,description, languages, floor));
321
			}
322
		}
323
		else { //once a leaf is reached
324
			Feature feature = parent.getTerm();
325
			if (feature!=null && (feature.isSupportsQuantitativeData() || feature.isSupportsCategoricalData())) {
326
				Set<DescriptionElementBase> elements = description.getElements();
327
				for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
328
					DescriptionElementBase descriptionElement = deb.next();
329
					if (descriptionElement.getFeature().equals(feature)){ // if one matches the corresponding feature associated to this leaf
330
						if (descriptionElement instanceof CategoricalData || descriptionElement instanceof QuantitativeData){
331
							TextData featureTextData;
332
							TextData statesTextData;
333
							if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
334
								CategoricalData categoricalData = (CategoricalData) descriptionElement;
335
								statesTextData = categoricalDescriptionBuilder.build(categoricalData, languages);
336
								featureTextData = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
337
							}
338
							else { // if this description is a QuantitativeData, generate the according TextData
339
								QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
340
								statesTextData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
341
								featureTextData = quantitativeDescriptionBuilder.buildTextDataFeature(feature, languages);
342
							}
343
							applyNaturalLanguageDescriptionElementProcessors(featureTextData, previousTextData);
344
							levels.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
345
							listTextData.add(featureTextData);
346
							levels.add(new Integer(floor)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
347
							listTextData.add(statesTextData);
348
						}
349
					}
350
				}
351
			}
352
		}
353
		return listTextData;
354
	}
355

    
356
}
(75-75/100)