Project

General

Profile

Download (15.3 KB) Statistics
| Branch: | Tag: | Revision:
1
package eu.etaxonomy.cdm.api.service;
2

    
3
import java.util.ArrayList;
4
import java.util.HashSet;
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.Map;
8
import java.util.Set;
9

    
10
import org.apache.commons.lang.StringUtils;
11
import org.apache.logging.log4j.LogManager;import org.apache.logging.log4j.Logger;
12
import org.springframework.stereotype.Component;
13

    
14
import eu.etaxonomy.cdm.format.description.DefaultCategoricalDescriptionBuilder;
15
import eu.etaxonomy.cdm.format.description.DefaultQuantitativeDescriptionBuilder;
16
import eu.etaxonomy.cdm.format.description.DescriptionBuilder;
17
import eu.etaxonomy.cdm.model.common.Annotation;
18
import eu.etaxonomy.cdm.model.common.AnnotationType;
19
import eu.etaxonomy.cdm.model.common.Language;
20
import eu.etaxonomy.cdm.model.description.CategoricalData;
21
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
22
import eu.etaxonomy.cdm.model.description.Feature;
23
import eu.etaxonomy.cdm.model.description.QuantitativeData;
24
import eu.etaxonomy.cdm.model.description.TaxonDescription;
25
import eu.etaxonomy.cdm.model.description.TextData;
26
import eu.etaxonomy.cdm.model.description.TextFormat;
27
import eu.etaxonomy.cdm.model.term.TermTree;
28
import eu.etaxonomy.cdm.model.term.TermNode;
29

    
30

    
31
/**
32
 * Generator of natural language descriptions from TaxonDescriptions.
33
 *
34
 * @author m.venin
35
 * @since 13.04.2010
36
 */
37
@Component
38
public class NaturalLanguageGenerator implements INaturalLanguageGenerator {
39
	
40
	@SuppressWarnings("unused")
41
	private static final Logger logger = LogManager.getLogger(NaturalLanguageGenerator.class);
42

    
43
	private String firstSeparator = ",";
44
	private String secondSeparator = ".";
45
	private List<Integer> levels = new ArrayList<>();
46

    
47
	private DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder = new DefaultQuantitativeDescriptionBuilder();
48
	private DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder = new DefaultCategoricalDescriptionBuilder();
49

    
50
	private TextData previousTextData;
51

    
52
	private DeltaTextDataProcessor deltaTextDataProcessor = new DeltaTextDataProcessor();
53

    
54
	private Map<String, INaturalLanguageTextDataProcessor> elementProcessors;
55

    
56
	private Set<INaturalLanguageTextDataProcessor> applicableElementProcessors = new HashSet<INaturalLanguageTextDataProcessor>();
57

    
58
	/**
59
	 * Change the first separator used by generateSingleTextData. By default ",".
60
	 *
61
	 * @param separator
62
	 */
63
	public void setFirstSeparator(String separator){
64
		firstSeparator=separator;
65
	}
66

    
67
	public String getFirstSeparator(){
68
		return firstSeparator;
69
	}
70

    
71
	/**
72
	 * Change the second separator used by generateSingleTextData. By default ".".
73
	 *
74
	 * @param separator
75
	 */
76
	public void setSecondSeparator(String separator){
77
		secondSeparator=separator;
78
	}
79

    
80
	public String getSecondSeparator(){
81
		return secondSeparator;
82
	}
83

    
84
	/**
85
	 * @param quantitativeDescriptionBuilder
86
	 */
87
	public void setQuantitativeDescriptionBuilder(DescriptionBuilder<QuantitativeData> quantitativeDescriptionBuilder){
88
		this.quantitativeDescriptionBuilder = quantitativeDescriptionBuilder;
89
	}
90

    
91
	/**
92
	 * @param categoricalDescriptionBuilder
93
	 */
94
	public void setCategoricalDescriptionBuilder(DescriptionBuilder<CategoricalData> categoricalDescriptionBuilder){
95
		this.categoricalDescriptionBuilder = categoricalDescriptionBuilder;
96
	}
97

    
98
	/**
99
	 * @return the element processors of this generator
100
	 */
101
	public Map<String, INaturalLanguageTextDataProcessor> getElementProcessors() {
102
		return elementProcessors;
103
	}
104

    
105
	/**
106
	 * The keys of the elementProcessors map are regular expressions which are
107
	 * being used to identify the those Descriptions to which the mapped
108
	 * NaturalLanguageTextDataProcessor is applicable.
109
	 *
110
	 * @param elementProcessors
111
	 */
112
	public void setElementProcessors(
113
			Map<String, INaturalLanguageTextDataProcessor> elementProcessors) {
114
		this.elementProcessors = elementProcessors;
115
	}
116

    
117
	/**
118
	 * Looks for technical annotations, if one matches a regular expression of the element processors
119
	 * the associated processor is added to the applicable element processors which will then be applied
120
	 * when generating the description.
121
	 *
122
	 * @param annotations the set of annotations of the description
123
	 */
124
	private void initNaturalLanguageDescriptionElementProcessors(Set<Annotation> annotations) {
125

    
126
		if(annotations != null){
127
			for(Annotation annotation : annotations){
128
				if(annotation.getAnnotationType().equals(AnnotationType.TECHNICAL())){
129
					if (elementProcessors!=null){
130
						for(String regex : elementProcessors.keySet()){
131
							if(annotation.getText().matches(regex)){
132
								applicableElementProcessors.add(elementProcessors.get(regex));
133
							}
134
						}
135
					}
136
				}
137
			}
138
		}
139
	}
140

    
141

    
142
	/**
143
	 * Applies the list of applicable processors to a TextData.
144
	 *
145
	 * @param textData the TextData to be modified
146
	 * @param previousTextData the TextData corresponding to the feature of the previous level in the tree
147
	 */
148
	private void applyNaturalLanguageDescriptionElementProcessors(TextData textData, TextData previousTextData){
149
		for(INaturalLanguageTextDataProcessor processor : applicableElementProcessors){
150
			processor.process(textData, previousTextData);
151
		}
152
	}
153

    
154

    
155
	/**
156
	 * The most simple function to generate a description. The language used is the default one.
157
	 *
158
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
159
	 * @param description the TaxonDescription with all the data
160
	 *
161
	 * @return a list of TextData, each one being a basic element of the natural language description
162
	 */
163
	@Override
164
    public List<TextData> generateNaturalLanguageDescription(TermTree featureTree,TaxonDescription description) {
165
		return generateNaturalLanguageDescription(featureTree,description,Language.DEFAULT());
166
	}
167

    
168

    
169

    
170
	/**
171
	 * Generate a description in a specified language.
172
	 *
173
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
174
	 * @param description the TaxonDescription with all the data
175
	 * @param language the language in which the description has to be printed
176
	 *
177
	 * @return a list of TextData, each one being a basic element of the natural language description
178
	 */
179
	@Override
180
    public List<TextData> generateNaturalLanguageDescription(TermTree featureTree, TaxonDescription description,	Language language) {
181
		List<Language> languages = new ArrayList<Language>();
182
		languages.add(language);
183
		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
184
		return generatePreferredNaturalLanguageDescription(featureTree,description,languages);
185
	}
186

    
187
	/**
188
	 * Generate a description with a specified list of preferred languages.
189
	 *
190
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
191
	 * @param description the TaxonDescription with all the data
192
	 * @param languages the ordered list of languages preferred for printing the description
193
	 *
194
	 * @return a list of TextData, each one being a basic element of the natural language description
195
	 */
196
	@Override
197
    public List<TextData> generatePreferredNaturalLanguageDescription(TermTree featureTree,TaxonDescription description, List<Language> languages) {
198
		initNaturalLanguageDescriptionElementProcessors(description.getAnnotations());
199
		return buildBranchesDescr(featureTree.getRootChildren(), featureTree.getRoot(), description, languages,0);
200
	}
201

    
202
	/**
203
	 * Generate a description as a single paragraph in a TextData.
204
	 *
205
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
206
	 * @param description the TaxonDescription with all the data
207
	 *
208
	 * @return a TextData in the default language.
209
	 */
210
	@Override
211
    public TextData generateSingleTextData(TermTree featureTree, TaxonDescription description) {
212
		return generateSingleTextData(featureTree,description,Language.DEFAULT());
213
	}
214

    
215
	/**
216
	 * Generate a description as a single paragraph in a TextData.
217
	 *
218
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
219
	 * @param description the TaxonDescription with all the data
220
	 * @param language the language in which the description has to be printed
221
	 *
222
	 * @return a TextData in the specified language.
223
	 */
224
	@Override
225
    public TextData generateSingleTextData(TermTree featureTree, TaxonDescription description, Language language) {
226
		List<Language> languages = new ArrayList<Language>();
227
		languages.add(language);
228
		return generatePreferredSingleTextData(featureTree,description,languages);
229
	}
230

    
231
	/**
232
	 * Generate a description with a specified list of preferred languages.
233
	 *
234
	 * @param featureTree the FeatureTree holding the order in which features and their states must be printed
235
	 * @param description the TaxonDescription with all the data
236
	 * @param languages the ordered list of languages preferred for printing the description
237
	 *
238
	 * @return a TextData using the languages (in the given order of preference)
239
	 */
240
	@Override
241
    public TextData generatePreferredSingleTextData(TermTree featureTree, TaxonDescription description, List<Language> languages) {
242
		levels.clear(); // before the start, the table containing the levels of each node must be cleared
243
		// Note: this is not the most efficient way to keep track of the levels of the nodes but it allows some flexibility
244
		List<TextData> texts = generatePreferredNaturalLanguageDescription(featureTree,description, languages);// first get the description as a raw list of TextData
245

    
246
		StringBuilder descriptionStringBuilder = new StringBuilder(); // the StringBuilder used to generate the description
247
		int i = 0,j,level; // i is used to store the index of the TextData to use
248
		boolean startSentence = false, firstOne = true;
249

    
250
		for (j=0 ; j<levels.size() ; j++){
251
			level = levels.get(j);
252
			if (level==-1){
253
				if ((j+1)<levels.size() && levels.get(j+1).equals(0)){ // if this node is the direct father of a leaf
254
					descriptionStringBuilder.append(secondSeparator + " ");
255
					startSentence=true;
256
					firstOne=false;
257
					String asString = texts.get(i).getText(Language.DEFAULT()).toString();
258
					if (asString.length()>1) {
259
                        descriptionStringBuilder.append(asString.substring(0,1).toUpperCase() + asString.substring(1));
260
                    }
261
				}
262
				i++;
263
			}
264
			else if (level==0) { // if this node is a leaf
265
				if (startSentence) {
266
                    descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
267
                } else {
268
                    descriptionStringBuilder.append(firstSeparator + texts.get(i).getText(Language.DEFAULT()));
269
                }
270
				startSentence=false;
271
				i++;
272
			}
273
			else {
274
				if (!firstOne && levels.get(j-1).equals(0)){ // if this node corresponds to the states linked to the previous leaf
275
					if (i<texts.size()) {
276
                        descriptionStringBuilder.append(texts.get(i).getText(Language.DEFAULT()));
277
                    }
278
					i++;
279
				}
280
			}
281
		}
282
		descriptionStringBuilder.append(secondSeparator);
283
		String returnString = descriptionStringBuilder.toString();
284
		returnString = StringUtils.replace(returnString, "  ", " ");
285
		returnString = StringUtils.removeStart(returnString, secondSeparator + " ");
286
		return TextData.NewInstance(returnString,Language.DEFAULT(),TextFormat.NewInstance("", "Text", ""));
287
	}
288

    
289

    
290

    
291
	/** recursive function that goes through a tree containing the order in which the description has to be generated,
292
	 *  if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
293
	 *
294
	 * @param children the children of the feature node considered
295
	 * @param parent the feature node considered
296
	 * @param description the TaxonDescription element for which we want a natural language output
297
	 * @param language The language in which the description has to be written
298
	 * @param floor integer to keep track of the level in the tree
299
	 * @return a list of TextData elements containing the part of description corresponding to the feature node considered
300
	 */
301
	private List<TextData> buildBranchesDescr(List<TermNode> children, TermNode<Feature> parent, TaxonDescription description, List<Language> languages, int floor) {
302
		List<TextData> listTextData = new ArrayList<TextData>();
303
		floor++; // counter to know the current level in the tree
304

    
305
		if (!parent.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
306
			levels.add(new Integer(floor)); // the level of the different nodes in the tree are kept, thus it is easier to build a structured text out of the List<TextData>
307
			Feature feature = parent.getTerm();
308
			TextData featureName;
309
			if (feature!=null && feature.getLabel()!=null){ // if a node is associated to a feature
310
				featureName = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
311
				levels.add(new Integer(-1)); // it is indicated by a '-1' after its level
312
				listTextData.add(featureName); // the TextData representing the name of the feature is concatenated to the list
313
			}
314
            else {
315
                featureName = new TextData(); // else an empty TextData is created (because we keep track of the features, it is useful to inform when the upper node has no feature attached)
316
            }
317

    
318
			for (Iterator<TermNode> ifn = children.iterator() ; ifn.hasNext() ;){
319
				previousTextData = featureName; // this allows to keep track of the name of the feature one level up in the tree
320
				TermNode fn = ifn.next();
321
				listTextData.addAll(buildBranchesDescr(fn.getChildNodes(),fn,description, languages, floor));
322
			}
323
		}
324
		else { //once a leaf is reached
325
			Feature feature = parent.getTerm();
326
			if (feature!=null && (feature.isSupportsQuantitativeData() || feature.isSupportsCategoricalData())) {
327
				Set<DescriptionElementBase> elements = description.getElements();
328
				for (Iterator<DescriptionElementBase> deb = elements.iterator() ; deb.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
329
					DescriptionElementBase descriptionElement = deb.next();
330
					if (descriptionElement.getFeature().equals(feature)){ // if one matches the corresponding feature associated to this leaf
331
						if (descriptionElement instanceof CategoricalData || descriptionElement instanceof QuantitativeData){
332
							TextData featureTextData;
333
							TextData statesTextData;
334
							if (descriptionElement instanceof CategoricalData) { // if this description is a CategoricalData, generate the according TextData
335
								CategoricalData categoricalData = (CategoricalData) descriptionElement;
336
								statesTextData = categoricalDescriptionBuilder.build(categoricalData, languages);
337
								featureTextData = categoricalDescriptionBuilder.buildTextDataFeature(feature, languages);
338
							}
339
							else { // if this description is a QuantitativeData, generate the according TextData
340
								QuantitativeData quantitativeData = (QuantitativeData) descriptionElement;
341
								statesTextData = quantitativeDescriptionBuilder.build(quantitativeData, languages);
342
								featureTextData = quantitativeDescriptionBuilder.buildTextDataFeature(feature, languages);
343
							}
344
							applyNaturalLanguageDescriptionElementProcessors(featureTextData, previousTextData);
345
							levels.add(new Integer(0)); // 0 indicates a feature, which is a leaf of the tree
346
							listTextData.add(featureTextData);
347
							levels.add(new Integer(floor)); // this represents the level of the feature and means it is followed by a TextData containing the states of the feature
348
							listTextData.add(statesTextData);
349
						}
350
					}
351
				}
352
			}
353
		}
354
		return listTextData;
355
	}
356

    
357
}
(71-71/95)