1 package eu
.etaxonomy
.cdm
.api
.service
;
3 import java
.util
.ArrayList
;
4 import java
.util
.HashMap
;
5 import java
.util
.HashSet
;
6 import java
.util
.Iterator
;
10 import java
.util
.regex
.Pattern
;
12 import org
.apache
.commons
.lang
.StringUtils
;
13 import org
.springframework
.stereotype
.Component
;
15 import eu
.etaxonomy
.cdm
.model
.description
.CategoricalData
;
16 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
17 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
18 import eu
.etaxonomy
.cdm
.model
.description
.FeatureNode
;
19 import eu
.etaxonomy
.cdm
.model
.description
.FeatureTree
;
20 import eu
.etaxonomy
.cdm
.model
.description
.QuantitativeData
;
21 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
22 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
23 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
24 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
25 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
28 public class NaturalLanguageGenerator
implements INaturalLanguageGenerator
{
30 private DescriptionBuilder
<QuantitativeData
> quantitativeDescriptionBuilder
= new DefaultQuantitativeDescriptionBuilder();
31 private DescriptionBuilder
<CategoricalData
> categoricalDescriptionBuilder
= new DefaultCategoricalDescriptionBuilder();
33 private String previousFeatureName
;
35 private Map
<String
, INaturalLanguageTextDataProcessor
> elementProcessors
;
37 public Map
<String
, INaturalLanguageTextDataProcessor
> getElementProcessors() {
38 return elementProcessors
;
42 * The keys of the elementProcessors map are regular expressions which are
43 * being used to identify the those Descriptions to which the mapped
44 * NaturalLanguageTextDataProcessor is applicable.
46 * @param elementProcessors
48 public void setElementProcessors(
49 Map
<String
, INaturalLanguageTextDataProcessor
> elementProcessors
) {
50 this.elementProcessors
= elementProcessors
;
53 private Set
<INaturalLanguageTextDataProcessor
> applicableElementProcessors
= new HashSet
<INaturalLanguageTextDataProcessor
>();
58 private void initNaturalLanguageDescriptionElementProcessors(Set
<Annotation
> annotations
) {
60 if(annotations
!= null){
61 for(Annotation annotation
: annotations
){
62 if(annotation
.getAnnotationType().equals(AnnotationType
.TECHNICAL())){
63 for(String regex
: elementProcessors
.keySet()){
64 if(annotation
.getText().matches(regex
)){
65 applicableElementProcessors
.add(elementProcessors
.get(regex
));
76 private void applyNaturalLanguageDescriptionElementProcessors(TextData textData
, TextData previousTextData
){
77 for(INaturalLanguageTextDataProcessor processor
: applicableElementProcessors
){
78 processor
.process(textData
, previousTextData
);
85 public List
<TextData
> generateNaturalLanguageDescription(FeatureTree featureTree
,TaxonDescription description
) {
86 List
<Language
> languages
= new ArrayList
<Language
>();
87 languages
.add(Language
.DEFAULT());
88 return buildBranchesDescr(featureTree
.getRootChildren(), featureTree
.getRoot(), description
, languages
);
95 public List
<TextData
> generatePreferredNaturalLanguageDescription(FeatureTree featureTree
,TaxonDescription description
, List
<Language
> languages
) {
96 return buildBranchesDescr(featureTree
.getRootChildren(), featureTree
.getRoot(), description
, languages
);
100 public List
<TextData
> generateNaturalLanguageDescription(FeatureTree featureTree
, TaxonDescription description
, Language language
) {
101 List
<Language
> languages
= new ArrayList
<Language
>();
102 languages
.add(language
);
104 initNaturalLanguageDescriptionElementProcessors(description
.getAnnotations());
106 return buildBranchesDescr(featureTree
.getRootChildren(), featureTree
.getRoot(), description
, languages
);
111 /** recursive function that goes through a tree containing the order in which the description has to be generated,
112 * if an element of this tree matches one of the TaxonDescription, a DescriptionBuilder is called which returns a TextData with the corresponding description.
117 * @param language The language in which the description has to be written
120 private List
<TextData
> buildBranchesDescr(List
<FeatureNode
> children
, FeatureNode parent
, TaxonDescription description
, List
<Language
> languages
) {
121 List
<TextData
> listTextData
= new ArrayList
<TextData
>(); ;
122 if (!parent
.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
123 Feature fref
= parent
.getFeature();
124 for (Iterator
<FeatureNode
> ifn
= children
.iterator() ; ifn
.hasNext() ;){
125 FeatureNode fn
= ifn
.next();
126 listTextData
.addAll(buildBranchesDescr(fn
.getChildren(),fn
,description
, languages
));
129 else { //once a leaf is reached
130 Feature fref
= parent
.getFeature();
131 if (fref
!=null) { // needs a better algorithm
133 Set
<DescriptionElementBase
> elements
= description
.getElements();
134 for (Iterator
<DescriptionElementBase
> deb
= elements
.iterator() ; deb
.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
135 DescriptionElementBase descriptionElement
= deb
.next();
137 if (descriptionElement
.getFeature().equals(fref
)){ // if one matches the corresponding feature associated to this leaf
138 if (descriptionElement
instanceof CategoricalData
) { // if this description is a CategoricalData, generate the according TextData
139 CategoricalData categoricalData
= (CategoricalData
) descriptionElement
;
140 //textData = buildCategoricalDescr(categoricalData, language);
141 textData
= categoricalDescriptionBuilder
.build(categoricalData
, languages
);
142 //textData.putText(fref.getLabel(), Language.DEFAULT());
143 TextData featureName
= TextData
.NewInstance(fref
.getLabel(), Language
.DEFAULT(), null);
144 listTextData
.add(featureName
); // if you want to print the name of the feature (Should it be an option ?)
145 listTextData
.add(textData
);
147 if (descriptionElement
instanceof QuantitativeData
) { // if this description is a QuantitativeData, generate the according TextData
148 QuantitativeData quantitativeData
= (QuantitativeData
) descriptionElement
;
149 textData
= quantitativeDescriptionBuilder
.build(quantitativeData
, languages
);
150 TextData featureName
= TextData
.NewInstance(fref
.getLabel(), Language
.DEFAULT(), null);
151 listTextData
.add(featureName
); // if you want to print the name of the feature
152 listTextData
.add(textData
);
164 * @param quantitativeDescriptionBuilder
166 public void setQuantitativeDescriptionBuilder(DescriptionBuilder
<QuantitativeData
> quantitativeDescriptionBuilder
){
167 this.quantitativeDescriptionBuilder
= quantitativeDescriptionBuilder
;
173 * @param categoricalDescriptionBuilder
175 public void setCategoricalDescriptionBuilder(DescriptionBuilder
<CategoricalData
> categoricalDescriptionBuilder
){
176 this.categoricalDescriptionBuilder
= categoricalDescriptionBuilder
;
186 public String
generateStringNaturalLanguageDescription(FeatureTree featureTree
, TaxonDescription description
, Language language
) {
187 List
<Language
> languages
= new ArrayList
<Language
>();
188 languages
.add(language
);
189 return buildString(featureTree
.getRootChildren(), featureTree
.getRoot(), description
, languages
).toString();
193 * recursive function that goes through a tree containing the order in which
194 * the description has to be generated, if an element of this tree matches
195 * one of the TaxonDescription, a DescriptionBuilder is called which returns
196 * a TextData with the corresponding description.
198 * Also applies the folowing formatting rules which are special for data coming from Delta, DeltaAccess, DiversityDescriptions:
201 * <li><b>(1.A) if( doSkipTextInBrackets) : </b>Label Text in brackets is always skipped the remaining text string I the TEXT_TO_DISPLAY. The text may contain multiple substrings tagged with the brackets. A tagged substring may also occur in the middle of the whole string.</li>
202 * <li><b>(1.B) else : </b>just remove the brackets
203 * <li><b>(2) : </b> If the TEXT_TO_DISPLAY is equal the TEXT_TO_DISPLAY of the previous element output of this text is suppressed.</li>
210 * The language in which the description has to be written
213 private StringBuilder
buildString(List
<FeatureNode
> children
, FeatureNode parent
, TaxonDescription description
, List
<Language
> languages
) {
214 StringBuilder stringbuilder
= new StringBuilder();
215 if (!parent
.isLeaf()){ // if this node is not a leaf, continue recursively (only the leaves of a FeatureTree contain states)
216 for (Iterator
<FeatureNode
> ifn
= children
.iterator() ; ifn
.hasNext() ;){
217 FeatureNode fn
= ifn
.next();
218 StringBuilder tempsb
= buildString(fn
.getChildren(),fn
,description
, languages
);
219 if (tempsb
.length()>1) stringbuilder
.append(tempsb
.deleteCharAt(tempsb
.length()-1));
220 // if (tempsb.length()>1) stringbuilder.append(tempsb);
222 stringbuilder
.append('.');
224 else { //once a leaf is reached
225 Feature fref
= parent
.getFeature();
226 if (fref
!=null) { // needs a better algorithm
228 Set
<DescriptionElementBase
> elements
= description
.getElements();
229 for (Iterator
<DescriptionElementBase
> deb
= elements
.iterator() ; deb
.hasNext() ;){ // iterates over all the descriptions enclosed in the TaxonDescription
230 DescriptionElementBase descriptionElement
= deb
.next();
232 if (descriptionElement
.getFeature().equals(fref
)){ // if one matches the corresponding feature associated to this leaf
233 if (descriptionElement
instanceof CategoricalData
) { // if this description is a CategoricalData, generate the according TextData
234 CategoricalData categoricalData
= (CategoricalData
) descriptionElement
;
235 //textData = buildCategoricalDescr(categoricalData, language);
236 textData
= categoricalDescriptionBuilder
.build(categoricalData
, languages
);
237 //textData.putText(fref.getLabel(), Language.DEFAULT());
238 String featureName
= StringUtils
.substringBefore(fref
.getLabel(), "<");
239 if (previousFeatureName
==null){
240 previousFeatureName
= featureName
;
241 String featureString
= categoricalDescriptionBuilder
.buildFeature(fref
,true);
242 stringbuilder
.append(featureString
.substring(0,1).toUpperCase() + featureString
.substring(1));
244 else if (!featureName
.contains(previousFeatureName
)) {
245 stringbuilder
.append(". ");
246 previousFeatureName
= featureName
;
247 String featureString
= categoricalDescriptionBuilder
.buildFeature(fref
,true);
248 stringbuilder
.append(featureString
.substring(0,1).toUpperCase() + featureString
.substring(1)); // if you want to print the name of the feature (Should it be an option ?)
250 stringbuilder
.append(textData
.getText(Language
.DEFAULT()));
251 stringbuilder
.append(',');
253 if (descriptionElement
instanceof QuantitativeData
) { // if this description is a QuantitativeData, generate the according TextData
254 QuantitativeData quantitativeData
= (QuantitativeData
) descriptionElement
;
255 textData
= quantitativeDescriptionBuilder
.build(quantitativeData
, languages
);
256 String featureName
= StringUtils
.substringBefore(fref
.getLabel(), "<");
257 if (previousFeatureName
==null){
258 previousFeatureName
= featureName
;
259 String featureString
= quantitativeDescriptionBuilder
.buildFeature(fref
,true);
260 stringbuilder
.append(featureString
.substring(0,1).toUpperCase() + featureString
.substring(1));
262 else if (!featureName
.contains(previousFeatureName
)) {
263 stringbuilder
.append(". ");
264 previousFeatureName
= featureName
;
265 String featureString
= quantitativeDescriptionBuilder
.buildFeature(fref
,true);
266 stringbuilder
.append(featureString
.substring(0,1).toUpperCase() + featureString
.substring(1)); // if you want to print the name of the feature (Should it be an option ?)
268 stringbuilder
.append(textData
.getText(Language
.DEFAULT()));
269 stringbuilder
.append(',');
275 return stringbuilder
;