Merge branch 'release/5.0.0'
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / search / SearchResultBuilder.java
1 /**
2 * Copyright (C) 2012 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.api.service.search;
10
11 import java.io.IOException;
12 import java.util.ArrayList;
13 import java.util.List;
14 import java.util.Map;
15
16 import org.apache.commons.lang.ArrayUtils;
17 import org.apache.commons.lang.StringUtils;
18 import org.apache.log4j.Logger;
19 import org.apache.lucene.document.Document;
20 import org.apache.lucene.index.CorruptIndexException;
21 import org.apache.lucene.search.MultiTermQuery;
22 import org.apache.lucene.search.Query;
23 import org.apache.lucene.search.ScoreDoc;
24 import org.apache.lucene.search.TopDocs;
25 import org.apache.lucene.search.WildcardQuery;
26 import org.apache.lucene.search.grouping.GroupDocs;
27 import org.apache.lucene.search.grouping.TopGroups;
28 import org.apache.lucene.util.BytesRef;
29 import org.hibernate.search.engine.ProjectionConstants;
30
31 import eu.etaxonomy.cdm.model.CdmBaseType;
32 import eu.etaxonomy.cdm.model.common.CdmBase;
33 import eu.etaxonomy.cdm.persistence.dao.common.ICdmEntityDao;
34
35 /**
36 * @author Andreas Kohlbecker
37 * @since Jan 6, 2012
38 *
39 */
40 public class SearchResultBuilder implements ISearchResultBuilder {
41
42 public static final Logger logger = Logger.getLogger(SearchResultBuilder.class);
43
44 /* (non-Javadoc)
45 * @see eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder#createResultSetFromIds(eu.etaxonomy.cdm.search.LuceneSearch, org.apache.lucene.search.TopDocs, eu.etaxonomy.cdm.persistence.dao.common.ICdmEntityDao, java.lang.String)
46 */
47 private Query query;
48 /**
49 * fragmentNumber - max number of sentence fragments to return
50 */
51 private final int fragmentNumber = 5;
52 /**
53 * fragmentSize - the max number of characters for each fragment
54 */
55 private final int fragmentSize = 100;
56 private final LuceneSearch luceneSearch;
57
58 /**
59 * Use this constructor if you do not wish to retrieve highlighted terms found in the best sections of a text.
60 * @param luceneSearch
61 */
62 public SearchResultBuilder(LuceneSearch luceneSearch){
63 this.luceneSearch = luceneSearch;
64 }
65
66 /**
67 * @param luceneSearch
68 * @param query the Query will be used to highlight matching fragments if the <code>highlightFields</code> property is supplied to
69 * {@link #createResultSet(TopDocs, String[], ICdmEntityDao, String, List)}
70 */
71 public SearchResultBuilder(LuceneSearch luceneSearch, Query query){
72 this.luceneSearch = luceneSearch;
73 this.query = query;
74 }
75
76 /**
77 * {@inheritDoc}
78 *
79 * <h3>NOTE:</h3> All {@link MultiTermQuery} like {@link WildcardQuery} are
80 * constant score by default since Lucene 2.9, you can change that back to
81 * scoring mode: <code>WildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)</code>
82 * This slows down the query immense or throws TooManyClauses exceptions if
83 * too many terms match the wildcard.
84 */
85 @Override
86 public <T extends CdmBase> List<SearchResult<T>> createResultSet(TopGroups<BytesRef> topGroupsResultSet,
87 String[] highlightFields, ICdmEntityDao<T> dao, Map<CdmBaseType, String> idFields, List<String> propertyPaths) throws CorruptIndexException, IOException {
88
89 List<SearchResult<T>> searchResults = new ArrayList<SearchResult<T>>();
90
91 if(topGroupsResultSet == null){
92 return searchResults;
93 }
94
95 SearchResultHighligther highlighter = null;
96 if(highlightFields != null && highlightFields.length > 0){
97 highlighter = new SearchResultHighligther();
98 }
99
100 for (GroupDocs groupDoc : topGroupsResultSet.groups) {
101
102 String cdmEntityId = null;
103 SearchResult<T> searchResult = new SearchResult<T>();
104 for(ScoreDoc scoreDoc : groupDoc.scoreDocs) {
105 Document document = luceneSearch.getSearcher().doc(scoreDoc.doc);
106 searchResult.addDoc(document);
107
108 if(cdmEntityId == null){
109 // IMPORTANT: here we assume that all documents refer to the same cdm entity
110 cdmEntityId = findId(idFields, document);
111 }
112 }
113
114 // set score values
115 if(isNumber(groupDoc.maxScore)){
116 searchResult.setScore(groupDoc.maxScore);
117 }
118
119 if(isNumber(topGroupsResultSet.maxScore)){
120 searchResult.setMaxScore(topGroupsResultSet.maxScore);
121 }
122
123 //TODO use findByUuid(List<UUID> uuids, List<Criterion> criteria, List<String> propertyPaths)
124 // instead or even better a similar findById(List<Integer> ids) however this is not yet implemented
125 if(cdmEntityId != null){
126 T entity = dao.load(Integer.valueOf(cdmEntityId), propertyPaths);
127 searchResult.setEntity(entity);
128 }
129
130 // add highlight fragments
131 if(highlighter != null){
132 Map<String, String[]> fieldFragmentMap = null;
133 for(Document doc: searchResult.getDocs()){
134 fieldFragmentMap = merge(fieldFragmentMap, highlighter.getFragmentsWithHighlightedTerms(luceneSearch.getAnalyzer(), query, highlightFields, doc, fragmentNumber, fragmentSize));
135 }
136 searchResult.setFieldHighlightMap(fieldFragmentMap);
137 }
138
139 // finally add the final result to the list
140 searchResults.add(searchResult);
141 }
142
143 return searchResults;
144 }
145
146 /**
147 * {@inheritDoc}
148 *
149 */
150 @Override
151 public <T extends CdmBase> List<SearchResult<T>> createResultSet(TopDocs topDocs,
152 String[] highlightFields, ICdmEntityDao<T> dao, Map<CdmBaseType, String> idFields, List<String> propertyPaths) throws CorruptIndexException, IOException {
153
154 List<SearchResult<T>> searchResults = new ArrayList<SearchResult<T>>();
155
156 if(topDocs == null){
157 return searchResults;
158 }
159
160 SearchResultHighligther highlighter = null;
161 if(highlightFields != null && highlightFields.length > 0){
162 highlighter = new SearchResultHighligther();
163 }
164
165 for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
166
167 String cdmEntityId = null;
168 SearchResult<T> searchResult = new SearchResult<T>();
169
170 Document document = luceneSearch.getSearcher().doc(scoreDoc.doc);
171 searchResult.addDoc(document);
172
173 if(cdmEntityId == null){
174 cdmEntityId = findId(idFields, document);
175 }
176
177 //TODO use findByUuid(List<UUID> uuids, List<Criterion> criteria, List<String> propertyPaths)
178 // instead or even better a similar findById(List<Integer> ids) however this is not yet implemented
179 if(cdmEntityId != null){
180 T entity = dao.load(Integer.valueOf(cdmEntityId), propertyPaths);
181 searchResult.setEntity(entity);
182 }
183 searchResult.setScore(scoreDoc.score);
184 searchResult.setMaxScore(scoreDoc.score);
185 // add highlight fragments
186 if(highlighter != null){
187 Map<String, String[]> fieldFragmentMap = null;
188 for(Document doc: searchResult.getDocs()){
189 fieldFragmentMap = merge(fieldFragmentMap, highlighter.getFragmentsWithHighlightedTerms(luceneSearch.getAnalyzer(), query, highlightFields, doc, fragmentNumber, fragmentSize));
190 }
191 searchResult.setFieldHighlightMap(fieldFragmentMap);
192 }
193
194 // finally add the final result to the list
195 searchResults.add(searchResult);
196 }
197
198 return searchResults;
199 }
200
201
202 /**
203 * {@inheritDoc}
204 *
205 */
206 @Override
207 public List<DocumentSearchResult> createResultSet(TopDocs topDocs, String[] highlightFields) throws CorruptIndexException, IOException {
208
209 List<DocumentSearchResult> searchResults = new ArrayList<DocumentSearchResult>();
210
211 if(topDocs == null){
212 return searchResults;
213 }
214
215 SearchResultHighligther highlighter = null;
216 if(highlightFields != null && highlightFields.length > 0){
217 highlighter = new SearchResultHighligther();
218 }
219
220 for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
221
222 String cdmEntityId = null;
223 DocumentSearchResult searchResult = new DocumentSearchResult();
224
225 Document document = luceneSearch.getSearcher().doc(scoreDoc.doc);
226 searchResult.addDoc(document);
227
228 searchResult.setScore(scoreDoc.score);
229 searchResult.setMaxScore(scoreDoc.score);
230 // add highlight fragments
231 if(highlighter != null){
232 Map<String, String[]> fieldFragmentMap = null;
233 for(Document doc: searchResult.getDocs()){
234 fieldFragmentMap = merge(fieldFragmentMap, highlighter.getFragmentsWithHighlightedTerms(luceneSearch.getAnalyzer(), query, highlightFields, doc, fragmentNumber, fragmentSize));
235 }
236 searchResult.setFieldHighlightMap(fieldFragmentMap);
237 }
238
239 // finally add the final result to the list
240 searchResults.add(searchResult);
241 }
242
243 return searchResults;
244 }
245 /**
246 * @param base
247 * @param add
248 * @return
249 */
250 private Map<String, String[]> merge(Map<String, String[]> base, Map<String, String[]> add) {
251 if(base == null){
252 return add;
253 } else {
254 for(String key : add.keySet()) {
255 if (base.containsKey(key)){
256 base.put(key, (String[]) ArrayUtils.addAll(base.get(key), add.get(key)));
257 } else {
258 base.put(key, add.get(key));
259 }
260 }
261 return base;
262 }
263 }
264
265 /**
266 * find the entity id
267 *
268 * @param idFields
269 * @param doc
270 * @return
271 */
272 private String findId(Map<CdmBaseType,String> idFieldMap, Document doc) {
273
274 String docClassName = doc.getValues(ProjectionConstants.OBJECT_CLASS)[0];
275
276 String id = null;
277 for(CdmBaseType baseType : idFieldMap.keySet()){
278 if(baseType.getSubClassNames().contains(docClassName)){
279 String[] idStrings = doc.getValues(idFieldMap.get(baseType));
280 if(idStrings.length > 0 && StringUtils.isNotBlank(idStrings[0])){
281 id = idStrings[0];
282 break;
283 }
284 }
285 }
286 if(id == null){
287 throw new RuntimeException("No id field name given for " + docClassName);
288 }
289 return id;
290 }
291
292 /**
293 * @param number
294 * @return
295 */
296 private boolean isNumber(Float number) {
297 return !Double.isNaN(number) && !Double.isInfinite(number);
298 }
299
300 }