/**
 * Copyright (C) 2012 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9 package eu
.etaxonomy
.cdm
.api
.service
.search
;
11 import java
.io
.IOException
;
12 import java
.util
.ArrayList
;
13 import java
.util
.List
;
16 import org
.apache
.commons
.lang
.ArrayUtils
;
17 import org
.apache
.commons
.lang
.StringUtils
;
18 import org
.apache
.log4j
.Logger
;
19 import org
.apache
.lucene
.document
.Document
;
20 import org
.apache
.lucene
.index
.CorruptIndexException
;
21 import org
.apache
.lucene
.search
.MultiTermQuery
;
22 import org
.apache
.lucene
.search
.Query
;
23 import org
.apache
.lucene
.search
.ScoreDoc
;
24 import org
.apache
.lucene
.search
.TopDocs
;
25 import org
.apache
.lucene
.search
.WildcardQuery
;
26 import org
.apache
.lucene
.search
.grouping
.GroupDocs
;
27 import org
.apache
.lucene
.search
.grouping
.TopGroups
;
28 import org
.apache
.lucene
.util
.BytesRef
;
29 import org
.hibernate
.search
.engine
.ProjectionConstants
;
31 import eu
.etaxonomy
.cdm
.model
.CdmBaseType
;
32 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
33 import eu
.etaxonomy
.cdm
.persistence
.dao
.common
.ICdmEntityDao
;
36 * @author Andreas Kohlbecker
40 public class SearchResultBuilder
implements ISearchResultBuilder
{
42 public static final Logger logger
= Logger
.getLogger(SearchResultBuilder
.class);
45 * @see eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder#createResultSetFromIds(eu.etaxonomy.cdm.search.LuceneSearch, org.apache.lucene.search.TopDocs, eu.etaxonomy.cdm.persistence.dao.common.ICdmEntityDao, java.lang.String)
49 * fragmentNumber - max number of sentence fragments to return
51 private final int fragmentNumber
= 5;
53 * fragmentSize - the max number of characters for each fragment
55 private final int fragmentSize
= 100;
56 private final LuceneSearch luceneSearch
;
59 * Use this constructor if you do not wish to retrieve highlighted terms found in the best sections of a text.
62 public SearchResultBuilder(LuceneSearch luceneSearch
){
63 this.luceneSearch
= luceneSearch
;
68 * @param query the Query will be used to highlight matching fragments if the <code>highlightFields</code> property is supplied to
69 * {@link #createResultSet(TopDocs, String[], ICdmEntityDao, String, List)}
71 public SearchResultBuilder(LuceneSearch luceneSearch
, Query query
){
72 this.luceneSearch
= luceneSearch
;
79 * <h3>NOTE:</h3> All {@link MultiTermQuery} like {@link WildcardQuery} are
80 * constant score by default since Lucene 2.9, you can change that back to
81 * scoring mode: <code>WildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)</code>
82 * This slows down the query immense or throws TooManyClauses exceptions if
83 * too many terms match the wildcard.
86 public <T
extends CdmBase
> List
<SearchResult
<T
>> createResultSet(TopGroups
<BytesRef
> topGroupsResultSet
,
87 String
[] highlightFields
, ICdmEntityDao
<T
> dao
, Map
<CdmBaseType
, String
> idFields
, List
<String
> propertyPaths
) throws CorruptIndexException
, IOException
{
89 List
<SearchResult
<T
>> searchResults
= new ArrayList
<SearchResult
<T
>>();
91 if(topGroupsResultSet
== null){
95 SearchResultHighligther highlighter
= null;
96 if(highlightFields
!= null && highlightFields
.length
> 0){
97 highlighter
= new SearchResultHighligther();
100 for (GroupDocs groupDoc
: topGroupsResultSet
.groups
) {
102 String cdmEntityId
= null;
103 SearchResult
<T
> searchResult
= new SearchResult
<T
>();
104 for(ScoreDoc scoreDoc
: groupDoc
.scoreDocs
) {
105 Document document
= luceneSearch
.getSearcher().doc(scoreDoc
.doc
);
106 searchResult
.addDoc(document
);
108 if(cdmEntityId
== null){
109 // IMPORTANT: here we assume that all documents refer to the same cdm entity
110 cdmEntityId
= findId(idFields
, document
);
115 if(isNumber(groupDoc
.maxScore
)){
116 searchResult
.setScore(groupDoc
.maxScore
);
119 if(isNumber(topGroupsResultSet
.maxScore
)){
120 searchResult
.setMaxScore(topGroupsResultSet
.maxScore
);
123 //TODO use findByUuid(List<UUID> uuids, List<Criterion> criteria, List<String> propertyPaths)
124 // instead or even better a similar findById(List<Integer> ids) however this is not yet implemented
125 if(cdmEntityId
!= null){
126 T entity
= dao
.load(Integer
.valueOf(cdmEntityId
), propertyPaths
);
127 searchResult
.setEntity(entity
);
130 // add highlight fragments
131 if(highlighter
!= null){
132 Map
<String
, String
[]> fieldFragmentMap
= null;
133 for(Document doc
: searchResult
.getDocs()){
134 fieldFragmentMap
= merge(fieldFragmentMap
, highlighter
.getFragmentsWithHighlightedTerms(luceneSearch
.getAnalyzer(), query
, highlightFields
, doc
, fragmentNumber
, fragmentSize
));
136 searchResult
.setFieldHighlightMap(fieldFragmentMap
);
139 // finally add the final result to the list
140 searchResults
.add(searchResult
);
143 return searchResults
;
151 public <T
extends CdmBase
> List
<SearchResult
<T
>> createResultSet(TopDocs topDocs
,
152 String
[] highlightFields
, ICdmEntityDao
<T
> dao
, Map
<CdmBaseType
, String
> idFields
, List
<String
> propertyPaths
) throws CorruptIndexException
, IOException
{
154 List
<SearchResult
<T
>> searchResults
= new ArrayList
<SearchResult
<T
>>();
157 return searchResults
;
160 SearchResultHighligther highlighter
= null;
161 if(highlightFields
!= null && highlightFields
.length
> 0){
162 highlighter
= new SearchResultHighligther();
165 for (ScoreDoc scoreDoc
: topDocs
.scoreDocs
) {
167 String cdmEntityId
= null;
168 SearchResult
<T
> searchResult
= new SearchResult
<T
>();
170 Document document
= luceneSearch
.getSearcher().doc(scoreDoc
.doc
);
171 searchResult
.addDoc(document
);
173 if(cdmEntityId
== null){
174 cdmEntityId
= findId(idFields
, document
);
177 //TODO use findByUuid(List<UUID> uuids, List<Criterion> criteria, List<String> propertyPaths)
178 // instead or even better a similar findById(List<Integer> ids) however this is not yet implemented
179 if(cdmEntityId
!= null){
180 T entity
= dao
.load(Integer
.valueOf(cdmEntityId
), propertyPaths
);
181 searchResult
.setEntity(entity
);
183 searchResult
.setScore(scoreDoc
.score
);
184 searchResult
.setMaxScore(scoreDoc
.score
);
185 // add highlight fragments
186 if(highlighter
!= null){
187 Map
<String
, String
[]> fieldFragmentMap
= null;
188 for(Document doc
: searchResult
.getDocs()){
189 fieldFragmentMap
= merge(fieldFragmentMap
, highlighter
.getFragmentsWithHighlightedTerms(luceneSearch
.getAnalyzer(), query
, highlightFields
, doc
, fragmentNumber
, fragmentSize
));
191 searchResult
.setFieldHighlightMap(fieldFragmentMap
);
194 // finally add the final result to the list
195 searchResults
.add(searchResult
);
198 return searchResults
;
207 public List
<DocumentSearchResult
> createResultSet(TopDocs topDocs
, String
[] highlightFields
) throws CorruptIndexException
, IOException
{
209 List
<DocumentSearchResult
> searchResults
= new ArrayList
<DocumentSearchResult
>();
212 return searchResults
;
215 SearchResultHighligther highlighter
= null;
216 if(highlightFields
!= null && highlightFields
.length
> 0){
217 highlighter
= new SearchResultHighligther();
220 for (ScoreDoc scoreDoc
: topDocs
.scoreDocs
) {
222 String cdmEntityId
= null;
223 DocumentSearchResult searchResult
= new DocumentSearchResult();
225 Document document
= luceneSearch
.getSearcher().doc(scoreDoc
.doc
);
226 searchResult
.addDoc(document
);
228 searchResult
.setScore(scoreDoc
.score
);
229 searchResult
.setMaxScore(scoreDoc
.score
);
230 // add highlight fragments
231 if(highlighter
!= null){
232 Map
<String
, String
[]> fieldFragmentMap
= null;
233 for(Document doc
: searchResult
.getDocs()){
234 fieldFragmentMap
= merge(fieldFragmentMap
, highlighter
.getFragmentsWithHighlightedTerms(luceneSearch
.getAnalyzer(), query
, highlightFields
, doc
, fragmentNumber
, fragmentSize
));
236 searchResult
.setFieldHighlightMap(fieldFragmentMap
);
239 // finally add the final result to the list
240 searchResults
.add(searchResult
);
243 return searchResults
;
250 private Map
<String
, String
[]> merge(Map
<String
, String
[]> base
, Map
<String
, String
[]> add
) {
254 for(String key
: add
.keySet()) {
255 if (base
.containsKey(key
)){
256 base
.put(key
, (String
[]) ArrayUtils
.addAll(base
.get(key
), add
.get(key
)));
258 base
.put(key
, add
.get(key
));
272 private String
findId(Map
<CdmBaseType
,String
> idFieldMap
, Document doc
) {
274 String docClassName
= doc
.getValues(ProjectionConstants
.OBJECT_CLASS
)[0];
277 for(CdmBaseType baseType
: idFieldMap
.keySet()){
278 if(baseType
.getSubClassNames().contains(docClassName
)){
279 String
[] idStrings
= doc
.getValues(idFieldMap
.get(baseType
));
280 if(idStrings
.length
> 0 && StringUtils
.isNotBlank(idStrings
[0])){
287 throw new RuntimeException("No id field name given for " + docClassName
);
296 private boolean isNumber(Float number
) {
297 return !Double
.isNaN(number
) && !Double
.isInfinite(number
);