3 * Copyright (C) 2012 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.api
.service
.search
;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.TopGroups;
import org.hibernate.search.engine.DocumentBuilder;

import eu.etaxonomy.cdm.model.CdmBaseType;
import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.persistence.dao.common.ICdmEntityDao;
38 * @author Andreas Kohlbecker
42 public class SearchResultBuilder
implements ISearchResultBuilder
{
public static final Logger logger = Logger.getLogger(SearchResultBuilder.class);

/**
 * The query used to highlight matching fragments. It stays <code>null</code>
 * when the builder was created through the one-argument constructor.
 */
private Query query;

/**
 * fragmentNumber - max number of sentence fragments to return
 */
private int fragmentNumber = 5;

/**
 * fragmentSize - the max number of characters for each fragment
 */
private int fragmentSize = 100;

/**
 * The search whose searcher is used to read the documents and whose analyzer
 * is used for highlighting.
 */
private LuceneSearch luceneSearch;

/**
 * Use this constructor if you do not wish to retrieve highlighted terms found in the best sections of a text.
 *
 * @param luceneSearch the search whose searcher and analyzer are used by this builder
 */
public SearchResultBuilder(LuceneSearch luceneSearch) {
    this.luceneSearch = luceneSearch;
}

/**
 * @param luceneSearch the search whose searcher and analyzer are used by this builder
 * @param query the Query will be used to highlight matching fragments if the <code>highlightFields</code> property is supplied to
 * {@link #createResultSet(TopGroups, String[], ICdmEntityDao, Map, List)}
 */
public SearchResultBuilder(LuceneSearch luceneSearch, Query query) {
    this.luceneSearch = luceneSearch;
    // keep the query: createResultSet() hands it to the highlighter
    this.query = query;
}
81 * <h3>NOTE:</h3> All {@link MultiTermQuery} like {@link WildcardQuery} are
82 * constant score by default since Lucene 2.9, you can change that back to
83 * scoring mode: <code>WildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)</code>
84 * This slows down the query immense or throws TooManyClauses exceptions if
85 * too many terms match the wildcard.
87 public <T
extends CdmBase
> List
<SearchResult
<T
>> createResultSet(TopGroups topGroupsResultSet
,
88 String
[] highlightFields
, ICdmEntityDao
<T
> dao
, Map
<CdmBaseType
, String
> idFields
, List
<String
> propertyPaths
) throws CorruptIndexException
, IOException
{
90 List
<SearchResult
<T
>> searchResults
= new ArrayList
<SearchResult
<T
>>();
92 if(topGroupsResultSet
== null){
96 SearchResultHighligther highlighter
= null;
97 if(highlightFields
!= null && highlightFields
.length
> 0){
98 highlighter
= new SearchResultHighligther();
101 for (GroupDocs groupDoc
: topGroupsResultSet
.groups
) {
103 String cdmEntityId
= null;
104 SearchResult
<T
> searchResult
= new SearchResult
<T
>();
105 for(ScoreDoc scoreDoc
: groupDoc
.scoreDocs
) {
106 //FIXME should we group on taxon id ?????
107 Document document
= luceneSearch
.getSearcher().doc(scoreDoc
.doc
);
108 searchResult
.addDoc(document
);
110 if(cdmEntityId
== null){
111 // IMPORTANT: here we assume that all documents refer to the same cdm entity
112 cdmEntityId
= findId(idFields
, document
);
117 if(isNumber(groupDoc
.maxScore
)){
118 searchResult
.setScore(groupDoc
.maxScore
);
120 //FIXME get max score
121 // if(isNumber(topGroupsResultSet.getMaxScore())){
122 // searchResult.setMaxScore(topGroupsResultSet.getMaxScore());
125 //TODO use findByUuid(List<UUID> uuids, List<Criterion> criteria, List<String> propertyPaths)
126 // instead or even better a similar findById(List<Integer> ids) however this is not yet implemented
127 if(cdmEntityId
!= null){
128 T entity
= dao
.load(Integer
.valueOf(cdmEntityId
), propertyPaths
);
129 searchResult
.setEntity(entity
);
132 // add highlight fragments
133 if(highlighter
!= null){
134 Map
<String
, String
[]> fieldFragmentMap
= null;
135 for(Document doc
: searchResult
.getDocs()){
136 fieldFragmentMap
= merge(fieldFragmentMap
, highlighter
.getFragmentsWithHighlightedTerms(luceneSearch
.getAnalyzer(), query
, highlightFields
, doc
, fragmentNumber
, fragmentSize
));
138 searchResult
.setFieldHighlightMap(fieldFragmentMap
);
141 // finally add the final result to the list
142 searchResults
.add(searchResult
);
145 return searchResults
;
153 private Map
<String
, String
[]> merge(Map
<String
, String
[]> base
, Map
<String
, String
[]> add
) {
157 for(String key
: add
.keySet()) {
158 if (base
.containsKey(key
)){
159 base
.put(key
, (String
[]) ArrayUtils
.addAll(base
.get(key
), add
.get(key
)));
161 base
.put(key
, add
.get(key
));
175 private String
findId(Map
<CdmBaseType
,String
> idFieldMap
, Document doc
) {
177 String docClassName
= doc
.getValues(DocumentBuilder
.CLASS_FIELDNAME
)[0];
180 for(CdmBaseType baseType
: idFieldMap
.keySet()){
181 if(baseType
.getSubClassNames().contains(docClassName
)){
182 String
[] idStrings
= doc
.getValues(idFieldMap
.get(baseType
));
183 if(idStrings
.length
> 0 && StringUtils
.isNotBlank(idStrings
[0])){
190 throw new RuntimeException("No id field name given for " + docClassName
);
199 private boolean isNumber(Float number
) {
200 return !Double
.isNaN(number
) && !Double
.isInfinite(number
);