/**
 * Copyright (C) 2011 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
10 package eu
.etaxonomy
.cdm
.api
.service
.search
;
12 import java
.io
.IOException
;
13 import java
.util
.Collection
;
15 import org
.apache
.log4j
.Logger
;
16 import org
.apache
.lucene
.analysis
.Analyzer
;
17 import org
.apache
.lucene
.index
.IndexReader
;
18 import org
.apache
.lucene
.index
.Term
;
19 import org
.apache
.lucene
.queryParser
.ParseException
;
20 import org
.apache
.lucene
.queryParser
.QueryParser
;
21 import org
.apache
.lucene
.search
.BooleanClause
;
22 import org
.apache
.lucene
.search
.BooleanQuery
;
23 import org
.apache
.lucene
.search
.Hits
;
24 import org
.apache
.lucene
.search
.IndexSearcher
;
25 import org
.apache
.lucene
.search
.MultiCollector
;
26 import org
.apache
.lucene
.search
.Query
;
27 import org
.apache
.lucene
.search
.ScoreDoc
;
28 import org
.apache
.lucene
.search
.Searcher
;
29 import org
.apache
.lucene
.search
.Sort
;
30 import org
.apache
.lucene
.search
.SortField
;
31 import org
.apache
.lucene
.search
.TermQuery
;
32 import org
.apache
.lucene
.search
.TopDocs
;
33 import org
.apache
.lucene
.search
.grouping
.AllGroupsCollector
;
34 import org
.apache
.lucene
.search
.grouping
.FirstPassGroupingCollector
;
35 import org
.apache
.lucene
.search
.grouping
.SearchGroup
;
36 import org
.apache
.lucene
.search
.grouping
.SecondPassGroupingCollector
;
37 import org
.apache
.lucene
.search
.grouping
.TopGroups
;
38 import org
.hibernate
.Session
;
39 import org
.hibernate
.search
.Search
;
40 import org
.hibernate
.search
.SearchFactory
;
41 import org
.hibernate
.search
.engine
.DocumentBuilder
;
42 import org
.hibernate
.search
.reader
.ReaderProvider
;
43 import org
.hibernate
.search
.store
.DirectoryProvider
;
45 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
46 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
47 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
48 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
49 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
/**
 * @author Andreas Kohlbecker
 */
57 public class LuceneSearch
{
59 private static final String GROUP_BY_FIELD
= "id";
61 public static final Logger logger
= Logger
.getLogger(LuceneSearch
.class);
63 protected Session session
;
65 protected IndexSearcher searcher
;
67 private SortField
[] sortFields
;
69 private Class
<?
extends CdmBase
> directorySelectClass
;
71 protected Class
<?
extends CdmBase
> getDirectorySelectClass() {
72 return pushAbstractBaseTypeDown(directorySelectClass
);
78 private Class
<?
extends CdmBase
> clazz
;
81 public Class
<?
extends CdmBase
> getClazz() {
86 * Sets the Class to use as filter criterion, in case the supplied Class equals the
87 * <code>directorySelectClass</code> the Class is set to <code>null</code>
90 public void setClazz(Class
<?
extends CdmBase
> clazz
) {
94 * we must not use the getter of directorySelectClass
95 * since we need the abstract base classes here!!!!
97 if(clazz
!= null && clazz
.equals(directorySelectClass
)){
104 * The MAX_HITS_ALLOWED value must be one less than Integer.MAX_VALUE
105 * otherwise PriorityQueue will produce an exception since it
106 * will always add 1 to the maxhits so Integer.MAX_VALUE
107 * would become Integer.MIN_VALUE
109 public final int MAX_HITS_ALLOWED
= 10000;
111 protected Query query
;
113 protected String
[] highlightFields
= new String
[0];
/**
 * Creates a search bound to the given session.
 *
 * @param session the Hibernate session used to obtain the full text session
 * @param directorySelectClass the type used to select the index directory and the analyzer
 */
public LuceneSearch(Session session, Class<? extends CdmBase> directorySelectClass) {
    this.directorySelectClass = directorySelectClass;
    this.session = session;
}
125 * TODO the abstract base class DescriptionElementBase can not be used, so
126 * we are using an arbitraty subclass to find the DirectoryProvider, future
127 * versions of hibernate search my allow using abstract base classes see
129 * ://stackoverflow.com/questions/492184/how-do-you-find-all-subclasses-of
130 * -a-given-class-in-java
132 * @param type must not be null
135 protected Class
<?
extends CdmBase
> pushAbstractBaseTypeDown(Class
<?
extends CdmBase
> type
) {
136 if (type
.equals(DescriptionElementBase
.class)) {
137 type
= TextData
.class;
139 if (type
.equals(TaxonBase
.class)) {
/**
 * Constructor without arguments, accessible to subclasses only.
 */
protected LuceneSearch() {
}
152 public Searcher
getSearcher() {
153 if(searcher
== null){
154 searcher
= new IndexSearcher(getIndexReader());
155 searcher
.setDefaultFieldSortScoring(true, true);
163 public IndexReader
getIndexReader() {
164 SearchFactory searchFactory
= Search
.getFullTextSession(session
).getSearchFactory();
166 DirectoryProvider
[] directoryProviders
= searchFactory
.getDirectoryProviders(getDirectorySelectClass());
167 logger
.info(directoryProviders
[0].getDirectory().toString());
169 ReaderProvider readerProvider
= searchFactory
.getReaderProvider();
170 IndexReader reader
= readerProvider
.openReader(directoryProviders
[0]);
177 public QueryParser
getQueryParser() {
178 Analyzer analyzer
= getAnalyzer();
179 QueryParser parser
= new QueryParser("titleCache", analyzer
);
186 public Analyzer
getAnalyzer() {
187 SearchFactory searchFactory
= Search
.getFullTextSession(session
).getSearchFactory();
188 Analyzer analyzer
= searchFactory
.getAnalyzer(getDirectorySelectClass());
193 * @param luceneQueryString
194 * @param clazz the type as additional filter criterion
195 * @param pageSize if the page size is null or in an invalid range it will be set to MAX_HITS_ALLOWED
196 * @param pageNumber a 0-based index of the page to return, will default to 0 if null or negative.
198 * @throws ParseException
199 * @throws IOException
201 public TopGroups
executeSearch(String luceneQueryString
, Integer pageSize
, Integer pageNumber
) throws ParseException
, IOException
{
203 Query luceneQuery
= parse(luceneQueryString
);
204 this.query
= luceneQuery
;
206 return executeSearch(pageSize
, pageNumber
);
210 * @param luceneQueryString
212 * @throws ParseException
214 public Query
parse(String luceneQueryString
) throws ParseException
{
215 logger
.debug("luceneQueryString to be parsed: " + luceneQueryString
);
216 Query luceneQuery
= getQueryParser().parse(luceneQueryString
);
222 * @param clazz the type as additional filter criterion
223 * @param pageSize if the page size is null or in an invalid range it will be set to MAX_HITS_ALLOWED
224 * @param pageNumber a 0-based index of the page to return, will default to 0 if null or negative.
226 * @throws ParseException
227 * @throws IOException
229 public TopGroups
executeSearch(Integer pageSize
, Integer pageNumber
) throws ParseException
, IOException
{
232 if(pageNumber
== null || pageNumber
< 0){
235 if(pageSize
== null || pageSize
<= 0 || pageSize
> MAX_HITS_ALLOWED
){
236 pageSize
= MAX_HITS_ALLOWED
;
237 logger
.info("limiting pageSize to MAX_HITS_ALLOWED = " + MAX_HITS_ALLOWED
+ " items");
240 Query fullQuery
= expandQuery();
242 logger
.info("final query: " + fullQuery
.toString());
244 int offset
= pageNumber
* pageSize
;
245 int limit
= (pageNumber
+ 1) * pageSize
- 1 ;
247 logger
.debug("start: " + offset
+ "; limit:" + limit
);
249 // TopDocs topDocs = null;
251 // sort must be non null default: Sort.RELEVANCE
252 Sort groupSort
= null;
253 Sort withinGroupSort
= Sort
.RELEVANCE
;
254 if(sortFields
!= null && sortFields
.length
> 0){
255 Sort sort
= new Sort(sortFields
);
256 groupSort
= new Sort(sortFields
);
257 // topDocs = getSearcher().search(fullQuery, null, limit, sort);
259 groupSort
= Sort
.RELEVANCE
; // == SortField.FIELD_SCORE !!
260 // topDocs = getSearcher().search(fullQuery, null, limit);
262 FirstPassGroupingCollector groupingCollector_1
= new FirstPassGroupingCollector(GROUP_BY_FIELD
, withinGroupSort
, limit
);
263 getSearcher().search(fullQuery
, groupingCollector_1
);
265 Collection
<SearchGroup
> topGroups
= groupingCollector_1
.getTopGroups(offset
, true);
267 if (topGroups
== null) {
271 boolean getScores
= true;
272 boolean getMaxScores
= true;
273 boolean fillFields
= true;
274 AllGroupsCollector c3
= new AllGroupsCollector(GROUP_BY_FIELD
);
275 SecondPassGroupingCollector c2
= new SecondPassGroupingCollector(GROUP_BY_FIELD
, topGroups
, groupSort
, withinGroupSort
, limit
, getScores
, getMaxScores
, fillFields
);
276 getSearcher().search(fullQuery
, MultiCollector
.wrap(c2
, c3
));
278 TopGroups groupsResult
= c2
.getTopGroups(offset
);
279 groupsResult
= new TopGroups(groupsResult
, c3
.getGroupCount());
284 //TODO when switched to Lucene 3.x which is included in hibernate 4.x
285 // use TopDocCollector.topDocs(int start, int howMany);
286 // since this method might be more memory save than our own implementation
288 // ALSO READ http://dev.e-taxonomy.eu/trac/ticket/3118 !!!
290 // TopDocs topDocs = hitCollector.topDocs();
291 // ScoreDoc[] scoreDocs = topDocs.scoreDocs;
293 // int docsAvailableInPage = Math.min(scoreDocs.length - offset, pageSize);
294 // logger.debug("docsAvailableInPage:" + docsAvailableInPage);
296 // ScoreDoc[] pagedDocs = new ScoreDoc[docsAvailableInPage];
297 // for(int i = 0; i < docsAvailableInPage; i++){
298 // pagedDocs[i] = scoreDocs[offset + i];
300 // TopDocs pagedTopDocs = new TopDocs(topDocs.totalHits, pagedDocs, topDocs.getMaxScore());
302 /////////////////////////////////////////////
304 // return pagedTopDocs;
310 protected Query
expandQuery() {
313 BooleanQuery filteredQuery
= new BooleanQuery();
314 BooleanQuery classFilter
= new BooleanQuery();
316 Term t
= new Term(DocumentBuilder
.CLASS_FIELDNAME
, clazz
.getName());
317 TermQuery termQuery
= new TermQuery(t
);
319 classFilter
.setBoost(0);
320 classFilter
.add(termQuery
, BooleanClause
.Occur
.SHOULD
);
322 filteredQuery
.add(this.query
, BooleanClause
.Occur
.MUST
);
323 filteredQuery
.add(classFilter
, BooleanClause
.Occur
.MUST
);
325 fullQuery
= filteredQuery
;
327 fullQuery
= this.query
;
332 public void setQuery(Query query
) {
336 public Query
getQuery() {
340 public Query
getExpandedQuery() {
345 public SortField
[] getSortFields() {
349 public void setSortFields(SortField
[] sortFields
) {
350 this.sortFields
= sortFields
;
353 public void setHighlightFields(String
[] textFieldNamesAsArray
) {
354 this.highlightFields
= textFieldNamesAsArray
;
358 public String
[] getHighlightFields() {
359 return this.highlightFields
;