3 * Copyright (C) 2011 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.api
.service
.search
;
12 import java
.io
.IOException
;
14 import org
.apache
.log4j
.Logger
;
15 import org
.apache
.lucene
.analysis
.Analyzer
;
16 import org
.apache
.lucene
.index
.IndexReader
;
17 import org
.apache
.lucene
.index
.Term
;
18 import org
.apache
.lucene
.queryParser
.ParseException
;
19 import org
.apache
.lucene
.queryParser
.QueryParser
;
20 import org
.apache
.lucene
.search
.BooleanClause
;
21 import org
.apache
.lucene
.search
.BooleanQuery
;
22 import org
.apache
.lucene
.search
.Hits
;
23 import org
.apache
.lucene
.search
.IndexSearcher
;
24 import org
.apache
.lucene
.search
.Query
;
25 import org
.apache
.lucene
.search
.ScoreDoc
;
26 import org
.apache
.lucene
.search
.Searcher
;
27 import org
.apache
.lucene
.search
.Sort
;
28 import org
.apache
.lucene
.search
.SortField
;
29 import org
.apache
.lucene
.search
.TermQuery
;
30 import org
.apache
.lucene
.search
.TopDocs
;
31 import org
.hibernate
.Session
;
32 import org
.hibernate
.search
.Search
;
33 import org
.hibernate
.search
.SearchFactory
;
34 import org
.hibernate
.search
.engine
.DocumentBuilder
;
35 import org
.hibernate
.search
.reader
.ReaderProvider
;
36 import org
.hibernate
.search
.store
.DirectoryProvider
;
38 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
39 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
40 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
41 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
42 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
46 * @author Andreas Kohlbecker
50 public class LuceneSearch
{
52 public static final Logger logger
= Logger
.getLogger(LuceneSearch
.class);
54 protected Session session
;
56 protected Searcher searcher
;
58 private SortField
[] sortFields
;
60 private Class
<?
extends CdmBase
> directorySelectClass
;
62 protected Class
<?
extends CdmBase
> getDirectorySelectClass() {
63 return pushAbstractBaseTypeDown(directorySelectClass
);
69 private Class
<?
extends CdmBase
> clazz
;
// Getter belonging to the class filter criterion that is held in the clazz field.
// NOTE(review): the method body is not visible in this view; presumably it is
// `return clazz;` followed by the closing brace. Confirm against the complete file.
72 public Class
<?
extends CdmBase
> getClazz() {
77 * Sets the Class to use as filter criterion, in case the supplied Class equals the
78 * <code>directorySelectClass</code> the Class is set to <code>null</code>
81 public void setClazz(Class
<?
extends CdmBase
> clazz
) {
85 * we must not use the getter of directorySelectClass
86 * since we need the abstract base classes here!!!!
88 if(clazz
!= null && clazz
.equals(directorySelectClass
)){
95 * The MAX_HITS_ALLOWED value must be one less than Integer.MAX_VALUE
96 * otherwise PriorityQueue will produce an exception since it
97 * will always add 1 to the maxhits so Integer.MAX_VALUE
98 * would become Integer.MIN_VALUE
100 public final int MAX_HITS_ALLOWED
= 10000;
102 protected Query query
;
104 protected String
[] highlightFields
= new String
[0];
/**
 * Creates a search that operates on the given session.
 *
 * @param session the Hibernate session from which the full text session is obtained
 * @param directorySelectClass the class used to select the index directory and the analyzer
 */
public LuceneSearch(Session session, Class<? extends CdmBase> directorySelectClass) {
    this.directorySelectClass = directorySelectClass;
    this.session = session;
}
116 * TODO the abstract base class DescriptionElementBase can not be used, so
117 * we are using an arbitraty subclass to find the DirectoryProvider, future
118 * versions of hibernate search my allow using abstract base classes see
120 * ://stackoverflow.com/questions/492184/how-do-you-find-all-subclasses-of
121 * -a-given-class-in-java
123 * @param type must not be null
126 protected Class
<?
extends CdmBase
> pushAbstractBaseTypeDown(Class
<?
extends CdmBase
> type
) {
127 if (type
.equals(DescriptionElementBase
.class)) {
128 type
= TextData
.class;
130 if (type
.equals(TaxonBase
.class)) {
// Protected no-argument constructor; it takes neither a session nor a directorySelectClass.
// NOTE(review): the constructor body is not visible in this view; presumably it is empty.
// Confirm against the complete file.
136 protected LuceneSearch() {
143 public Searcher
getSearcher() {
144 if(searcher
== null){
145 searcher
= new IndexSearcher(getIndexReader());
153 public IndexReader
getIndexReader() {
154 SearchFactory searchFactory
= Search
.getFullTextSession(session
).getSearchFactory();
156 DirectoryProvider
[] directoryProviders
= searchFactory
.getDirectoryProviders(getDirectorySelectClass());
157 logger
.info(directoryProviders
[0].getDirectory().toString());
159 ReaderProvider readerProvider
= searchFactory
.getReaderProvider();
160 IndexReader reader
= readerProvider
.openReader(directoryProviders
[0]);
167 public QueryParser
getQueryParser() {
168 Analyzer analyzer
= getAnalyzer();
169 QueryParser parser
= new QueryParser("titleCache", analyzer
);
176 public Analyzer
getAnalyzer() {
177 SearchFactory searchFactory
= Search
.getFullTextSession(session
).getSearchFactory();
178 Analyzer analyzer
= searchFactory
.getAnalyzer(getDirectorySelectClass());
183 * @param luceneQueryString
184 * @param clazz the type as additional filter criterion
185 * @param pageSize if the page size is null or in an invalid range it will be set to MAX_HITS_ALLOWED
186 * @param pageNumber a 0-based index of the page to return, will default to 0 if null or negative.
188 * @throws ParseException
189 * @throws IOException
191 public TopDocs
executeSearch(String luceneQueryString
, Integer pageSize
, Integer pageNumber
) throws ParseException
, IOException
{
193 Query luceneQuery
= parse(luceneQueryString
);
194 this.query
= luceneQuery
;
196 return executeSearch(pageSize
, pageNumber
);
200 * @param luceneQueryString
202 * @throws ParseException
204 public Query
parse(String luceneQueryString
) throws ParseException
{
205 logger
.debug("luceneQueryString to be parsed: " + luceneQueryString
);
206 Query luceneQuery
= getQueryParser().parse(luceneQueryString
);
212 * @param clazz the type as additional filter criterion
213 * @param pageSize if the page size is null or in an invalid range it will be set to MAX_HITS_ALLOWED
214 * @param pageNumber a 0-based index of the page to return, will default to 0 if null or negative.
216 * @throws ParseException
217 * @throws IOException
219 public TopDocs
executeSearch(Integer pageSize
, Integer pageNumber
) throws ParseException
, IOException
{
222 if(pageNumber
== null || pageNumber
< 0){
225 if(pageSize
== null || pageSize
<= 0 || pageSize
> MAX_HITS_ALLOWED
){
226 pageSize
= MAX_HITS_ALLOWED
;
227 logger
.info("limiting pageSize to MAX_HITS_ALLOWED = " + MAX_HITS_ALLOWED
+ " items");
230 Query fullQuery
= expandQuery();
232 logger
.info("final query: " + fullQuery
.toString());
234 int start
= pageNumber
* pageSize
;
235 int limit
= (pageNumber
+ 1) * pageSize
- 1 ;
237 logger
.debug("start: " + start
+ "; limit:" + limit
);
240 if(sortFields
!= null && sortFields
.length
> 0){
241 Sort sort
= new Sort(sortFields
);
242 topDocs
= getSearcher().search(fullQuery
, null, limit
, sort
);
244 topDocs
= getSearcher().search(fullQuery
, null, limit
);
248 //TODO when switched to Lucene 3.x which is included in hibernate 4.x
249 // use TopDocCollector.topDocs(int start, int howMany);
250 // since this method might be more memory save than our own implementation
252 // ALSO READ http://dev.e-taxonomy.eu/trac/ticket/3118 !!!
254 // TopDocs topDocs = hitCollector.topDocs();
255 ScoreDoc
[] scoreDocs
= topDocs
.scoreDocs
;
257 int docsAvailableInPage
= Math
.min(scoreDocs
.length
- start
, pageSize
);
258 logger
.debug("docsAvailableInPage:" + docsAvailableInPage
);
260 ScoreDoc
[] pagedDocs
= new ScoreDoc
[docsAvailableInPage
];
261 for(int i
= 0; i
< docsAvailableInPage
; i
++){
262 pagedDocs
[i
] = scoreDocs
[start
+ i
];
264 TopDocs pagedTopDocs
= new TopDocs(topDocs
.totalHits
, pagedDocs
, topDocs
.getMaxScore());
266 /////////////////////////////////////////////
274 protected Query
expandQuery() {
277 BooleanQuery filteredQuery
= new BooleanQuery();
278 BooleanQuery classFilter
= new BooleanQuery();
280 Term t
= new Term(DocumentBuilder
.CLASS_FIELDNAME
, clazz
.getName());
281 TermQuery termQuery
= new TermQuery(t
);
283 classFilter
.setBoost(0);
284 classFilter
.add(termQuery
, BooleanClause
.Occur
.SHOULD
);
286 filteredQuery
.add(this.query
, BooleanClause
.Occur
.MUST
);
287 filteredQuery
.add(classFilter
, BooleanClause
.Occur
.MUST
);
289 fullQuery
= filteredQuery
;
291 fullQuery
= this.query
;
// Setter that takes a Lucene Query. The query field of this class is the one
// that expandQuery() wraps and executeSearch(Integer, Integer) runs.
// NOTE(review): the method body is not visible in this view; presumably it is
// `this.query = query;`. Confirm against the complete file.
296 public void setQuery(Query query
) {
// Getter counterpart of setQuery(Query).
// NOTE(review): the method body is not visible in this view; presumably it is
// `return query;`. Confirm against the complete file.
300 public Query
getQuery() {
// Public accessor returning a Query.
// NOTE(review): the method body is not visible in this view; judging by the name it
// presumably exposes the result of expandQuery(). Confirm against the complete file.
304 public Query
getExpandedQuery() {
// Getter counterpart of setSortFields(SortField[]).
// NOTE(review): the method body is not visible in this view; presumably it is
// `return sortFields;`. Confirm against the complete file.
309 public SortField
[] getSortFields() {
313 public void setSortFields(SortField
[] sortFields
) {
314 this.sortFields
= sortFields
;
317 public void setHighlightFields(String
[] textFieldNamesAsArray
) {
318 this.highlightFields
= textFieldNamesAsArray
;
322 public String
[] getHighlightFields() {
323 return this.highlightFields
;