3 * Copyright (C) 2012 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.api
.service
.search
;
12 import java
.io
.IOException
;
13 import java
.util
.HashMap
;
14 import java
.util
.HashSet
;
15 import java
.util
.List
;
18 import java
.util
.UUID
;
20 import org
.apache
.log4j
.Logger
;
21 import org
.apache
.lucene
.index
.IndexReader
;
22 import org
.apache
.lucene
.index
.Term
;
23 import org
.apache
.lucene
.queryparser
.classic
.ParseException
;
24 import org
.apache
.lucene
.search
.BooleanClause
;
25 import org
.apache
.lucene
.search
.BooleanClause
.Occur
;
26 import org
.apache
.lucene
.search
.BooleanQuery
;
27 import org
.apache
.lucene
.search
.BooleanQuery
.Builder
;
28 import org
.apache
.lucene
.search
.Filter
;
29 import org
.apache
.lucene
.search
.FilteredQuery
;
30 import org
.apache
.lucene
.search
.IndexSearcher
;
31 import org
.apache
.lucene
.search
.MatchAllDocsQuery
;
32 import org
.apache
.lucene
.search
.NumericRangeQuery
;
33 import org
.apache
.lucene
.search
.Query
;
34 import org
.apache
.lucene
.search
.QueryWrapperFilter
;
35 import org
.apache
.lucene
.search
.TermQuery
;
36 import org
.apache
.lucene
.search
.WildcardQuery
;
37 import org
.apache
.lucene
.search
.join
.JoinUtil
;
38 import org
.apache
.lucene
.search
.join
.ScoreMode
;
39 import org
.hibernate
.search
.engine
.ProjectionConstants
;
40 import org
.hibernate
.search
.spatial
.impl
.Point
;
41 import org
.hibernate
.search
.spatial
.impl
.Rectangle
;
43 import eu
.etaxonomy
.cdm
.hibernate
.search
.DefinedTermBaseClassBridge
;
44 import eu
.etaxonomy
.cdm
.hibernate
.search
.MultilanguageTextFieldBridge
;
45 import eu
.etaxonomy
.cdm
.hibernate
.search
.NotNullAwareIdBridge
;
46 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
47 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableEntity
;
48 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
51 * QueryFactory creates queries for a specific lucene index that means queries
52 * specific to the various CDM base types. Therefore the QueryFactory holds a
53 * reference to a {@link LuceneSearch} instance which has been created for a
55 * The field names used in queries created on free text fields are remembered
56 * and can be accessed by {@link #getTextFieldNames()} or {@link #getTextFieldNamesAsArray()}.
57 * This is useful for highlighting the matches with {@link LuceneSearch#setHighlightFields(String[])}
59 * The index specific methods from {@link LuceneSearch} which are
60 * used by QueryFactory directly or indirectly are:
62 * <li>{@link LuceneSearch#getAnalyzer()}</li>
66 * @author a.kohlbecker
70 public class QueryFactory
{
72 public static final Logger logger
= Logger
.getLogger(QueryFactory
.class);
74 protected ILuceneIndexToolProvider toolProvider
;
76 Set
<String
> textFieldNames
= new HashSet
<String
>();
78 Map
<Class
<?
extends CdmBase
>, IndexSearcher
> indexSearcherMap
= new HashMap
<Class
<?
extends CdmBase
>, IndexSearcher
>();
80 private final Class
<?
extends CdmBase
> cdmBaseType
;
82 public Set
<String
> getTextFieldNames() {
83 return textFieldNames
;
86 public String
[] getTextFieldNamesAsArray() {
87 return textFieldNames
.toArray(new String
[textFieldNames
.size()]);
/**
 * Creates a factory bound to one CDM base type.
 *
 * @param toolProvider
 *            provides the query parser and index readers used by this factory
 * @param cdmBaseType
 *            the CDM type for which the query parser is requested
 */
public QueryFactory(ILuceneIndexToolProvider toolProvider, Class<? extends CdmBase> cdmBaseType) {
    this.toolProvider = toolProvider;
    this.cdmBaseType = cdmBaseType;
}
96 * Creates a new Term query. Depending on whether <code>isTextField</code> is set true or not the
97 * supplied <code>queryString</code> will be parsed by using the according analyzer or not.
98 * Setting <code>isTextField</code> to <code>false</code> is useful for searching for uuids etc.
102 * @param isTextField whether this field is a field containing free text in contrast to e.g. ID fields.
103 * If <code>isTextField</code> is set <code>true</code> the <code>queryString</code> will be parsed by
104 * using the according analyzer.
105 * @return the resulting <code>TermQuery</code> or <code>null</code> in case of an <code>ParseException</code>
107 * TODO consider throwing the ParseException !!!!
109 public Query
newTermQuery(String fieldName
, String queryString
, boolean isTextField
) {
111 String luceneQueryString
= fieldName
+ ":(" + queryString
+ ")";
113 textFieldNames
.add(fieldName
);
114 // in order to support the full query syntax we must use the parser
117 return toolProvider
.getQueryParserFor(cdmBaseType
).parse(luceneQueryString
);
118 } catch (ParseException e
) {
123 return new TermQuery(new Term(fieldName
, queryString
));
128 * only to be used for text fields, see {@link #newTermQuery(String, String, boolean)}
131 * @return a {@link TermQuery} or a {@link WildcardQuery}
133 public Query
newTermQuery(String fieldName
, String queryString
){
134 return newTermQuery(fieldName
, queryString
, true);
138 * DefinedTerms are stored in the Lucene index by the
139 * {@link DefinedTermBaseClassBridge} in a consistent way. One field per
140 * language and also in one additional field for all languages. This method
141 * is a convenient means to retrieve a Lucene query string for such the
145 * name of the term field as in the Lucene index. The field must
146 * have been written to Lucene document by
147 * {@link DefinedTermBaseClassBridge}
150 * the languages to search for exclusively. Can be
151 * <code>null</code> to search in all languages
154 public Query
newDefinedTermQuery(String name
, String queryString
, List
<Language
> languages
) {
156 Builder localizedTermQueryBuilder
= new Builder();
157 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".label", queryString
), Occur
.SHOULD
);
158 if(languages
== null || languages
.size() == 0){
159 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".representation.text.ALL", queryString
), Occur
.SHOULD
);
160 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".representation.label.ALL", queryString
), Occur
.SHOULD
);
161 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".representation.abbreviatedLabel.ALL", queryString
), Occur
.SHOULD
);
164 for(Language lang
: languages
){
165 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".representation.text." + lang
.getUuid().toString(), queryString
), Occur
.SHOULD
);
166 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".representation.label." + lang
.getUuid().toString(), queryString
), Occur
.SHOULD
);
167 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".representation.abbreviatedLabel." + lang
.getUuid().toString(), queryString
), Occur
.SHOULD
);
170 return localizedTermQueryBuilder
.build();
174 * MultilanguageString maps are stored in the Lucene index by the
175 * {@link MultilanguageTextFieldBridge } in a consistent way. One field per
176 * language and also in one additional field for all languages. This method
177 * is a convenient means to retrieve a Lucene query string for such the
181 * name of the term field as in the Lucene index. The field must
182 * have been written to Lucene document by
183 * {@link DefinedTermBaseClassBridge}
185 * the languages to search for exclusively. Can be
186 * <code>null</code> to search in all languages
189 public Query
newMultilanguageTextQuery(String name
, String queryString
, List
<Language
> languages
) {
191 Builder localizedTermQueryBuilder
= new Builder();
192 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".label", queryString
), Occur
.SHOULD
);
193 if(languages
== null || languages
.size() == 0){
194 localizedTermQueryBuilder
.add(newTermQuery(name
+ ".ALL", queryString
), Occur
.SHOULD
);
196 for(Language lang
: languages
){
197 localizedTermQueryBuilder
.add(newTermQuery(name
+ "." + lang
.getUuid().toString(), queryString
), Occur
.SHOULD
);
200 return localizedTermQueryBuilder
.build();
208 public Query
newEntityIdQuery(String idFieldName
, CdmBase entitiy
){
209 return newTermQuery(idFieldName
, String
.valueOf(entitiy
.getId()), false);
217 public Query
newEntityIdsQuery(String idFieldName
, List
<?
extends CdmBase
> entities
){
218 Builder idInQueryBuilder
= new Builder();
219 if(entities
!= null && entities
.size() > 0 ){
220 for(CdmBase entity
: entities
){
221 idInQueryBuilder
.add(newEntityIdQuery(idFieldName
, entity
), Occur
.SHOULD
);
224 return idInQueryBuilder
.build();
231 public Query
newIsNotNullQuery(String idFieldName
){
232 return new TermQuery(new Term(NotNullAwareIdBridge
.notNullField(idFieldName
), NotNullAwareIdBridge
.NOT_NULL_VALUE
));
236 * @param uuidFieldName
240 public Query
newEntityUuidQuery(String uuidFieldName
, IdentifiableEntity entity
) {
241 return newTermQuery(uuidFieldName
, entity
.getUuid().toString(), false);
245 * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the uuid
246 * of the <code>entities</code>, the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
247 * @param uuidFieldName
251 public Query
newEntityUuidsQuery(String uuidFieldName
, List
<?
extends IdentifiableEntity
> entities
){
253 Builder uuidInQueryBuilder
= new Builder();
254 if(entities
!= null && entities
.size() > 0 ){
255 for(IdentifiableEntity entity
: entities
){
256 uuidInQueryBuilder
.add(newEntityUuidQuery(uuidFieldName
, entity
), Occur
.SHOULD
);
259 return uuidInQueryBuilder
.build();
264 * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the
265 * supplied <code>uuids</code>
266 * the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
267 * @param uuidFieldName
271 public Query
newUuidQuery(String uuidFieldName
, List
<UUID
> uuids
){
273 Builder uuidInQueryBuilder
= new Builder();
274 if(uuids
!= null && uuids
.size() > 0 ){
275 for(UUID uuid
: uuids
){
276 uuidInQueryBuilder
.add(newTermQuery(uuidFieldName
, uuids
.toString(), false), Occur
.SHOULD
);
279 return uuidInQueryBuilder
.build();
284 * Returns a Lucene Query which rely on double numeric range query
285 * on Latitude / Longitude
287 *(+/- copied from {@link SpatialQueryBuilderFromPoint#buildSpatialQueryByRange(Point, double, String)})
289 * @param boundingBox the rectangle the coordinates must lie in
291 * @param fieldName name of the Lucene Field implementing Coordinates
292 * @return Lucene Query to be used in a search
294 * @see org.hibernate.search.spatial.Coordinates
296 public static Query
buildSpatialQueryByRange(Rectangle boundingBox
, String fieldName
) {
298 String latitudeFieldName
= fieldName
+ "_HSSI_Latitude";
299 String longitudeFieldName
= fieldName
+ "_HSSI_Longitude";
301 Query latQuery
= NumericRangeQuery
.newDoubleRange(
302 latitudeFieldName
, boundingBox
.getLowerLeft().getLatitude(),
303 boundingBox
.getUpperRight().getLatitude(), true, true
306 Builder longQueryBuilder
= new Builder();
307 if ( boundingBox
.getLowerLeft().getLongitude() <= boundingBox
.getUpperRight().getLongitude() ) {
308 longQueryBuilder
.add(NumericRangeQuery
.newDoubleRange( longitudeFieldName
, boundingBox
.getLowerLeft().getLongitude(),
309 boundingBox
.getUpperRight().getLongitude(), true, true ), Occur
.MUST
);
312 longQueryBuilder
.add( NumericRangeQuery
.newDoubleRange( longitudeFieldName
, boundingBox
.getLowerLeft().getLongitude(),
313 180.0, true, true ), BooleanClause
.Occur
.SHOULD
);
314 longQueryBuilder
.add( NumericRangeQuery
.newDoubleRange( longitudeFieldName
, -180.0,
315 boundingBox
.getUpperRight().getLongitude(), true, true ), BooleanClause
.Occur
.SHOULD
);
318 Builder boxQueryBuilder
= new Builder();
319 boxQueryBuilder
.add( latQuery
, BooleanClause
.Occur
.MUST
);
320 boxQueryBuilder
.add( longQueryBuilder
.build(), BooleanClause
.Occur
.MUST
);
322 return new FilteredQuery(
323 new MatchAllDocsQuery(),
324 new QueryWrapperFilter( boxQueryBuilder
.build() )
332 * @param joinFromQuery
335 * @throws IOException
337 public Query
newJoinQuery(String fromField
, String toField
, Query joinFromQuery
,
338 Class
<?
extends CdmBase
> fromType
) throws IOException
{
339 boolean multipleValuesPerDocument
= true;
340 ScoreMode scoreMode
= ScoreMode
.Max
;
341 return JoinUtil
.createJoinQuery(
342 // need to use the sort field of the id field since
343 // ScoreMode.Max forces the fromField to be a docValue
344 // field of type [SORTED, SORTED_SET]
345 fromField
+ "__sort",
346 multipleValuesPerDocument
, toField
,
347 joinFromQuery
, indexSearcherFor(fromType
), scoreMode
);
351 * Creates a class restriction query and wraps the class restriction
352 * query and the given <code>query</code> into a BooleanQuery where both must match.
354 * TODO instead of using a BooleanQuery for the class restriction it would be much more
355 * performant to use a {@link Filter} instead.
357 * @param cdmTypeRestriction
361 public static BooleanQuery
addTypeRestriction(Query query
, Class
<?
extends CdmBase
> cdmTypeRestriction
) {
363 BooleanQuery fullQuery
;
364 Builder filteredQueryBuilder
= new Builder();
365 Builder classFilterBuilder
= new Builder();
367 Term t
= new Term(ProjectionConstants
.OBJECT_CLASS
, cdmTypeRestriction
.getName());
368 TermQuery termQuery
= new TermQuery(t
);
370 classFilterBuilder
.add(termQuery
, BooleanClause
.Occur
.SHOULD
);
371 BooleanQuery classFilter
= classFilterBuilder
.build();
372 classFilter
.setBoost(0);
374 filteredQueryBuilder
.add(query
, BooleanClause
.Occur
.MUST
);
375 filteredQueryBuilder
.add(classFilter
, BooleanClause
.Occur
.MUST
);
377 fullQuery
= filteredQueryBuilder
.build();
385 private IndexSearcher
indexSearcherFor(Class
<?
extends CdmBase
> clazz
) {
386 if(indexSearcherMap
.get(clazz
) == null){
388 IndexReader indexReader
= toolProvider
.getIndexReaderFor(clazz
);
389 IndexSearcher searcher
= new IndexSearcher(indexReader
);
390 // searcher.setDefaultFieldSortScoring(true, true);
391 indexSearcherMap
.put(clazz
, searcher
);
393 IndexSearcher indexSearcher
= indexSearcherMap
.get(clazz
);
394 return indexSearcher
;