#4716 removing deprecated usage of new BooleanQuery, using BooleanQuery.Builder instead
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / search / QueryFactory.java
1 // $Id$
2 /**
3 * Copyright (C) 2012 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.api.service.search;
11
12 import java.io.IOException;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.UUID;
19
20 import org.apache.log4j.Logger;
21 import org.apache.lucene.index.IndexReader;
22 import org.apache.lucene.index.Term;
23 import org.apache.lucene.queryparser.classic.ParseException;
24 import org.apache.lucene.search.BooleanClause;
25 import org.apache.lucene.search.BooleanClause.Occur;
26 import org.apache.lucene.search.BooleanQuery;
27 import org.apache.lucene.search.BooleanQuery.Builder;
28 import org.apache.lucene.search.Filter;
29 import org.apache.lucene.search.FilteredQuery;
30 import org.apache.lucene.search.IndexSearcher;
31 import org.apache.lucene.search.MatchAllDocsQuery;
32 import org.apache.lucene.search.NumericRangeQuery;
33 import org.apache.lucene.search.Query;
34 import org.apache.lucene.search.QueryWrapperFilter;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.lucene.search.WildcardQuery;
37 import org.apache.lucene.search.join.JoinUtil;
38 import org.apache.lucene.search.join.ScoreMode;
39 import org.hibernate.search.engine.ProjectionConstants;
40 import org.hibernate.search.spatial.impl.Point;
41 import org.hibernate.search.spatial.impl.Rectangle;
42
43 import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge;
44 import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
45 import eu.etaxonomy.cdm.hibernate.search.NotNullAwareIdBridge;
46 import eu.etaxonomy.cdm.model.common.CdmBase;
47 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
48 import eu.etaxonomy.cdm.model.common.Language;
49
50 /**
51 * QueryFactory creates queries for a specific lucene index that means queries
52 * specific to the various CDM base types. Therefore the QueryFactory hold a
53 * reference to a {@link LuceneSearch} instance which has been created for a
54 * CDM base type.<br>
55 * The field names used in queries created on free text fields are remembered
56 * and can be accessed by {@link #getTextFieldNames()} or {@link #getTextFieldNamesAsArray()}.
57 * This is useful for highlighting the matches with {@link LuceneSearch#setHighlightFields(String[])}
58 * <p>
59 * The index specific methods from {@link LuceneSearch} which are
60 * used by QueryFactory directly or indirectly are:
61 * <ul>
62 * <li>{@link LuceneSearch#getAnalyzer()}</li>
63 * </ul>
64 *
65 *
66 * @author a.kohlbecker
67 * @date Sep 14, 2012
68 *
69 */
70 public class QueryFactory {
71
72 public static final Logger logger = Logger.getLogger(QueryFactory.class);
73
74 protected ILuceneIndexToolProvider toolProvider;
75
76 Set<String> textFieldNames = new HashSet<String>();
77
78 Map<Class<? extends CdmBase>, IndexSearcher> indexSearcherMap = new HashMap<Class<? extends CdmBase>, IndexSearcher>();
79
80 private final Class<? extends CdmBase> cdmBaseType;
81
82 public Set<String> getTextFieldNames() {
83 return textFieldNames;
84 }
85
86 public String[] getTextFieldNamesAsArray() {
87 return textFieldNames.toArray(new String[textFieldNames.size()]);
88 }
89
90 public QueryFactory(ILuceneIndexToolProvider toolProvider, Class<? extends CdmBase> cdmBaseType){
91 this.cdmBaseType = cdmBaseType;
92 this.toolProvider = toolProvider;
93 }
94
95 /**
96 * Creates a new Term query. Depending on whether <code>isTextField</code> is set true or not the
97 * supplied <code>queryString</code> will be parsed by using the according analyzer or not.
98 * Setting <code>isTextField</code> to <code>false</code> is useful for searching for uuids etc.
99 *
100 * @param fieldName
101 * @param queryString
102 * @param isTextField whether this field is a field containing free text in contrast to e.g. ID fields.
103 * If <code>isTextField</code> is set <code>true</code> the <code>queryString</code> will be parsed by
104 * using the according analyzer.
105 * @return the resulting <code>TermQuery</code> or <code>null</code> in case of an <code>ParseException</code>
106 *
107 * TODO consider throwing the ParseException !!!!
108 */
109 public Query newTermQuery(String fieldName, String queryString, boolean isTextField) {
110
111 String luceneQueryString = fieldName + ":(" + queryString + ")";
112 if (isTextField) {
113 textFieldNames.add(fieldName);
114 // in order to support the full query syntax we must use the parser
115 // here
116 try {
117 return toolProvider.getQueryParserFor(cdmBaseType).parse(luceneQueryString);
118 } catch (ParseException e) {
119 logger.error(e);
120 }
121 return null;
122 } else {
123 return new TermQuery(new Term(fieldName, queryString));
124 }
125 }
126
127 /**
128 * only to be used for text fields, see {@link #newTermQuery(String, String, boolean)}
129 * @param fieldName
130 * @param queryString
131 * @return a {@link TermQuery} or a {@link WildcardQuery}
132 */
133 public Query newTermQuery(String fieldName, String queryString){
134 return newTermQuery(fieldName, queryString, true);
135 }
136
137 /**
138 * DefinedTerms are stored in the Lucene index by the
139 * {@link DefinedTermBaseClassBridge} in a consistent way. One field per
140 * language and also in one additional field for all languages. This method
141 * is a convenient means to retrieve a Lucene query string for such the
142 * fields.
143 *
144 * @param name
145 * name of the term field as in the Lucene index. The field must
146 * have been written to Lucene document by
147 * {@link DefinedTermBaseClassBridge}
148 *
149 * @param languages
150 * the languages to search for exclusively. Can be
151 * <code>null</code> to search in all languages
152 * @return
153 */
154 public Query newDefinedTermQuery(String name, String queryString, List<Language> languages) {
155
156 Builder localizedTermQueryBuilder = new Builder();
157 localizedTermQueryBuilder.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
158 if(languages == null || languages.size() == 0){
159 localizedTermQueryBuilder.add(newTermQuery(name + ".representation.text.ALL", queryString), Occur.SHOULD);
160 localizedTermQueryBuilder.add(newTermQuery(name + ".representation.label.ALL", queryString), Occur.SHOULD);
161 localizedTermQueryBuilder.add(newTermQuery(name + ".representation.abbreviatedLabel.ALL", queryString), Occur.SHOULD);
162
163 } else {
164 for(Language lang : languages){
165 localizedTermQueryBuilder.add(newTermQuery(name + ".representation.text." + lang.getUuid().toString(), queryString), Occur.SHOULD);
166 localizedTermQueryBuilder.add(newTermQuery(name + ".representation.label." + lang.getUuid().toString(), queryString), Occur.SHOULD);
167 localizedTermQueryBuilder.add(newTermQuery(name + ".representation.abbreviatedLabel." + lang.getUuid().toString(), queryString), Occur.SHOULD);
168 }
169 }
170 return localizedTermQueryBuilder.build();
171 }
172
173 /**
174 * MultilanguageString maps are stored in the Lucene index by the
175 * {@link MultilanguageTextFieldBridge } in a consistent way. One field per
176 * language and also in one additional field for all languages. This method
177 * is a convenient means to retrieve a Lucene query string for such the
178 * fields.
179 *
180 * @param name
181 * name of the term field as in the Lucene index. The field must
182 * have been written to Lucene document by
183 * {@link DefinedTermBaseClassBridge}
184 * @param languages
185 * the languages to search for exclusively. Can be
186 * <code>null</code> to search in all languages
187 * @return
188 */
189 public Query newMultilanguageTextQuery(String name, String queryString, List<Language> languages) {
190
191 Builder localizedTermQueryBuilder = new Builder();
192 localizedTermQueryBuilder.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
193 if(languages == null || languages.size() == 0){
194 localizedTermQueryBuilder.add(newTermQuery(name + ".ALL", queryString), Occur.SHOULD);
195 } else {
196 for(Language lang : languages){
197 localizedTermQueryBuilder.add(newTermQuery(name + "." + lang.getUuid().toString(), queryString), Occur.SHOULD);
198 }
199 }
200 return localizedTermQueryBuilder.build();
201 }
202
203 /**
204 * @param idFieldName
205 * @param entitiy
206 * @return
207 */
208 public Query newEntityIdQuery(String idFieldName, CdmBase entitiy){
209 return newTermQuery(idFieldName, String.valueOf(entitiy.getId()), false);
210 }
211
212 /**
213 * @param idFieldName
214 * @param entitiy
215 * @return
216 */
217 public Query newEntityIdsQuery(String idFieldName, List<? extends CdmBase> entities){
218 Builder idInQueryBuilder = new Builder();
219 if(entities != null && entities.size() > 0 ){
220 for(CdmBase entity : entities){
221 idInQueryBuilder.add(newEntityIdQuery(idFieldName, entity), Occur.SHOULD);
222 }
223 }
224 return idInQueryBuilder.build();
225 }
226
227 /**
228 * @param idFieldName
229 * @return
230 */
231 public Query newIsNotNullQuery(String idFieldName){
232 return new TermQuery(new Term(NotNullAwareIdBridge.notNullField(idFieldName), NotNullAwareIdBridge.NOT_NULL_VALUE));
233 }
234
235 /**
236 * @param uuidFieldName
237 * @param entity
238 * @return
239 */
240 public Query newEntityUuidQuery(String uuidFieldName, IdentifiableEntity entity) {
241 return newTermQuery(uuidFieldName, entity.getUuid().toString(), false);
242 }
243
244 /**
245 * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the uuid
246 * of the <code>entities</code>, the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
247 * @param uuidFieldName
248 * @param entities
249 * @return
250 */
251 public Query newEntityUuidsQuery(String uuidFieldName, List<? extends IdentifiableEntity> entities){
252
253 Builder uuidInQueryBuilder = new Builder();
254 if(entities != null && entities.size() > 0 ){
255 for(IdentifiableEntity entity : entities){
256 uuidInQueryBuilder.add(newEntityUuidQuery(uuidFieldName, entity), Occur.SHOULD);
257 }
258 }
259 return uuidInQueryBuilder.build();
260 }
261
262
263 /**
264 * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the
265 * supplied <code>uuids</code>
266 * the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
267 * @param uuidFieldName
268 * @param entities
269 * @return
270 */
271 public Query newUuidQuery(String uuidFieldName, List<UUID> uuids){
272
273 Builder uuidInQueryBuilder = new Builder();
274 if(uuids != null && uuids.size() > 0 ){
275 for(UUID uuid : uuids){
276 uuidInQueryBuilder.add(newTermQuery(uuidFieldName, uuids.toString(), false), Occur.SHOULD);
277 }
278 }
279 return uuidInQueryBuilder.build();
280 }
281
282
283 /**
284 * Returns a Lucene Query which rely on double numeric range query
285 * on Latitude / Longitude
286 *
287 *(+/- copied from {@link SpatialQueryBuilderFromPoint#buildSpatialQueryByRange(Point, double, String)})
288 *
289 * @param center center of the search discus
290 * @param radius distance max to center in km
291 * @param fieldName name of the Lucene Field implementing Coordinates
292 * @return Lucene Query to be used in a search
293 * @see Query
294 * @see org.hibernate.search.spatial.Coordinates
295 */
296 public static Query buildSpatialQueryByRange(Rectangle boundingBox, String fieldName) {
297
298 String latitudeFieldName = fieldName + "_HSSI_Latitude";
299 String longitudeFieldName = fieldName + "_HSSI_Longitude";
300
301 Query latQuery= NumericRangeQuery.newDoubleRange(
302 latitudeFieldName, boundingBox.getLowerLeft().getLatitude(),
303 boundingBox.getUpperRight().getLatitude(), true, true
304 );
305
306 Builder longQueryBuilder = new Builder();
307 if ( boundingBox.getLowerLeft().getLongitude() <= boundingBox.getUpperRight().getLongitude() ) {
308 longQueryBuilder.add(NumericRangeQuery.newDoubleRange( longitudeFieldName, boundingBox.getLowerLeft().getLongitude(),
309 boundingBox.getUpperRight().getLongitude(), true, true ), Occur.MUST);
310 }
311 else {
312 longQueryBuilder.add( NumericRangeQuery.newDoubleRange( longitudeFieldName, boundingBox.getLowerLeft().getLongitude(),
313 180.0, true, true ), BooleanClause.Occur.SHOULD );
314 longQueryBuilder.add( NumericRangeQuery.newDoubleRange( longitudeFieldName, -180.0,
315 boundingBox.getUpperRight().getLongitude(), true, true ), BooleanClause.Occur.SHOULD );
316 }
317
318 Builder boxQueryBuilder = new Builder();
319 boxQueryBuilder.add( latQuery, BooleanClause.Occur.MUST );
320 boxQueryBuilder.add( longQueryBuilder.build(), BooleanClause.Occur.MUST );
321
322 return new FilteredQuery(
323 new MatchAllDocsQuery(),
324 new QueryWrapperFilter( boxQueryBuilder.build() )
325 );
326 }
327
328 /**
329 *
330 * @param fromField
331 * @param toField
332 * @param joinFromQuery
333 * @param fromType
334 * @return
335 * @throws IOException
336 */
337 public Query newJoinQuery(String fromField, String toField, Query joinFromQuery,
338 Class<? extends CdmBase> fromType) throws IOException {
339 boolean multipleValuesPerDocument = true;
340 ScoreMode scoreMode = ScoreMode.Max;
341 return JoinUtil.createJoinQuery(
342 // need to use the sort field of the id field since
343 // ScoreMode.Max forces the fromField to be a docValue
344 // field of type [SORTED, SORTED_SET]
345 fromField + "__sort",
346 multipleValuesPerDocument, toField,
347 joinFromQuery, indexSearcherFor(fromType), scoreMode);
348 }
349
350 /**
351 * Creates a class restriction query and wraps the class restriction
352 * query and the given <code>query</code> into a BooleanQuery where both must match.
353 * <p>
354 * TODO instead of using a BooleanQuery for the class restriction it would be much more
355 * performant to use a {@link Filter} instead.
356 *
357 * @param cdmTypeRestriction
358 * @param query
359 * @return
360 */
361 public static BooleanQuery addTypeRestriction(Query query, Class<? extends CdmBase> cdmTypeRestriction) {
362
363 BooleanQuery fullQuery;
364 Builder filteredQueryBuilder = new Builder();
365 Builder classFilterBuilder = new Builder();
366
367 Term t = new Term(ProjectionConstants.OBJECT_CLASS, cdmTypeRestriction.getName());
368 TermQuery termQuery = new TermQuery(t);
369
370 classFilterBuilder.add(termQuery, BooleanClause.Occur.SHOULD);
371 BooleanQuery classFilter = classFilterBuilder.build();
372 classFilter.setBoost(0);
373
374 filteredQueryBuilder.add(query, BooleanClause.Occur.MUST);
375 filteredQueryBuilder.add(classFilter, BooleanClause.Occur.MUST);
376
377 fullQuery = filteredQueryBuilder.build();
378 return fullQuery;
379 }
380
381 /**
382 * @param clazz
383 * @return
384 */
385 private IndexSearcher indexSearcherFor(Class<? extends CdmBase> clazz) {
386 if(indexSearcherMap.get(clazz) == null){
387
388 IndexReader indexReader = toolProvider.getIndexReaderFor(clazz);
389 IndexSearcher searcher = new IndexSearcher(indexReader);
390 // searcher.setDefaultFieldSortScoring(true, true);
391 indexSearcherMap.put(clazz, searcher);
392 }
393 IndexSearcher indexSearcher = indexSearcherMap.get(clazz);
394 return indexSearcher;
395 }
396
397 }