Project

General

Profile

Download (17.4 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2012 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.api.service.search;
10

    
11
import java.io.IOException;
12
import java.util.Collection;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19

    
20
import org.apache.log4j.Logger;
21
import org.apache.lucene.index.IndexReader;
22
import org.apache.lucene.index.Term;
23
import org.apache.lucene.queryparser.classic.ParseException;
24
import org.apache.lucene.search.BooleanClause;
25
import org.apache.lucene.search.BooleanClause.Occur;
26
import org.apache.lucene.search.BooleanQuery;
27
import org.apache.lucene.search.BooleanQuery.Builder;
28
import org.apache.lucene.search.FilteredQuery;
29
import org.apache.lucene.search.IndexSearcher;
30
import org.apache.lucene.search.MatchAllDocsQuery;
31
import org.apache.lucene.search.NumericRangeQuery;
32
import org.apache.lucene.search.Query;
33
import org.apache.lucene.search.QueryWrapperFilter;
34
import org.apache.lucene.search.ScoreDoc;
35
import org.apache.lucene.search.TermQuery;
36
import org.apache.lucene.search.TopDocs;
37
import org.apache.lucene.search.WildcardQuery;
38
import org.apache.lucene.search.join.JoinUtil;
39
import org.apache.lucene.search.join.ScoreMode;
40
import org.hibernate.search.engine.ProjectionConstants;
41
import org.hibernate.search.spatial.impl.Rectangle;
42

    
43
import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge;
44
import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
45
import eu.etaxonomy.cdm.hibernate.search.NotNullAwareIdBridge;
46
import eu.etaxonomy.cdm.model.common.CdmBase;
47
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
48
import eu.etaxonomy.cdm.model.common.Language;
49

    
50
/**
51
 * QueryFactory creates queries for a specific lucene index that means queries
52
 * specific to the various CDM base types. Therefore the QueryFactory holds a
53
 * reference to a {@link LuceneSearch} instance which has been created for a
54
 * CDM base type.<br>
55
 * The field names used in queries created on free text fields are remembered
56
 * and can be accessed by {@link #getTextFieldNames()} or {@link #getTextFieldNamesAsArray()}.
57
 * This is useful for highlighting the matches with {@link LuceneSearch#setHighlightFields(String[])}
58
 * <p>
59
 * The index specific methods from {@link LuceneSearch} which are
60
 * used by QueryFactory directly or indirectly are:
61
 * <ul>
62
 * <li>{@link LuceneSearch#getAnalyzer()}</li>
63
 * </ul>
64
 *
65
 *
66
 * @author a.kohlbecker
67
 * @since Sep 14, 2012
68
 */
69
public class QueryFactory {
70

    
71
    public static final Logger logger = Logger.getLogger(QueryFactory.class);
72

    
73
    protected ILuceneIndexToolProvider toolProvider;
74

    
75
    Set<String> textFieldNames = new HashSet<>();
76

    
77
    Map<Class<? extends CdmBase>, IndexSearcher> indexSearcherMap = new HashMap<>();
78

    
79
    private final Class<? extends CdmBase> cdmBaseType;
80

    
81
    public Set<String> getTextFieldNames() {
82
        return textFieldNames;
83
    }
84

    
85
    public String[] getTextFieldNamesAsArray() {
86
        return textFieldNames.toArray(new String[textFieldNames.size()]);
87
    }
88

    
89
    /**
     * Creates a factory for the given CDM base type.
     *
     * @param toolProvider
     *            provides the query parsers and index readers used by this factory
     * @param cdmBaseType
     *            the CDM base type for which the query parser is requested
     */
    public QueryFactory(ILuceneIndexToolProvider toolProvider, Class<? extends CdmBase> cdmBaseType){
        this.toolProvider = toolProvider;
        this.cdmBaseType = cdmBaseType;
    }
93

    
94
    /**
95
     * Creates a new Term query. Depending on whether <code>isTextField</code> is set true or not the
96
     * supplied <code>queryString</code> will be parsed by using the according analyzer or not.
97
     * Setting <code>isTextField</code> to <code>false</code> is useful for searching for uuids etc.
98
     * <p>
99
     * The appropriate query type is determined by the query strnig:
100
     * <ul>
101
     * <li>Lactuca ==> TermQuery.class </li>
102
     * <li>Lactuca perennis ==> BooleanQuery.class </li>
103
     * <li>Lactu* ==> PrefixQuery.class</li>
104
     * <li>"Lactuca perennis" ==> PhraseQuery.class</li>
105
     * </ul>
106
     *
107
     *
108
     * @param fieldName
109
     * @param queryString
110
     * @param isTextField whether this field is a field containing free text in contrast to e.g. ID fields.
111
     *     If <code>isTextField</code> is set <code>true</code> the <code>queryString</code> will be parsed by
112
     *     using the according analyzer.
113
     * @return the resulting <code>TermQuery</code> or <code>null</code> in case of an <code>ParseException</code>
114
     *
115
     * TODO consider throwing the ParseException !!!!
116
     */
117
    public Query newTermQuery(String fieldName, String queryString, boolean isTextField) {
118

    
119
        String luceneQueryString = fieldName + ":(" + queryString + ")";
120
        if (isTextField) {
121
            queryString = queryString.trim();
122
            // ^\"(.*\s+.*[\*].*|.*[\*].*\s+.*)\"$ matches phrase query strings with wildcards like '"Lactuca per*"'
123
            boolean isComplexPhraseQuery = queryString.matches("^\\\"(.*\\s+.*[\\*].*|.*[\\*].*\\s+.*)\\\"$");
124
            textFieldNames.add(fieldName);
125
            // in order to support the full query syntax we must use the parser here
126
            try {
127
                Query termQuery = toolProvider.getQueryParserFor(cdmBaseType, isComplexPhraseQuery).parse(luceneQueryString);
128
                return termQuery;
129
            } catch (ParseException e) {
130
                logger.error(e);
131
            }
132
            return null;
133
        } else {
134
            return new TermQuery(new Term(fieldName, queryString));
135
        }
136
    }
137

    
138
    /**
139
     * Only to be used for text fields, see {@link #newTermQuery(String, String, boolean)}
140
     * @param fieldName
141
     * @param queryString
142
     * @return a {@link TermQuery} or a {@link WildcardQuery}
143
     */
144
    public Query newTermQuery(String fieldName, String queryString){
145
        return newTermQuery(fieldName, queryString, true);
146
    }
147

    
148
    public Query newBooleanQuery(String fieldName, boolean value){
149
        return new TermQuery(new Term(fieldName, Boolean.valueOf(value).toString()));
150
    }
151

    
152
    /**
153
     * DefinedTerms are stored in the Lucene index by the
154
     * {@link DefinedTermBaseClassBridge} in a consistent way. One field per
155
     * language and also in one additional field for all languages. This method
156
     * is a convenient means to retrieve a Lucene query string for such the
157
     * fields.
158
     *
159
     * @param name
160
     *            name of the term field as in the Lucene index. The field must
161
     *            have been written to Lucene document by
162
     *            {@link DefinedTermBaseClassBridge}
163
     *
164
     * @param languages
165
     *            the languages to search for exclusively. Can be
166
     *            <code>null</code> to search in all languages
167
     * @return
168
     */
169
    public Query newDefinedTermQuery(String name, String queryString, List<Language> languages) {
170

    
171
        Builder localizedTermQueryBuilder = new Builder();
172
        localizedTermQueryBuilder.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
173
        if(languages == null || languages.size() == 0){
174
            localizedTermQueryBuilder.add(newTermQuery(name + ".representation.text.ALL", queryString), Occur.SHOULD);
175
            localizedTermQueryBuilder.add(newTermQuery(name + ".representation.label.ALL", queryString), Occur.SHOULD);
176
            localizedTermQueryBuilder.add(newTermQuery(name + ".representation.abbreviatedLabel.ALL", queryString), Occur.SHOULD);
177

    
178
        } else {
179
            for(Language lang : languages){
180
                localizedTermQueryBuilder.add(newTermQuery(name + ".representation.text." + lang.getUuid().toString(), queryString), Occur.SHOULD);
181
                localizedTermQueryBuilder.add(newTermQuery(name + ".representation.label." + lang.getUuid().toString(), queryString), Occur.SHOULD);
182
                localizedTermQueryBuilder.add(newTermQuery(name + ".representation.abbreviatedLabel." + lang.getUuid().toString(), queryString), Occur.SHOULD);
183
            }
184
        }
185
        return localizedTermQueryBuilder.build();
186
    }
187

    
188
    /**
189
     * MultilanguageString maps are stored in the Lucene index by the
190
     * {@link MultilanguageTextFieldBridge } in a consistent way. One field per
191
     * language and also in one additional field for all languages. This method
192
     * is a convenient means to retrieve a Lucene query string for such the
193
     * fields.
194
     *
195
     * @param name
196
     *            name of the term field as in the Lucene index. The field must
197
     *            have been written to Lucene document by
198
     *            {@link DefinedTermBaseClassBridge}
199
     * @param languages
200
     *            the languages to search for exclusively. Can be
201
     *            <code>null</code> to search in all languages
202
     * @return
203
     */
204
    public Query newMultilanguageTextQuery(String name, String queryString, List<Language> languages) {
205

    
206
        Builder localizedTermQueryBuilder = new Builder();
207
        localizedTermQueryBuilder.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
208
        if(languages == null || languages.size() == 0){
209
            localizedTermQueryBuilder.add(newTermQuery(name + ".ALL", queryString), Occur.SHOULD);
210
        } else {
211
            for(Language lang : languages){
212
                localizedTermQueryBuilder.add(newTermQuery(name + "." + lang.getUuid().toString(), queryString), Occur.SHOULD);
213
            }
214
        }
215
        return localizedTermQueryBuilder.build();
216
    }
217

    
218
    /**
219
     * @param idFieldName
220
     * @param entitiy
221
     * @return
222
     */
223
    public Query newEntityIdQuery(String idFieldName, CdmBase entitiy){
224
        return newTermQuery(idFieldName, String.valueOf(entitiy.getId()), false);
225
    }
226

    
227
    /**
228
     * @param idFieldName
229
     * @param entitiy
230
     * @return
231
     */
232
    public Query newEntityIdsQuery(String idFieldName, Collection<? extends CdmBase> entities){
233
        Builder idInQueryBuilder = new Builder();
234
        if(entities != null && entities.size() > 0 ){
235
            for(CdmBase entity : entities){
236
                idInQueryBuilder.add(newEntityIdQuery(idFieldName, entity), Occur.SHOULD);
237
            }
238
        }
239
        return idInQueryBuilder.build();
240
    }
241

    
242
    public Query newIsNotNullQuery(String idFieldName){
243
        return new TermQuery(new Term(NotNullAwareIdBridge.notNullField(idFieldName), NotNullAwareIdBridge.NOT_NULL_VALUE));
244
    }
245

    
246
    public Query newEntityUuidQuery(String uuidFieldName, IdentifiableEntity entity) {
247
        return newTermQuery(uuidFieldName, entity.getUuid().toString(), false);
248
    }
249

    
250
    /**
251
     * Creates a query for searching for documents in which the field specified by <code>uuidFieldName</code>
252
     * matches at least one of the uuid of the <code>entities</code>, the sql equivalent of this is
253
     * <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
254
     * @param uuidFieldName
255
     * @param entities
256
     * @return
257
     */
258
    public Query newEntityUuidsQuery(String uuidFieldName, List<? extends IdentifiableEntity> entities){
259

    
260
        Builder uuidInQueryBuilder = new Builder();
261
        if(entities != null && entities.size() > 0 ){
262
            for(IdentifiableEntity<?> entity : entities){
263
                uuidInQueryBuilder.add(newEntityUuidQuery(uuidFieldName, entity), Occur.SHOULD);
264
            }
265
        }
266
        return uuidInQueryBuilder.build();
267
    }
268

    
269

    
270
    /**
271
     * Creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the
272
     * supplied <code>uuids</code>
273
     * the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
274
     * @param uuidFieldName
275
     * @param entities
276
     * @return
277
     */
278
    public Query newUuidQuery(String uuidFieldName, List<UUID> uuids){
279

    
280
        Builder uuidInQueryBuilder = new Builder();
281
        if(uuids != null && uuids.size() > 0 ){
282
            for(UUID uuid : uuids){
283
                uuidInQueryBuilder.add(newTermQuery(uuidFieldName, uuid.toString(), false), Occur.SHOULD);
284
            }
285
        }
286
        return uuidInQueryBuilder.build();
287
    }
288

    
289

    
290
    /**
291
     * Returns a Lucene Query which rely on double numeric range query
292
     * on Latitude / Longitude
293
     *
294
     *(+/- copied from {@link SpatialQueryBuilderFromPoint#buildSpatialQueryByRange(Point, double, String)})
295
     *
296
     * @param center center of the search discus
297
     * @param radius distance max to center in km
298
     * @param fieldName name of the Lucene Field implementing Coordinates
299
     * @return Lucene Query to be used in a search
300
     * @see Query
301
     * @see org.hibernate.search.spatial.Coordinates
302
     */
303
    public static Query buildSpatialQueryByRange(Rectangle boundingBox, String fieldName) {
304

    
305
        String latitudeFieldName = fieldName + "_HSSI_Latitude";
306
        String longitudeFieldName = fieldName + "_HSSI_Longitude";
307

    
308
        Query latQuery= NumericRangeQuery.newDoubleRange(
309
                latitudeFieldName, boundingBox.getLowerLeft().getLatitude(),
310
                boundingBox.getUpperRight().getLatitude(), true, true
311
        );
312

    
313
        Builder longQueryBuilder = new Builder();
314
        if ( boundingBox.getLowerLeft().getLongitude() <= boundingBox.getUpperRight().getLongitude() ) {
315
            longQueryBuilder.add(NumericRangeQuery.newDoubleRange( longitudeFieldName, boundingBox.getLowerLeft().getLongitude(),
316
                    boundingBox.getUpperRight().getLongitude(), true, true ), Occur.MUST);
317
        }
318
        else {
319
            longQueryBuilder.add( NumericRangeQuery.newDoubleRange( longitudeFieldName, boundingBox.getLowerLeft().getLongitude(),
320
                    180.0, true, true ), BooleanClause.Occur.SHOULD );
321
            longQueryBuilder.add( NumericRangeQuery.newDoubleRange( longitudeFieldName, -180.0,
322
                    boundingBox.getUpperRight().getLongitude(), true, true ), BooleanClause.Occur.SHOULD );
323
        }
324

    
325
        Builder boxQueryBuilder = new Builder();
326
        boxQueryBuilder.add( latQuery, BooleanClause.Occur.MUST );
327
        boxQueryBuilder.add( longQueryBuilder.build(), BooleanClause.Occur.MUST );
328

    
329
        return new FilteredQuery(
330
                new MatchAllDocsQuery(),
331
                new QueryWrapperFilter( boxQueryBuilder.build() )
332
        );
333
    }
334

    
335
    /**
336
     * Warning! JoinQuery do currently not work with numeric fields, see https://issues.apache.org/jira/browse/LUCENE-4824
337
     * @param fromType
338
     * @param fromField
339
     * @param fromFieldIsMultivalued TODO
340
     * @param fromQuery
341
     * @param toField
342
     * @param toType
343
     *      Optional parameter. Only used for debugging only, can be left null otherwise.
344
     * @param scoreMode TODO
345
     * @return
346
     * @throws IOException
347
     */
348
    public Query newJoinQuery(Class<? extends CdmBase> fromType, String fromField, boolean fromFieldIsMultivalued,
349
            Query fromQuery, String toField, Class<? extends CdmBase> toType, ScoreMode scoreMode) throws IOException {
350
            boolean multipleValuesPerDocument = false;
351
            Query joinQuery = JoinUtil.createJoinQuery(
352
                    // need to use the sort field of the id field since
353
                    // ScoreMode.Max forces the fromField to be a docValue
354
                    // field of type [SORTED, SORTED_SET]
355
                    fromField + "__sort",
356
                    multipleValuesPerDocument, toField,
357
                    fromQuery, indexSearcherFor(fromType), scoreMode);
358
            if(logger.isDebugEnabled()) {
359
                logger.debug("joinQuery: " + joinQuery);
360
                if(toType != null) {
361
                    TopDocs result = indexSearcherFor(toType).search(joinQuery, 10);
362
                    ScoreDoc[] docs = result.scoreDocs;
363
                    logger.debug("joinQuery '" + fromType.getSimpleName() + ". " + fromField + "=" + toField + " where " + fromType.getSimpleName() + " matches "+ fromQuery + "' has " + result.totalHits + " results:");
364
                    for(ScoreDoc doc : docs) {
365
                        logger.debug("    toType doc: " + doc);
366
                            IndexReader indexReader = toolProvider.getIndexReaderFor(toType);
367
                            logger.debug("              : " + indexReader.document(doc.doc));
368
                        }
369
                    }
370
            }
371
            return joinQuery;
372
    }
373

    
374
    /**
375
     * Creates a class restriction query and wraps the class restriction
376
     * query and the given <code>query</code> into a BooleanQuery where both must match.
377
     * <p>
378
     *
379
     * @param cdmTypeRestriction
380
     * @param query
381
     * @return
382
     */
383
    public static BooleanQuery.Builder addTypeRestriction(Query query, Class<? extends CdmBase> cdmTypeRestriction) {
384

    
385
        Builder filteredQueryBuilder = new Builder();
386
        Builder classFilterBuilder = new Builder();
387

    
388
        Term t = new Term(ProjectionConstants.OBJECT_CLASS, cdmTypeRestriction.getName());
389
        TermQuery termQuery = new TermQuery(t);
390

    
391
        classFilterBuilder.add(termQuery, Occur.SHOULD);
392
        BooleanQuery classFilter = classFilterBuilder.build();
393
        classFilter.setBoost(0);
394

    
395
        filteredQueryBuilder.add(query, Occur.MUST);
396
        filteredQueryBuilder.add(classFilter, Occur.MUST); // TODO using Occur.FILTER might be improve performance but causes wrong results
397

    
398
        return filteredQueryBuilder;
399
    }
400

    
401
    private IndexSearcher indexSearcherFor(Class<? extends CdmBase> clazz) {
402

    
403
        if(indexSearcherMap.get(clazz) == null){
404
            IndexReader indexReader = toolProvider.getIndexReaderFor(clazz);
405
            IndexSearcher searcher = new IndexSearcher(indexReader);
406
            indexSearcherMap.put(clazz, searcher);
407
        }
408
        IndexSearcher indexSearcher = indexSearcherMap.get(clazz);
409
        return indexSearcher;
410
    }
411
}
(13-13/16)