Project

General

Profile

Download (14 KB) Statistics
| Branch: | Tag: | Revision:
1
// $Id$
2
/**
3
* Copyright (C) 2012 EDIT
4
* European Distributed Institute of Taxonomy
5
* http://www.e-taxonomy.eu
6
*
7
* The contents of this file are subject to the Mozilla Public License Version 1.1
8
* See LICENSE.TXT at the top of this package for the full license terms.
9
*/
10
package eu.etaxonomy.cdm.api.service.search;
11

    
12
import java.io.IOException;
13
import java.util.HashMap;
14
import java.util.HashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import java.util.UUID;
19

    
20
import org.apache.log4j.Logger;
21
import org.apache.lucene.index.IndexReader;
22
import org.apache.lucene.index.Term;
23
import org.apache.lucene.queryParser.ParseException;
24
import org.apache.lucene.search.BooleanClause;
25
import org.apache.lucene.search.BooleanClause.Occur;
26
import org.apache.lucene.search.BooleanQuery;
27
import org.apache.lucene.search.FilteredQuery;
28
import org.apache.lucene.search.IndexSearcher;
29
import org.apache.lucene.search.MatchAllDocsQuery;
30
import org.apache.lucene.search.NumericRangeQuery;
31
import org.apache.lucene.search.Query;
32
import org.apache.lucene.search.QueryWrapperFilter;
33
import org.apache.lucene.search.TermQuery;
34
import org.apache.lucene.search.WildcardQuery;
35
import org.apache.lucene.search.join.JoinUtil;
36
import org.hibernate.search.spatial.impl.Point;
37
import org.hibernate.search.spatial.impl.Rectangle;
38
import org.hibernate.search.spatial.impl.SpatialQueryBuilderFromPoint;
39

    
40
import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge;
41
import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
42
import eu.etaxonomy.cdm.hibernate.search.NotNullAwareIdBridge;
43
import eu.etaxonomy.cdm.model.common.CdmBase;
44
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
45
import eu.etaxonomy.cdm.model.common.Language;
46

    
47
/**
48
 * QueryFactory creates queries for a specific lucene index that means queries
49
 * specific to the various CDM base types. Therefore the QueryFactory holds a
50
 * reference to a {@link LuceneSearch} instance which has been created for a
51
 * CDM base type.<br>
52
 * The field names used in queries created on free text fields are remembered
53
 * and can be accessed by {@link #getTextFieldNames()} or {@link #getTextFieldNamesAsArray()}.
54
 * This is useful for highlighting the matches with {@link LuceneSearch#setHighlightFields(String[])}
55
 * <p>
56
 * The index specific methods from {@link LuceneSearch} which are
57
 * used by QueryFactory directly or indirectly are:
58
 * <ul>
59
 * <li>{@link LuceneSearch#getAnalyzer()}</li>
60
 * </ul>
61
 *
62
 *
63
 * @author a.kohlbecker
64
 * @date Sep 14, 2012
65
 *
66
 */
67
public class QueryFactory {
68

    
69
    /** Log4j logger of this class; used to report query parse failures in {@link #newTermQuery(String, String, boolean)}. */
    public static final Logger logger = Logger.getLogger(QueryFactory.class);
70

    
71
    /**
     * Source of the index specific tools used by this factory: the query parser
     * for text field queries and the index readers for join queries.
     */
    protected ILuceneIndexToolProvider toolProvider;
72

    
73
    /**
     * Names of all fields for which a free text query has been created so far;
     * filled by {@link #newTermQuery(String, String, boolean)} when <code>isTextField</code> is true.
     */
    Set<String> textFieldNames = new HashSet<String>();
74

    
75
    /**
     * Per-type cache of the {@link IndexSearcher} instances created lazily by
     * <code>indexSearcherFor(Class)</code>.
     */
    Map<Class<? extends CdmBase>, IndexSearcher> indexSearcherMap = new HashMap<Class<? extends CdmBase>, IndexSearcher>();
76

    
77
    /** The CDM type whose query parser is requested from the {@link #toolProvider} when parsing text queries. */
    private final Class<? extends CdmBase> cdmBaseType;
78

    
79
    public Set<String> getTextFieldNames() {
80
        return textFieldNames;
81
    }
82

    
83
    public String[] getTextFieldNamesAsArray() {
84
        return textFieldNames.toArray(new String[textFieldNames.size()]);
85
    }
86

    
87
    /**
     * Creates a factory producing queries for the index of the given CDM type.
     *
     * @param toolProvider provides the query parsers and index readers
     * @param cdmBaseType the CDM type for which text queries are parsed
     */
    public QueryFactory(ILuceneIndexToolProvider toolProvider, Class<? extends CdmBase> cdmBaseType){
        this.toolProvider = toolProvider;
        this.cdmBaseType = cdmBaseType;
    }
91

    
92
    /**
93
     * Creates a new Term query. Depending on whether <code>isTextField</code> is set true or not the
94
     * supplied <code>queryString</code> will be parsed by using the according analyzer or not.
95
     * Setting <code>isTextField</code> to <code>false</code> is useful for searching for uuids etc.
96
     *
97
     * @param fieldName
98
     * @param queryString
99
     * @param isTextField whether this field is a field containing free text in contrast to e.g. ID fields.
100
     *     If <code>isTextField</code> is set <code>true</code> the <code>queryString</code> will be parsed by
101
     *     using the according analyzer.
102
     * @return the resulting <code>TermQuery</code> or <code>null</code> in case of an <code>ParseException</code>
103
     *
104
     * TODO consider throwing the ParseException !!!!
105
     */
106
    public Query newTermQuery(String fieldName, String queryString, boolean isTextField) {
107

    
108
        String luceneQueryString = fieldName + ":(" + queryString + ")";
109
        if (isTextField) {
110
            textFieldNames.add(fieldName);
111
            // in order to support the full query syntax we must use the parser
112
            // here
113
            try {
114
                return toolProvider.getQueryParserFor(cdmBaseType).parse(luceneQueryString);
115
            } catch (ParseException e) {
116
                logger.error(e);
117
            }
118
            return null;
119
        } else {
120
            return new TermQuery(new Term(fieldName, queryString));
121
        }
122
    }
123

    
124
    /**
125
     * only to be used for text fields, see {@link #newTermQuery(String, String, boolean)}
126
     * @param fieldName
127
     * @param queryString
128
     * @return a {@link TermQuery} or a {@link WildcardQuery}
129
     */
130
    public Query newTermQuery(String fieldName, String queryString){
131
        return newTermQuery(fieldName, queryString, true);
132
    }
133

    
134
    /**
135
     * DefinedTerms are stored in the Lucene index by the
136
     * {@link DefinedTermBaseClassBridge} in a consistent way. One field per
137
     * language and also in one additional field for all languages. This method
138
     * is a convenient means to retrieve a Lucene query string for such the
139
     * fields.
140
     *
141
     * @param name
142
     *            name of the term field as in the Lucene index. The field must
143
     *            have been written to Lucene document by
144
     *            {@link DefinedTermBaseClassBridge}
145
     *
146
     * @param languages
147
     *            the languages to search for exclusively. Can be
148
     *            <code>null</code> to search in all languages
149
     * @return
150
     */
151
    public Query newDefinedTermQuery(String name, String queryString, List<Language> languages) {
152

    
153
        BooleanQuery localizedTermQuery = new BooleanQuery();
154
        localizedTermQuery.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
155
        if(languages == null || languages.size() == 0){
156
            localizedTermQuery.add(newTermQuery(name + ".representation.text.ALL", queryString), Occur.SHOULD);
157
            localizedTermQuery.add(newTermQuery(name + ".representation.label.ALL", queryString), Occur.SHOULD);
158
            localizedTermQuery.add(newTermQuery(name + ".representation.abbreviatedLabel.ALL", queryString), Occur.SHOULD);
159

    
160
        } else {
161
            for(Language lang : languages){
162
                localizedTermQuery.add(newTermQuery(name + ".representation.text." + lang.getUuid().toString(), queryString), Occur.SHOULD);
163
                localizedTermQuery.add(newTermQuery(name + ".representation.label." + lang.getUuid().toString(), queryString), Occur.SHOULD);
164
                localizedTermQuery.add(newTermQuery(name + ".representation.abbreviatedLabel." + lang.getUuid().toString(), queryString), Occur.SHOULD);
165
            }
166
        }
167
        return localizedTermQuery;
168
    }
169

    
170
    /**
171
     * MultilanguageString maps are stored in the Lucene index by the
172
     * {@link MultilanguageTextFieldBridge } in a consistent way. One field per
173
     * language and also in one additional field for all languages. This method
174
     * is a convenient means to retrieve a Lucene query string for such the
175
     * fields.
176
     *
177
     * @param name
178
     *            name of the term field as in the Lucene index. The field must
179
     *            have been written to Lucene document by
180
     *            {@link DefinedTermBaseClassBridge}
181
     * @param languages
182
     *            the languages to search for exclusively. Can be
183
     *            <code>null</code> to search in all languages
184
     * @return
185
     */
186
    public Query newMultilanguageTextQuery(String name, String queryString, List<Language> languages) {
187

    
188
        BooleanQuery localizedTermQuery = new BooleanQuery();
189
        localizedTermQuery.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
190
        if(languages == null || languages.size() == 0){
191
            localizedTermQuery.add(newTermQuery(name + ".ALL", queryString), Occur.SHOULD);
192
        } else {
193
            for(Language lang : languages){
194
                localizedTermQuery.add(newTermQuery(name + "." + lang.getUuid().toString(), queryString), Occur.SHOULD);
195
            }
196
        }
197
        return localizedTermQuery;
198
    }
199

    
200
    /**
201
     * @param idFieldName
202
     * @param entitiy
203
     * @return
204
     */
205
    public Query newEntityIdQuery(String idFieldName, CdmBase entitiy){
206
        return newTermQuery(idFieldName, String.valueOf(entitiy.getId()), false);
207
    }
208

    
209
    /**
210
     * @param idFieldName
211
     * @param entitiy
212
     * @return
213
     */
214
    public Query newEntityIdsQuery(String idFieldName, List<? extends CdmBase> entities){
215
        BooleanQuery idInQuery = new BooleanQuery();
216
        if(entities != null && entities.size() > 0 ){
217
            for(CdmBase entity : entities){
218
                idInQuery.add(newEntityIdQuery(idFieldName, entity), Occur.SHOULD);
219
            }
220
        }
221
        return idInQuery;
222
    }
223

    
224
    /**
225
     * @param idFieldName
226
     * @return
227
     */
228
    public Query newIsNotNullQuery(String idFieldName){
229
        return new TermQuery(new Term(NotNullAwareIdBridge.notNullField(idFieldName), NotNullAwareIdBridge.NOT_NULL_VALUE));
230
    }
231

    
232
    /**
233
     * @param uuidFieldName
234
     * @param entity
235
     * @return
236
     */
237
    public Query newEntityUuidQuery(String uuidFieldName, IdentifiableEntity entity) {
238
        return newTermQuery(uuidFieldName, entity.getUuid().toString(), false);
239
    }
240

    
241
    /**
242
     * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the uuid
243
     * of the <code>entities</code>, the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
244
     * @param uuidFieldName
245
     * @param entities
246
     * @return
247
     */
248
    public Query newEntityUuidsQuery(String uuidFieldName, List<? extends IdentifiableEntity> entities){
249

    
250
        BooleanQuery uuidInQuery = new BooleanQuery();
251
        if(entities != null && entities.size() > 0 ){
252
            for(IdentifiableEntity entity : entities){
253
                uuidInQuery.add(newEntityUuidQuery(uuidFieldName, entity), Occur.SHOULD);
254
            }
255
        }
256
        return uuidInQuery;
257
    }
258

    
259

    
260
    /**
261
     * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the
262
     * supplied <code>uuids</code>
263
     * the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
264
     * @param uuidFieldName
265
     * @param entities
266
     * @return
267
     */
268
    public Query newUuidQuery(String uuidFieldName, List<UUID> uuids){
269

    
270
        BooleanQuery uuidInQuery = new BooleanQuery();
271
        if(uuids != null && uuids.size() > 0 ){
272
            for(UUID uuid : uuids){
273
                uuidInQuery.add(newTermQuery(uuidFieldName, uuids.toString(), false), Occur.SHOULD);
274
            }
275
        }
276
        return uuidInQuery;
277
    }
278

    
279

    
280
    /**
281
     * Returns a Lucene Query which rely on double numeric range query
282
     * on Latitude / Longitude
283
     *
284
     *(+/- copied from {@link SpatialQueryBuilderFromPoint#buildSpatialQueryByRange(Point, double, String)})
285
     *
286
     * @param center center of the search discus
287
     * @param radius distance max to center in km
288
     * @param fieldName name of the Lucene Field implementing Coordinates
289
     * @return Lucene Query to be used in a search
290
     * @see Query
291
     * @see org.hibernate.search.spatial.Coordinates
292
     */
293
    public static Query buildSpatialQueryByRange(Rectangle boundingBox, String fieldName) {
294

    
295
        String latitudeFieldName = fieldName + "_HSSI_Latitude";
296
        String longitudeFieldName = fieldName + "_HSSI_Longitude";
297

    
298
        Query latQuery= NumericRangeQuery.newDoubleRange(
299
                latitudeFieldName, boundingBox.getLowerLeft().getLatitude(),
300
                boundingBox.getUpperRight().getLatitude(), true, true
301
        );
302

    
303
        Query longQuery= null;
304
        if ( boundingBox.getLowerLeft().getLongitude() <= boundingBox.getUpperRight().getLongitude() ) {
305
            longQuery = NumericRangeQuery.newDoubleRange( longitudeFieldName, boundingBox.getLowerLeft().getLongitude(),
306
                    boundingBox.getUpperRight().getLongitude(), true, true );
307
        }
308
        else {
309
            longQuery= new BooleanQuery();
310
            ( (BooleanQuery) longQuery).add( NumericRangeQuery.newDoubleRange( longitudeFieldName, boundingBox.getLowerLeft().getLongitude(),
311
                    180.0, true, true ), BooleanClause.Occur.SHOULD );
312
            ( (BooleanQuery) longQuery).add( NumericRangeQuery.newDoubleRange( longitudeFieldName, -180.0,
313
                    boundingBox.getUpperRight().getLongitude(), true, true ), BooleanClause.Occur.SHOULD );
314
        }
315

    
316
        BooleanQuery boxQuery = new BooleanQuery();
317
        boxQuery.add( latQuery, BooleanClause.Occur.MUST );
318
        boxQuery.add( longQuery, BooleanClause.Occur.MUST );
319

    
320
        return new FilteredQuery(
321
                new MatchAllDocsQuery(),
322
                new QueryWrapperFilter( boxQuery )
323
        );
324
    }
325

    
326
    /**
327
     *
328
     * @param fromField
329
     * @param toField
330
     * @param joinFromQuery
331
     * @param fromType
332
     * @return
333
     * @throws IOException
334
     */
335
    public Query newJoinQuery(String fromField, String toField, BooleanQuery joinFromQuery,
336
            Class<? extends CdmBase> fromType) throws IOException {
337
            return JoinUtil.createJoinQuery(fromField, toField, joinFromQuery, indexSearcherFor(fromType));
338
    }
339

    
340
    /**
341
     * @param clazz
342
     * @return
343
     */
344
    private IndexSearcher indexSearcherFor(Class<? extends CdmBase> clazz) {
345
        if(indexSearcherMap.get(clazz) == null){
346

    
347
            IndexReader indexReader = toolProvider.getIndexReaderFor(clazz);
348
            IndexSearcher searcher = new IndexSearcher(indexReader);
349
            searcher.setDefaultFieldSortScoring(true, true);
350
            indexSearcherMap.put(clazz, searcher);
351
        }
352
        IndexSearcher indexSearcher = indexSearcherMap.get(clazz);
353
        return indexSearcher;
354
    }
355

    
356
}
(11-11/14)