a bit more cleaning up
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / search / QueryFactory.java
1 // $Id$
2 /**
3 * Copyright (C) 2012 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.api.service.search;
11
12 import java.util.HashSet;
13 import java.util.List;
14 import java.util.Set;
15
16 import org.apache.log4j.Logger;
17 import org.apache.lucene.index.Term;
18 import org.apache.lucene.queryParser.ParseException;
19 import org.apache.lucene.search.BooleanClause.Occur;
20 import org.apache.lucene.search.BooleanQuery;
21 import org.apache.lucene.search.Query;
22 import org.apache.lucene.search.RangeQuery;
23 import org.apache.lucene.search.TermQuery;
24 import org.apache.lucene.search.WildcardQuery;
25
26 import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge;
27 import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
28 import eu.etaxonomy.cdm.hibernate.search.PaddedIntegerBridge;
29 import eu.etaxonomy.cdm.model.common.CdmBase;
30 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
31 import eu.etaxonomy.cdm.model.common.Language;
32
33 /**
34 * @author a.kohlbecker
35 * @date Sep 14, 2012
36 *
37 */
38 public class QueryFactory {
39
40 public static final Logger logger = Logger.getLogger(QueryFactory.class);
41
42 private LuceneSearch luceneSearch;
43
44 Set<String> textFieldNames = new HashSet<String>();
45
46 public Set<String> getTextFieldNames() {
47 return textFieldNames;
48 }
49
50
51 public QueryFactory(LuceneSearch luceneSearch){
52 this.luceneSearch = luceneSearch;
53 }
54
55 /**
56 * @param fieldName
57 * @param queryString
58 * @param isTextField whether this field is a field containing free text in contrast to e.g. ID fields.
59 * @return
60 */
61 public Query newTermQuery(String fieldName, String queryString, boolean isTextField){
62
63 if(isTextField){
64 textFieldNames.add(fieldName);
65 }
66
67 // in order to support the full query syntax we must use the parser here
68 String luceneQueryString = fieldName + ":(" + queryString + ")";
69 try {
70 return luceneSearch.parse(luceneQueryString);
71 } catch (ParseException e) {
72 logger.error(e);
73 }
74 return null;
75 }
76
77 /**
78 * only to be used for text fields, see {@link #newTermQuery(String, String, boolean)}
79 * @param fieldName
80 * @param queryString
81 * @return a {@link TermQuery} or a {@link WildcardQuery}
82 */
83 public Query newTermQuery(String fieldName, String queryString){
84 return newTermQuery(fieldName, queryString, true);
85 }
86
87 /**
88 * DefinedTerm representations and MultilanguageString maps are stored in the Lucene index by the {@link DefinedTermBaseClassBridge}
89 * and {@link MultilanguageTextFieldBridge } in a consistent way. One field per language and also in one additional field for all languages.
90 * This method is a convenient means to retrieve a Lucene query string for such the fields.
91 *
92 * @param name name of the term field as in the Lucene index. Must be field created by {@link DefinedTermBaseClassBridge}
93 * or {@link MultilanguageTextFieldBridge }
94 * @param languages the languages to search for exclusively. Can be <code>null</code> to search in all languages
95 * @return
96 */
97 public Query newLocalizedTermQuery(String name, String queryString, List<Language> languages) {
98
99 if(languages == null || languages.size() == 0){
100 return newTermQuery(name + ".ALL", queryString);
101 } else {
102 BooleanQuery localizedTermQuery = new BooleanQuery();
103 for(Language lang : languages){
104 localizedTermQuery.add(newTermQuery(name + "." + lang.getUuid().toString(), queryString), Occur.SHOULD);
105 }
106 return localizedTermQuery;
107 }
108 }
109
110 /**
111 * @param idFieldName
112 * @param entitiy
113 * @return
114 */
115 public Query newEntityIdQuery(String idFieldName, CdmBase entitiy){
116 return newTermQuery("inDescription.taxon.taxonNodes.classification.id", PaddedIntegerBridge.paddInteger(entitiy.getId()), false);
117 }
118
119 /**
120 * TODO open range queries [0 TO *] not working in the current version of lucene (https://issues.apache.org/jira/browse/LUCENE-995)
121 * so we are using integer maximum as workaround
122 * @param idFieldName
123 * @param entitiy
124 * @return
125 */
126 public Query newIdNotNullQuery(String idFieldName){
127 return new RangeQuery(
128 new Term(idFieldName, PaddedIntegerBridge.paddInteger(0)),
129 new Term(idFieldName, PaddedIntegerBridge.paddInteger(Integer.MAX_VALUE)),
130 false
131 );
132 }
133
134 /**
135 * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the uuid
136 * of the <code>entities</code>, the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
137 * @param uuidFieldName
138 * @param entities
139 * @return
140 */
141 public Query newEntityUuidQuery(String uuidFieldName, List<? extends IdentifiableEntity> entities){
142
143 BooleanQuery uuidInQuery = new BooleanQuery();
144 if(entities != null && entities.size() > 0 ){
145 for(IdentifiableEntity entity : entities){
146 uuidInQuery.add(newTermQuery(uuidFieldName, entity.getUuid().toString(), false), Occur.SHOULD);
147 }
148 }
149 return uuidInQuery;
150 }
151
152 }