merge hibernate4 migration branch into trunk
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / search / QueryFactory.java
1 // $Id$
2 /**
3 * Copyright (C) 2012 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.api.service.search;
11
12 import java.util.HashSet;
13 import java.util.List;
14 import java.util.Set;
15
16 import org.apache.log4j.Logger;
17 import org.apache.lucene.index.Term;
18 import org.apache.lucene.queryParser.ParseException;
19 import org.apache.lucene.search.BooleanClause.Occur;
20 import org.apache.lucene.search.BooleanQuery;
21 import org.apache.lucene.search.Query;
22 import org.apache.lucene.search.TermQuery;
23 import org.apache.lucene.search.WildcardQuery;
24
25 import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge;
26 import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
27 import eu.etaxonomy.cdm.hibernate.search.NotNullAwareIdBridge;
28 import eu.etaxonomy.cdm.model.common.CdmBase;
29 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
30 import eu.etaxonomy.cdm.model.common.Language;
31
32 /**
33 * @author a.kohlbecker
34 * @date Sep 14, 2012
35 *
36 */
37 public class QueryFactory {
38
39 public static final Logger logger = Logger.getLogger(QueryFactory.class);
40
41 private final LuceneSearch luceneSearch;
42
43 Set<String> textFieldNames = new HashSet<String>();
44
45 private BooleanQuery finalQuery;
46
47 public Set<String> getTextFieldNames() {
48 return textFieldNames;
49 }
50
51 public String[] getTextFieldNamesAsArray() {
52 return textFieldNames.toArray(new String[textFieldNames.size()]);
53 }
54
55
56 public QueryFactory(LuceneSearch luceneSearch){
57 this.luceneSearch = luceneSearch;
58 }
59
60 /**
61 * Creates a new Term query. Depending on whether <code>isTextField</code> is set true or not the
62 * supplied <code>queryString</code> will be parsed by using the according analyzer or not.
63 * Setting <code>isTextField</code> to <code>false</code> is useful for searching for uuids etc.
64 *
65 * @param fieldName
66 * @param queryString
67 * @param isTextField whether this field is a field containing free text in contrast to e.g. ID fields.
68 * If <code>isTextField</code> to <code>true</code> the <code>queryString</code> will be parsed by
69 * using the according analyzer.
70 * @return the resulting <code>TermQuery</code> or <code>null</code> in case of an <code>ParseException</code>
71 *
72 * TODO consider throwing the ParseException !!!!
73 */
74 public Query newTermQuery(String fieldName, String queryString, boolean isTextField) {
75
76 String luceneQueryString = fieldName + ":(" + queryString + ")";
77 if (isTextField) {
78 textFieldNames.add(fieldName);
79 // in order to support the full query syntax we must use the parser
80 // here
81 try {
82 return luceneSearch.parse(luceneQueryString);
83 } catch (ParseException e) {
84 logger.error(e);
85 }
86 return null;
87 } else {
88 return new TermQuery(new Term(fieldName, queryString));
89 }
90 }
91
92 /**
93 * only to be used for text fields, see {@link #newTermQuery(String, String, boolean)}
94 * @param fieldName
95 * @param queryString
96 * @return a {@link TermQuery} or a {@link WildcardQuery}
97 */
98 public Query newTermQuery(String fieldName, String queryString){
99 return newTermQuery(fieldName, queryString, true);
100 }
101
102 /**
103 * DefinedTerms are stored in the Lucene index by the
104 * {@link DefinedTermBaseClassBridge} in a consistent way. One field per
105 * language and also in one additional field for all languages. This method
106 * is a convenient means to retrieve a Lucene query string for such the
107 * fields.
108 *
109 * @param name
110 * name of the term field as in the Lucene index. The field must
111 * have been written to Lucene document by
112 * {@link DefinedTermBaseClassBridge}
113 *
114 * @param languages
115 * the languages to search for exclusively. Can be
116 * <code>null</code> to search in all languages
117 * @return
118 */
119 public Query newDefinedTermQuery(String name, String queryString, List<Language> languages) {
120
121 BooleanQuery localizedTermQuery = new BooleanQuery();
122 localizedTermQuery.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
123 if(languages == null || languages.size() == 0){
124 localizedTermQuery.add(newTermQuery(name + ".representation.text.ALL", queryString), Occur.SHOULD);
125 localizedTermQuery.add(newTermQuery(name + ".representation.label.ALL", queryString), Occur.SHOULD);
126 localizedTermQuery.add(newTermQuery(name + ".representation.abbreviatedLabel.ALL", queryString), Occur.SHOULD);
127
128 } else {
129 for(Language lang : languages){
130 localizedTermQuery.add(newTermQuery(name + ".representation.text." + lang.getUuid().toString(), queryString), Occur.SHOULD);
131 localizedTermQuery.add(newTermQuery(name + ".representation.label." + lang.getUuid().toString(), queryString), Occur.SHOULD);
132 localizedTermQuery.add(newTermQuery(name + ".representation.abbreviatedLabel." + lang.getUuid().toString(), queryString), Occur.SHOULD);
133 }
134 }
135 return localizedTermQuery;
136 }
137
138 /**
139 * MultilanguageString maps are stored in the Lucene index by the
140 * {@link MultilanguageTextFieldBridge } in a consistent way. One field per
141 * language and also in one additional field for all languages. This method
142 * is a convenient means to retrieve a Lucene query string for such the
143 * fields.
144 *
145 * @param name
146 * name of the term field as in the Lucene index. The field must
147 * have been written to Lucene document by
148 * {@link DefinedTermBaseClassBridge}
149 * @param languages
150 * the languages to search for exclusively. Can be
151 * <code>null</code> to search in all languages
152 * @return
153 */
154 public Query newMultilanguageTextQuery(String name, String queryString, List<Language> languages) {
155
156 BooleanQuery localizedTermQuery = new BooleanQuery();
157 localizedTermQuery.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
158 if(languages == null || languages.size() == 0){
159 localizedTermQuery.add(newTermQuery(name + ".ALL", queryString), Occur.SHOULD);
160 } else {
161 for(Language lang : languages){
162 localizedTermQuery.add(newTermQuery(name + "." + lang.getUuid().toString(), queryString), Occur.SHOULD);
163 }
164 }
165 return localizedTermQuery;
166 }
167
168
169 /**
170 * @param idFieldName
171 * @param entitiy
172 * @return
173 */
174 public Query newEntityIdQuery(String idFieldName, CdmBase entitiy){
175 return newTermQuery("inDescription.taxon.taxonNodes.classification.id", String.valueOf(entitiy.getId()), false);
176 }
177
178 /**
179 * @param idFieldName
180 * @return
181 */
182 public Query newIsNotNullQuery(String idFieldName){
183 return new TermQuery(new Term(NotNullAwareIdBridge.notNullField(idFieldName), NotNullAwareIdBridge.NOT_NULL_VALUE));
184 }
185
186 /**
187 * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the uuid
188 * of the <code>entities</code>, the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
189 * @param uuidFieldName
190 * @param entities
191 * @return
192 */
193 public Query newEntityUuidQuery(String uuidFieldName, List<? extends IdentifiableEntity> entities){
194
195 BooleanQuery uuidInQuery = new BooleanQuery();
196 if(entities != null && entities.size() > 0 ){
197 for(IdentifiableEntity entity : entities){
198 uuidInQuery.add(newTermQuery(uuidFieldName, entity.getUuid().toString(), false), Occur.SHOULD);
199 }
200 }
201 return uuidInQuery;
202 }
203
204 public void setFinalQuery(BooleanQuery finalQuery) {
205 this.finalQuery = finalQuery;
206 }
207
208 public BooleanQuery getFinalQuery(){
209 return finalQuery;
210 }
211
212 public LuceneSearch getLuceneSearch() {
213 return luceneSearch;
214 }
215
216 }