cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/QueryFactory.java

   1 // $Id$
   2 /**
   3 * Copyright (C) 2012 EDIT
   4 * European Distributed Institute of Taxonomy
   5 * http://www.e-taxonomy.eu
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version 1.1
   8 * See LICENSE.TXT at the top of this package for the full license terms.
   9 */
  10 package eu.etaxonomy.cdm.api.service.search;
  11
  12 import java.util.HashSet;
  13 import java.util.List;
  14 import java.util.Set;
  15
  16 import org.apache.log4j.Logger;
  17 import org.apache.lucene.index.Term;
  18 import org.apache.lucene.queryParser.ParseException;
  19 import org.apache.lucene.search.BooleanClause.Occur;
  20 import org.apache.lucene.search.BooleanQuery;
  21 import org.apache.lucene.search.Query;
  22 import org.apache.lucene.search.TermQuery;
  23 import org.apache.lucene.search.WildcardQuery;
  24
  25 import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge;
  26 import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
  27 import eu.etaxonomy.cdm.hibernate.search.NotNullAwareIdBridge;
  28 import eu.etaxonomy.cdm.model.common.CdmBase;
  29 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
  30 import eu.etaxonomy.cdm.model.common.Language;
  31
  32 /**
  33  * @author a.kohlbecker
  34  * @date Sep 14, 2012
  35  *
  36  */
  37 public class QueryFactory {
  38
  39     public static final Logger logger = Logger.getLogger(QueryFactory.class);
  40
  41     private final LuceneSearch luceneSearch;
  42
  43     Set<String> textFieldNames = new HashSet<String>();
  44
  45     private BooleanQuery finalQuery;
  46
  47     public Set<String> getTextFieldNames() {
  48         return textFieldNames;
  49     }
  50
  51     public String[] getTextFieldNamesAsArray() {
  52         return textFieldNames.toArray(new String[textFieldNames.size()]);
  53     }
  54
  55
  56     public QueryFactory(LuceneSearch luceneSearch){
  57         this.luceneSearch = luceneSearch;
  58     }
  59
  60     /**
  61      * Creates a new Term query. Depending on whether <code>isTextField</code> is set true or not the
  62      * supplied <code>queryString</code> will be parsed by using the according analyzer or not.
  63      * Setting <code>isTextField</code> to <code>false</code> is useful for searching for uuids etc.
  64      *
  65      * @param fieldName
  66      * @param queryString
  67      * @param isTextField whether this field is a field containing free text in contrast to e.g. ID fields.
  68      *     If <code>isTextField</code> to <code>true</code> the <code>queryString</code> will be parsed by
  69      *     using the according analyzer.
  70      * @return the resulting <code>TermQuery</code> or <code>null</code> in case of an <code>ParseException</code>
  71      *
  72      * TODO consider throwing the ParseException !!!!
  73      */
  74     public Query newTermQuery(String fieldName, String queryString, boolean isTextField) {
  75
  76         String luceneQueryString = fieldName + ":(" + queryString + ")";
  77         if (isTextField) {
  78             textFieldNames.add(fieldName);
  79             // in order to support the full query syntax we must use the parser
  80             // here
  81             try {
  82                 return luceneSearch.parse(luceneQueryString);
  83             } catch (ParseException e) {
  84                 logger.error(e);
  85             }
  86             return null;
  87         } else {
  88             return new TermQuery(new Term(fieldName, queryString));
  89         }
  90     }
  91
  92     /**
  93      * only to be used for text fields, see {@link #newTermQuery(String, String, boolean)}
  94      * @param fieldName
  95      * @param queryString
  96      * @return a {@link TermQuery} or a {@link WildcardQuery}
  97      */
  98     public Query newTermQuery(String fieldName, String queryString){
  99         return newTermQuery(fieldName, queryString, true);
 100     }
 101
 102     /**
 103      * DefinedTerms are stored in the Lucene index by the
 104      * {@link DefinedTermBaseClassBridge} in a consistent way. One field per
 105      * language and also in one additional field for all languages. This method
 106      * is a convenient means to retrieve a Lucene query string for such the
 107      * fields.
 108      *
 109      * @param name
 110      *            name of the term field as in the Lucene index. The field must
 111      *            have been written to Lucene document by
 112      *            {@link DefinedTermBaseClassBridge}
 113      *
 114      * @param languages
 115      *            the languages to search for exclusively. Can be
 116      *            <code>null</code> to search in all languages
 117      * @return
 118      */
 119     public Query newDefinedTermQuery(String name, String queryString, List<Language> languages) {
 120
 121         BooleanQuery localizedTermQuery = new BooleanQuery();
 122         localizedTermQuery.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
 123         if(languages == null || languages.size() == 0){
 124             localizedTermQuery.add(newTermQuery(name + ".representation.text.ALL", queryString), Occur.SHOULD);
 125             localizedTermQuery.add(newTermQuery(name + ".representation.label.ALL", queryString), Occur.SHOULD);
 126             localizedTermQuery.add(newTermQuery(name + ".representation.abbreviatedLabel.ALL", queryString), Occur.SHOULD);
 127
 128         } else {
 129             for(Language lang : languages){
 130                 localizedTermQuery.add(newTermQuery(name + ".representation.text." + lang.getUuid().toString(), queryString), Occur.SHOULD);
 131                 localizedTermQuery.add(newTermQuery(name + ".representation.label." + lang.getUuid().toString(), queryString), Occur.SHOULD);
 132                 localizedTermQuery.add(newTermQuery(name + ".representation.abbreviatedLabel." + lang.getUuid().toString(), queryString), Occur.SHOULD);
 133             }
 134         }
 135         return localizedTermQuery;
 136     }
 137
 138     /**
 139      * MultilanguageString maps are stored in the Lucene index by the
 140      * {@link MultilanguageTextFieldBridge } in a consistent way. One field per
 141      * language and also in one additional field for all languages. This method
 142      * is a convenient means to retrieve a Lucene query string for such the
 143      * fields.
 144      *
 145      * @param name
 146      *            name of the term field as in the Lucene index. The field must
 147      *            have been written to Lucene document by
 148      *            {@link DefinedTermBaseClassBridge}
 149      * @param languages
 150      *            the languages to search for exclusively. Can be
 151      *            <code>null</code> to search in all languages
 152      * @return
 153      */
 154     public Query newMultilanguageTextQuery(String name, String queryString, List<Language> languages) {
 155
 156         BooleanQuery localizedTermQuery = new BooleanQuery();
 157         localizedTermQuery.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
 158         if(languages == null || languages.size() == 0){
 159             localizedTermQuery.add(newTermQuery(name + ".ALL", queryString), Occur.SHOULD);
 160         } else {
 161             for(Language lang : languages){
 162                 localizedTermQuery.add(newTermQuery(name + "." + lang.getUuid().toString(), queryString), Occur.SHOULD);
 163             }
 164         }
 165         return localizedTermQuery;
 166     }
 167
 168
 169     /**
 170      * @param idFieldName
 171      * @param entitiy
 172      * @return
 173      */
 174     public Query newEntityIdQuery(String idFieldName, CdmBase entitiy){
 175         return newTermQuery("inDescription.taxon.taxonNodes.classification.id", String.valueOf(entitiy.getId()), false);
 176     }
 177
 178     /**
 179      * @param idFieldName
 180      * @return
 181      */
 182     public Query newIsNotNullQuery(String idFieldName){
 183         return new TermQuery(new Term(NotNullAwareIdBridge.notNullField(idFieldName), NotNullAwareIdBridge.NOT_NULL_VALUE));
 184   }
 185
 186     /**
 187      * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the uuid
 188      * of the <code>entities</code>, the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
 189      * @param uuidFieldName
 190      * @param entities
 191      * @return
 192      */
 193     public Query newEntityUuidQuery(String uuidFieldName, List<? extends IdentifiableEntity> entities){
 194
 195         BooleanQuery uuidInQuery = new BooleanQuery();
 196         if(entities != null && entities.size() > 0 ){
 197             for(IdentifiableEntity entity : entities){
 198                 uuidInQuery.add(newTermQuery(uuidFieldName, entity.getUuid().toString(), false), Occur.SHOULD);
 199             }
 200         }
 201         return uuidInQuery;
 202     }
 203
 204     public void setFinalQuery(BooleanQuery finalQuery) {
 205         this.finalQuery = finalQuery;
 206     }
 207
 208     public BooleanQuery getFinalQuery(){
 209         return finalQuery;
 210     }
 211
 212     public LuceneSearch getLuceneSearch() {
 213         return luceneSearch;
 214     }
 215
 216 }