cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/QueryFactory.java

   1 // $Id$
   2 /**
   3 * Copyright (C) 2012 EDIT
   4 * European Distributed Institute of Taxonomy
   5 * http://www.e-taxonomy.eu
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version 1.1
   8 * See LICENSE.TXT at the top of this package for the full license terms.
   9 */
  10 package eu.etaxonomy.cdm.api.service.search;
  11
  12 import java.util.HashSet;
  13 import java.util.List;
  14 import java.util.Set;
  15
  16 import org.apache.log4j.Logger;
  17 import org.apache.lucene.index.Term;
  18 import org.apache.lucene.queryParser.ParseException;
  19 import org.apache.lucene.search.BooleanClause.Occur;
  20 import org.apache.lucene.search.BooleanQuery;
  21 import org.apache.lucene.search.Query;
  22 import org.apache.lucene.search.RangeQuery;
  23 import org.apache.lucene.search.TermQuery;
  24 import org.apache.lucene.search.WildcardQuery;
  25
  26 import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge;
  27 import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
  28 import eu.etaxonomy.cdm.hibernate.search.PaddedIntegerBridge;
  29 import eu.etaxonomy.cdm.model.common.CdmBase;
  30 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
  31 import eu.etaxonomy.cdm.model.common.Language;
  32
  33 /**
  34  * @author a.kohlbecker
  35  * @date Sep 14, 2012
  36  *
  37  */
  38 public class QueryFactory {
  39
  40     public static final Logger logger = Logger.getLogger(QueryFactory.class);
  41
  42     private LuceneSearch luceneSearch;
  43
  44     Set<String> textFieldNames = new HashSet<String>();
  45
  46     private BooleanQuery finalQuery;
  47
  48     public Set<String> getTextFieldNames() {
  49         return textFieldNames;
  50     }
  51
  52     public String[] getTextFieldNamesAsArray() {
  53         return textFieldNames.toArray(new String[textFieldNames.size()]);
  54     }
  55
  56
  57     public QueryFactory(LuceneSearch luceneSearch){
  58         this.luceneSearch = luceneSearch;
  59     }
  60
  61     /**
  62      * @param fieldName
  63      * @param queryString
  64      * @param isTextField whether this field is a field containing free text in contrast to e.g. ID fields.
  65      * @return
  66      */
  67     public Query newTermQuery(String fieldName, String queryString, boolean isTextField){
  68
  69         if(isTextField){
  70             textFieldNames.add(fieldName);
  71         }
  72
  73          // in order to support the full query syntax we must use the parser here
  74         String luceneQueryString = fieldName + ":(" + queryString + ")";
  75         try {
  76             return luceneSearch.parse(luceneQueryString);
  77         } catch (ParseException e) {
  78             logger.error(e);
  79         }
  80         return null;
  81     }
  82
  83     /**
  84      * only to be used for text fields, see {@link #newTermQuery(String, String, boolean)}
  85      * @param fieldName
  86      * @param queryString
  87      * @return a {@link TermQuery} or a {@link WildcardQuery}
  88      */
  89     public Query newTermQuery(String fieldName, String queryString){
  90         return newTermQuery(fieldName, queryString, true);
  91     }
  92
  93     /**
  94      * DefinedTerm representations and MultilanguageString maps are stored in the Lucene index by the {@link DefinedTermBaseClassBridge}
  95      * and {@link MultilanguageTextFieldBridge } in a consistent way. One field per language and also in one additional field for all languages.
  96      * This method is a convenient means to retrieve a Lucene query string for such the fields.
  97      *
  98      * @param name name of the term field as in the Lucene index. Must be field created by {@link DefinedTermBaseClassBridge}
  99      * or {@link MultilanguageTextFieldBridge }
 100      * @param languages the languages to search for exclusively. Can be <code>null</code> to search in all languages
 101      * @return
 102      */
 103     public Query newLocalizedTermQuery(String name, String queryString, List<Language> languages) {
 104
 105         BooleanQuery localizedTermQuery = new BooleanQuery();
 106         localizedTermQuery.add(newTermQuery(name + ".label", queryString), Occur.SHOULD);
 107         if(languages == null || languages.size() == 0){
 108             localizedTermQuery.add(newTermQuery(name + ".ALL", queryString), Occur.SHOULD);
 109         } else {
 110             for(Language lang : languages){
 111                 localizedTermQuery.add(newTermQuery(name + "." + lang.getUuid().toString(), queryString), Occur.SHOULD);
 112             }
 113         }
 114         return localizedTermQuery;
 115     }
 116
 117     /**
 118      * convenience method for localized searches on {@link DefinedTermBase}
 119      * instances, it adds the field name suffix "representations" to the
 120      * <code>name</code> parameter and calls
 121      * {@link #newLocalizedTermQuery(String, String, List)}
 122      *
 123      * @param name
 124      * @param queryString
 125      * @param languages
 126      * @return
 127      */
 128     public Query newDefinedTermBaseQuery(String name, String queryString, List<Language> languages) {
 129         return newLocalizedTermQuery(name + ".representations", queryString, languages);
 130     }
 131
 132     /**
 133      * @param idFieldName
 134      * @param entitiy
 135      * @return
 136      */
 137     public Query newEntityIdQuery(String idFieldName, CdmBase entitiy){
 138         return newTermQuery("inDescription.taxon.taxonNodes.classification.id", PaddedIntegerBridge.paddInteger(entitiy.getId()), false);
 139     }
 140
 141     /**
 142      *  TODO open range queries [0 TO *] not working in the current version of lucene (https://issues.apache.org/jira/browse/LUCENE-995)
 143      *  so we are using integer maximum as workaround
 144      * @param idFieldName
 145      * @param entitiy
 146      * @return
 147      */
 148     public Query newIdNotNullQuery(String idFieldName){
 149         return new RangeQuery(
 150                     new Term(idFieldName, PaddedIntegerBridge.paddInteger(0)),
 151                     new Term(idFieldName, PaddedIntegerBridge.paddInteger(Integer.MAX_VALUE)),
 152                     false
 153             );
 154     }
 155
 156     /**
 157      * creates a query for searching for documents in which the field specified by <code>uuidFieldName</code> matches at least one of the uuid
 158      * of the <code>entities</code>, the sql equivalent of this is <code>WHERE uuidFieldName IN (uuid_1, uuid_2, ...) </code>.
 159      * @param uuidFieldName
 160      * @param entities
 161      * @return
 162      */
 163     public Query newEntityUuidQuery(String uuidFieldName, List<? extends IdentifiableEntity> entities){
 164
 165         BooleanQuery uuidInQuery = new BooleanQuery();
 166         if(entities != null && entities.size() > 0 ){
 167             for(IdentifiableEntity entity : entities){
 168                 uuidInQuery.add(newTermQuery(uuidFieldName, entity.getUuid().toString(), false), Occur.SHOULD);
 169             }
 170         }
 171         return uuidInQuery;
 172     }
 173
 174     public void setFinalQuery(BooleanQuery finalQuery) {
 175         this.finalQuery = finalQuery;
 176     }
 177
 178     public BooleanQuery getFinalQuery(){
 179         return finalQuery;
 180     }
 181
 182     public LuceneSearch getLuceneSearch() {
 183         return luceneSearch;
 184     }
 185
 186 }