cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/SearchResultBuilder.java

   1 // $Id$
   2 /**
   3 * Copyright (C) 2012 EDIT
   4 * European Distributed Institute of Taxonomy
   5 * http://www.e-taxonomy.eu
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version 1.1
   8 * See LICENSE.TXT at the top of this package for the full license terms.
   9 */
  10 package eu.etaxonomy.cdm.api.service.search;
  11
  12 import java.io.IOException;
  13 import java.util.ArrayList;
  14 import java.util.Arrays;
  15 import java.util.HashMap;
  16 import java.util.List;
  17 import java.util.Map;
  18
  19 import org.apache.commons.lang.ArrayUtils;
  20 import org.apache.commons.lang.StringUtils;
  21 import org.apache.log4j.Logger;
  22 import org.apache.lucene.document.Document;
  23 import org.apache.lucene.index.CorruptIndexException;
  24 import org.apache.lucene.search.MultiTermQuery;
  25 import org.apache.lucene.search.Query;
  26 import org.apache.lucene.search.ScoreDoc;
  27 import org.apache.lucene.search.TopDocs;
  28 import org.apache.lucene.search.WildcardQuery;
  29 import org.apache.lucene.search.grouping.GroupDocs;
  30 import org.apache.lucene.search.grouping.TopGroups;
  31 import org.hibernate.search.engine.DocumentBuilder;
  32
  33 import eu.etaxonomy.cdm.model.CdmBaseType;
  34 import eu.etaxonomy.cdm.model.common.CdmBase;
  35 import eu.etaxonomy.cdm.persistence.dao.common.ICdmEntityDao;
  36
  37 /**
  38  * @author Andreas Kohlbecker
  39  * @date Jan 6, 2012
  40  *
  41  */
  42 public class SearchResultBuilder implements ISearchResultBuilder {
  43
  44     public static final Logger logger = Logger.getLogger(SearchResultBuilder.class);
  45
  46     /* (non-Javadoc)
  47      * @see eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder#createResultSetFromIds(eu.etaxonomy.cdm.search.LuceneSearch, org.apache.lucene.search.TopDocs, eu.etaxonomy.cdm.persistence.dao.common.ICdmEntityDao, java.lang.String)
  48      */
  49     private Query query;
  50     /**
  51      * fragmentNumber - max number of sentence fragments to return
  52      */
  53     private int fragmentNumber = 5;
  54     /**
  55      * fragmentSize - the max number of characters for each fragment
  56      */
  57     private int fragmentSize = 100;
  58     private LuceneSearch luceneSearch;
  59
  60     /**
  61      * Use this constructor if you do not wish to retrieve highlighted terms found in the best sections of a text.
  62      * @param luceneSearch
  63      */
  64     public SearchResultBuilder(LuceneSearch luceneSearch){
  65         this.luceneSearch = luceneSearch;
  66     }
  67
  68     /**
  69      * @param luceneSearch
  70      * @param query the Query will be used to highlight matching fragments if the <code>highlightFields</code> property is supplied to
  71      * {@link #createResultSet(TopDocs, String[], ICdmEntityDao, String, List)}
  72      */
  73     public SearchResultBuilder(LuceneSearch luceneSearch, Query query){
  74         this.luceneSearch = luceneSearch;
  75         this.query = query;
  76     }
  77
  78     /**
  79      * {@inheritDoc}
  80      *
  81      * <h3>NOTE:</h3> All {@link MultiTermQuery} like {@link WildcardQuery} are
  82      * constant score by default since Lucene 2.9, you can change that back to
  83      * scoring mode: <code>WildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)</code>
  84      * This slows down the query immense or throws TooManyClauses exceptions if
  85      * too many terms match the wildcard.
  86      */
  87     public <T extends CdmBase> List<SearchResult<T>> createResultSet(TopGroups topGroupsResultSet,
  88                 String[] highlightFields, ICdmEntityDao<T> dao, Map<CdmBaseType, String> idFields, List<String> propertyPaths) throws CorruptIndexException, IOException {
  89
  90         List<SearchResult<T>> searchResults = new ArrayList<SearchResult<T>>();
  91
  92         if(topGroupsResultSet == null){
  93             return searchResults;
  94         }
  95
  96         SearchResultHighligther highlighter = null;
  97         if(highlightFields  != null && highlightFields.length > 0){
  98             highlighter = new SearchResultHighligther();
  99         }
 100
 101         for (GroupDocs groupDoc : topGroupsResultSet.groups) {
 102
 103             String cdmEntityId = null;
 104             SearchResult<T> searchResult = new SearchResult<T>();
 105             for(ScoreDoc scoreDoc : groupDoc.scoreDocs) {
 106                 //FIXME should we group on taxon id ?????
 107                 Document document = luceneSearch.getSearcher().doc(scoreDoc.doc);
 108                 searchResult.addDoc(document);
 109
 110                 if(cdmEntityId == null){
 111                     // IMPORTANT: here we assume that all documents refer to the same cdm entity
 112                     cdmEntityId = findId(idFields, document);
 113                 }
 114             }
 115
 116             // set score values
 117             if(isNumber(groupDoc.maxScore)){
 118                 searchResult.setScore(groupDoc.maxScore);
 119             }
 120             //FIXME get max score
 121 //            if(isNumber(topGroupsResultSet.getMaxScore())){
 122 //                searchResult.setMaxScore(topGroupsResultSet.getMaxScore());
 123 //            }
 124
 125             //TODO use findByUuid(List<UUID> uuids, List<Criterion> criteria, List<String> propertyPaths)
 126             //      instead or even better a similar findById(List<Integer> ids) however this is not yet implemented
 127             if(cdmEntityId != null){
 128                 T entity = dao.load(Integer.valueOf(cdmEntityId), propertyPaths);
 129                 searchResult.setEntity(entity);
 130             }
 131
 132             // add highlight fragments
 133             if(highlighter != null){
 134                 Map<String, String[]> fieldFragmentMap = null;
 135                 for(Document doc: searchResult.getDocs()){
 136                     fieldFragmentMap = merge(fieldFragmentMap, highlighter.getFragmentsWithHighlightedTerms(luceneSearch.getAnalyzer(), query, highlightFields, doc, fragmentNumber, fragmentSize));
 137                 }
 138                 searchResult.setFieldHighlightMap(fieldFragmentMap);
 139             }
 140
 141             // finally add the final result to the list
 142             searchResults.add(searchResult);
 143         }
 144
 145         return searchResults;
 146     }
 147
 148     /**
 149      * @param base
 150      * @param add
 151      * @return
 152      */
 153     private Map<String, String[]> merge(Map<String, String[]> base, Map<String, String[]> add) {
 154         if(base == null){
 155             return add;
 156         } else {
 157             for(String key : add.keySet()) {
 158                 if (base.containsKey(key)){
 159                     base.put(key, (String[]) ArrayUtils.addAll(base.get(key), add.get(key)));
 160                 } else {
 161                     base.put(key, add.get(key));
 162                 }
 163             }
 164             return base;
 165         }
 166     }
 167
 168     /**
 169      * find the entity id
 170      *
 171      * @param idFields
 172      * @param doc
 173      * @return
 174      */
 175     private String findId(Map<CdmBaseType,String> idFieldMap, Document doc) {
 176
 177         String docClassName = doc.getValues(DocumentBuilder.CLASS_FIELDNAME)[0];
 178
 179         String id = null;
 180         for(CdmBaseType baseType  : idFieldMap.keySet()){
 181             if(baseType.getSubClassNames().contains(docClassName)){
 182                 String[] idStrings = doc.getValues(idFieldMap.get(baseType));
 183                 if(idStrings.length > 0 && StringUtils.isNotBlank(idStrings[0])){
 184                     id = idStrings[0];
 185                     break;
 186                 }
 187             }
 188         }
 189         if(id == null){
 190             throw new RuntimeException("No id field name given for " + docClassName);
 191         }
 192         return id;
 193     }
 194
 195     /**
 196      * @param number
 197      * @return
 198      */
 199     private boolean isNumber(Float number) {
 200         return !Double.isNaN(number) && !Double.isInfinite(number);
 201     }
 202
 203 }