cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/SearchResultBuilder.java

   1 /**
   2 * Copyright (C) 2012 EDIT
   3 * European Distributed Institute of Taxonomy
   4 * http://www.e-taxonomy.eu
   5 *
   6 * The contents of this file are subject to the Mozilla Public License Version 1.1
   7 * See LICENSE.TXT at the top of this package for the full license terms.
   8 */
   9 package eu.etaxonomy.cdm.api.service.search;
  10
  11 import java.io.IOException;
  12 import java.util.ArrayList;
  13 import java.util.List;
  14 import java.util.Map;
  15
  16 import org.apache.commons.lang.ArrayUtils;
  17 import org.apache.commons.lang.StringUtils;
  18 import org.apache.log4j.Logger;
  19 import org.apache.lucene.document.Document;
  20 import org.apache.lucene.index.CorruptIndexException;
  21 import org.apache.lucene.search.MultiTermQuery;
  22 import org.apache.lucene.search.Query;
  23 import org.apache.lucene.search.ScoreDoc;
  24 import org.apache.lucene.search.TopDocs;
  25 import org.apache.lucene.search.WildcardQuery;
  26 import org.apache.lucene.search.grouping.GroupDocs;
  27 import org.apache.lucene.search.grouping.TopGroups;
  28 import org.apache.lucene.util.BytesRef;
  29 import org.hibernate.search.engine.ProjectionConstants;
  30
  31 import eu.etaxonomy.cdm.model.CdmBaseType;
  32 import eu.etaxonomy.cdm.model.common.CdmBase;
  33 import eu.etaxonomy.cdm.persistence.dao.common.ICdmEntityDao;
  34
  35 /**
  36  * @author Andreas Kohlbecker
  37  * @since Jan 6, 2012
  38  *
  39  */
  40 public class SearchResultBuilder implements ISearchResultBuilder {
  41
  42     public static final Logger logger = Logger.getLogger(SearchResultBuilder.class);
  43
  44     /* (non-Javadoc)
  45      * @see eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder#createResultSetFromIds(eu.etaxonomy.cdm.search.LuceneSearch, org.apache.lucene.search.TopDocs, eu.etaxonomy.cdm.persistence.dao.common.ICdmEntityDao, java.lang.String)
  46      */
  47     private Query query;
  48     /**
  49      * fragmentNumber - max number of sentence fragments to return
  50      */
  51     private final int fragmentNumber = 5;
  52     /**
  53      * fragmentSize - the max number of characters for each fragment
  54      */
  55     private final int fragmentSize = 100;
  56     private final LuceneSearch luceneSearch;
  57
  58     /**
  59      * Use this constructor if you do not wish to retrieve highlighted terms found in the best sections of a text.
  60      * @param luceneSearch
  61      */
  62     public SearchResultBuilder(LuceneSearch luceneSearch){
  63         this.luceneSearch = luceneSearch;
  64     }
  65
  66     /**
  67      * @param luceneSearch
  68      * @param query the Query will be used to highlight matching fragments if the <code>highlightFields</code> property is supplied to
  69      * {@link #createResultSet(TopDocs, String[], ICdmEntityDao, String, List)}
  70      */
  71     public SearchResultBuilder(LuceneSearch luceneSearch, Query query){
  72         this.luceneSearch = luceneSearch;
  73         this.query = query;
  74     }
  75
  76     /**
  77      * {@inheritDoc}
  78      *
  79      * <h3>NOTE:</h3> All {@link MultiTermQuery} like {@link WildcardQuery} are
  80      * constant score by default since Lucene 2.9, you can change that back to
  81      * scoring mode: <code>WildcardQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)</code>
  82      * This slows down the query immense or throws TooManyClauses exceptions if
  83      * too many terms match the wildcard.
  84      */
  85     @Override
  86     public <T extends CdmBase> List<SearchResult<T>> createResultSet(TopGroups<BytesRef> topGroupsResultSet,
  87                 String[] highlightFields, ICdmEntityDao<T> dao, Map<CdmBaseType, String> idFields, List<String> propertyPaths) throws CorruptIndexException, IOException {
  88
  89         List<SearchResult<T>> searchResults = new ArrayList<SearchResult<T>>();
  90
  91         if(topGroupsResultSet == null){
  92             return searchResults;
  93         }
  94
  95         SearchResultHighligther highlighter = null;
  96         if(highlightFields  != null && highlightFields.length > 0){
  97             highlighter = new SearchResultHighligther();
  98         }
  99
 100         for (GroupDocs groupDoc : topGroupsResultSet.groups) {
 101
 102             String cdmEntityId = null;
 103             SearchResult<T> searchResult = new SearchResult<T>();
 104             for(ScoreDoc scoreDoc : groupDoc.scoreDocs) {
 105                 Document document = luceneSearch.getSearcher().doc(scoreDoc.doc);
 106                 searchResult.addDoc(document);
 107
 108                 if(cdmEntityId == null){
 109                     // IMPORTANT: here we assume that all documents refer to the same cdm entity
 110                     cdmEntityId = findId(idFields, document);
 111                 }
 112             }
 113
 114             // set score values
 115             if(isNumber(groupDoc.maxScore)){
 116                 searchResult.setScore(groupDoc.maxScore);
 117             }
 118
 119             if(isNumber(topGroupsResultSet.maxScore)){
 120                 searchResult.setMaxScore(topGroupsResultSet.maxScore);
 121             }
 122
 123             //TODO use findByUuid(List<UUID> uuids, List<Criterion> criteria, List<String> propertyPaths)
 124             //      instead or even better a similar findById(List<Integer> ids) however this is not yet implemented
 125             if(cdmEntityId != null){
 126                 T entity = dao.load(Integer.valueOf(cdmEntityId), propertyPaths);
 127                 searchResult.setEntity(entity);
 128             }
 129
 130             // add highlight fragments
 131             if(highlighter != null){
 132                 Map<String, String[]> fieldFragmentMap = null;
 133                 for(Document doc: searchResult.getDocs()){
 134                     fieldFragmentMap = merge(fieldFragmentMap, highlighter.getFragmentsWithHighlightedTerms(luceneSearch.getAnalyzer(), query, highlightFields, doc, fragmentNumber, fragmentSize));
 135                 }
 136                 searchResult.setFieldHighlightMap(fieldFragmentMap);
 137             }
 138
 139             // finally add the final result to the list
 140             searchResults.add(searchResult);
 141         }
 142
 143         return searchResults;
 144     }
 145
 146     /**
 147      * {@inheritDoc}
 148      *
 149      */
 150     @Override
 151     public <T extends CdmBase> List<SearchResult<T>> createResultSet(TopDocs topDocs,
 152                 String[] highlightFields, ICdmEntityDao<T> dao, Map<CdmBaseType, String> idFields, List<String> propertyPaths) throws CorruptIndexException, IOException {
 153
 154         List<SearchResult<T>> searchResults = new ArrayList<SearchResult<T>>();
 155
 156         if(topDocs == null){
 157             return searchResults;
 158         }
 159
 160         SearchResultHighligther highlighter = null;
 161         if(highlightFields  != null && highlightFields.length > 0){
 162             highlighter = new SearchResultHighligther();
 163         }
 164
 165         for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
 166
 167                 String cdmEntityId = null;
 168                 SearchResult<T> searchResult = new SearchResult<T>();
 169
 170                 Document document = luceneSearch.getSearcher().doc(scoreDoc.doc);
 171                 searchResult.addDoc(document);
 172
 173                 if(cdmEntityId == null){
 174                         cdmEntityId = findId(idFields, document);
 175                 }
 176
 177                 //TODO use findByUuid(List<UUID> uuids, List<Criterion> criteria, List<String> propertyPaths)
 178                 //      instead or even better a similar findById(List<Integer> ids) however this is not yet implemented
 179                 if(cdmEntityId != null){
 180                         T entity = dao.load(Integer.valueOf(cdmEntityId), propertyPaths);
 181                         searchResult.setEntity(entity);
 182                 }
 183                 searchResult.setScore(scoreDoc.score);
 184                 searchResult.setMaxScore(scoreDoc.score);
 185             // add highlight fragments
 186             if(highlighter != null){
 187                 Map<String, String[]> fieldFragmentMap = null;
 188                 for(Document doc: searchResult.getDocs()){
 189                     fieldFragmentMap = merge(fieldFragmentMap, highlighter.getFragmentsWithHighlightedTerms(luceneSearch.getAnalyzer(), query, highlightFields, doc, fragmentNumber, fragmentSize));
 190                 }
 191                 searchResult.setFieldHighlightMap(fieldFragmentMap);
 192             }
 193
 194             // finally add the final result to the list
 195             searchResults.add(searchResult);
 196         }
 197
 198         return searchResults;
 199     }
 200
 201
 202     /**
 203      * {@inheritDoc}
 204      *
 205      */
 206     @Override
 207     public  List<DocumentSearchResult> createResultSet(TopDocs topDocs, String[] highlightFields) throws CorruptIndexException, IOException {
 208
 209         List<DocumentSearchResult> searchResults = new ArrayList<DocumentSearchResult>();
 210
 211         if(topDocs == null){
 212             return searchResults;
 213         }
 214
 215         SearchResultHighligther highlighter = null;
 216         if(highlightFields  != null && highlightFields.length > 0){
 217             highlighter = new SearchResultHighligther();
 218         }
 219
 220         for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
 221
 222                 String cdmEntityId = null;
 223                 DocumentSearchResult searchResult = new DocumentSearchResult();
 224
 225                 Document document = luceneSearch.getSearcher().doc(scoreDoc.doc);
 226                 searchResult.addDoc(document);
 227
 228                 searchResult.setScore(scoreDoc.score);
 229                 searchResult.setMaxScore(scoreDoc.score);
 230             // add highlight fragments
 231             if(highlighter != null){
 232                 Map<String, String[]> fieldFragmentMap = null;
 233                 for(Document doc: searchResult.getDocs()){
 234                     fieldFragmentMap = merge(fieldFragmentMap, highlighter.getFragmentsWithHighlightedTerms(luceneSearch.getAnalyzer(), query, highlightFields, doc, fragmentNumber, fragmentSize));
 235                 }
 236                 searchResult.setFieldHighlightMap(fieldFragmentMap);
 237             }
 238
 239             // finally add the final result to the list
 240             searchResults.add(searchResult);
 241         }
 242
 243         return searchResults;
 244     }
 245     /**
 246      * @param base
 247      * @param add
 248      * @return
 249      */
 250     private Map<String, String[]> merge(Map<String, String[]> base, Map<String, String[]> add) {
 251         if(base == null){
 252             return add;
 253         } else {
 254             for(String key : add.keySet()) {
 255                 if (base.containsKey(key)){
 256                     base.put(key, (String[]) ArrayUtils.addAll(base.get(key), add.get(key)));
 257                 } else {
 258                     base.put(key, add.get(key));
 259                 }
 260             }
 261             return base;
 262         }
 263     }
 264
 265     /**
 266      * find the entity id
 267      *
 268      * @param idFields
 269      * @param doc
 270      * @return
 271      */
 272     private String findId(Map<CdmBaseType,String> idFieldMap, Document doc) {
 273
 274         String docClassName = doc.getValues(ProjectionConstants.OBJECT_CLASS)[0];
 275
 276         String id = null;
 277         for(CdmBaseType baseType  : idFieldMap.keySet()){
 278             if(baseType.getSubClassNames().contains(docClassName)){
 279                 String[] idStrings = doc.getValues(idFieldMap.get(baseType));
 280                 if(idStrings.length > 0 && StringUtils.isNotBlank(idStrings[0])){
 281                     id = idStrings[0];
 282                     break;
 283                 }
 284             }
 285         }
 286         if(id == null){
 287             throw new RuntimeException("No id field name given for " + docClassName);
 288         }
 289         return id;
 290     }
 291
 292     /**
 293      * @param number
 294      * @return
 295      */
 296     private boolean isNumber(Float number) {
 297         return !Double.isNaN(number) && !Double.isInfinite(number);
 298     }
 299
 300 }