merge trunk into cdm3.3 branch
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / search / SearchResultHighligther.java
index 073d0bc7dbd40dcb6ed396798e32e682fe012681..c29bc91a4b51d30813430f8f43dcb0a708f18933 100644 (file)
@@ -17,32 +17,20 @@ import java.util.Map;
 import org.apache.commons.lang.StringUtils;
 import org.apache.log4j.Logger;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.highlight.Fragmenter;
 import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
 import org.apache.lucene.search.highlight.QueryScorer;
 import org.apache.lucene.search.highlight.Scorer;
 import org.apache.lucene.search.highlight.SimpleFragmenter;
-import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
-import org.apache.lucene.search.highlight.SpanScorer;
 import org.apache.lucene.search.highlight.TokenSources;
 
 /**
- * This SearchResultHighligther is using the QueryScorer by default even if the SpanScorer is meant to be the new default scorer in Lucene,
- * see https://issues.apache.org/jira/browse/LUCENE-1685 and https://issues.apache.org/jira/browse/LUCENE-2013.
- * The SpanScorer was causing problems with phrase queries (see https://dev.e-taxonomy.eu/trac/ticket/2961)
- * whereas the QueryScorer was returning good results.
- * <p>
- * This SearchResultHighligther can be switched to use the SpanScorer: {@link #setUseSpanScorer(boolean)}
- * <p>
- * Based on work of Nicholas Hrycan
- * see http://code.google.com/p/hrycan-blog/source/browse/trunk/lucene-highlight/src/com/hrycan/search/HighlighterUtil.java
  *
  *
  * @author Andreas Kohlbecker
@@ -52,16 +40,6 @@ public class SearchResultHighligther {
 
     public static final Logger logger = Logger.getLogger(SearchResultHighligther.class);
 
-    private boolean useSpanScorer = true;
-
-    public boolean isUseSpanScorer() {
-        return useSpanScorer;
-    }
-
-    public void setUseSpanScorer(boolean useSpanScorer) {
-        this.useSpanScorer = useSpanScorer;
-    }
-
     public Map<String,String[]> getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, String[] fieldNames,  Document doc,  int fragmentNumber, int fragmentSize){
 
         Map<String,String[]> fieldHighlightMap = new HashMap<String, String[]>();
@@ -144,21 +122,21 @@ public class SearchResultHighligther {
     private String[] getFragmentsWithHighlightedTerms(TokenStream stream, Query query, String fieldName, String fieldContents, int fragmentNumber,
             int fragmentSize) throws IOException {
 
-        Fragmenter fragmenter;
-        Scorer scorer;
-        if(useSpanScorer){
-            scorer = new QueryScorer(query, fieldName);
-            fragmenter = new SimpleFragmenter(fragmentSize);
-        } else {
-            scorer = new SpanScorer(query, fieldName, new CachingTokenFilter(stream));
-            fragmenter = new SimpleSpanFragmenter((SpanScorer)scorer, fragmentSize);
-        }
 
+        Scorer scorer = new QueryScorer(query, fieldName); // always a QueryScorer built from the query and the field name
+        Fragmenter fragmenter = new SimpleFragmenter(fragmentSize); // fragmenter configured with the requested fragmentSize
         Highlighter highlighter = new Highlighter(scorer);
+
         highlighter.setTextFragmenter(fragmenter);
         highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
 
-        String[] fragments = highlighter.getBestFragments(stream, fieldContents, fragmentNumber);
+        String[] fragments = null; // stays null when the highlighter call below throws InvalidTokenOffsetsException
+        try {
+            fragments = highlighter.getBestFragments(stream, fieldContents, fragmentNumber);
+        } catch (InvalidTokenOffsetsException e) {
+            // Not expected to happen; if it does, the error is only logged and null is returned.
+            // NOTE(review): callers therefore have to cope with a null result - confirm they do.
+            logger.error("InvalidTokenOffsetsException", e);
+        }
         return fragments;
     }