Fix all compile errors from services to remote (some only preliminary)
[cdmlib.git] / cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/SearchResultHighligther.java
// $Id$
/**
 * Copyright (C) 2012 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
package eu.etaxonomy.cdm.api.service.search;


import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.TokenSources;

/**
 * Helper class for generating highlighted text fragments from lucene search results.
 *
 * @author Andreas Kohlbecker
 *
 */
public class SearchResultHighligther {

    public static final Logger logger = Logger.getLogger(SearchResultHighligther.class);

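    /**
     * Generates highlighted fragments for each of the given fields of the lucene document.
     * Fields without values are skipped and do not appear in the returned map.
     *
     * @param analyzer - analyzer used for both indexing and searching
     * @param query - query object created from user's input
     * @param fieldNames - names of the fields in the lucene doc containing the text to be fragmented
     * @param doc - the lucene document holding the field contents
     * @param fragmentNumber - max number of sentence fragments to return per field
     * @param fragmentSize - the max number of characters for each fragment
     * @return a map of field names to the corresponding highlighted fragments
     */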
    public Map<String,String[]> getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, String[] fieldNames, Document doc, int fragmentNumber, int fragmentSize){

        Map<String,String[]> fieldHighlightMap = new HashMap<String, String[]>();
        String[] values;
        String fieldContents;
        String[] fragments;

        try {
            for(String fieldName : fieldNames){
                values = doc.getValues(fieldName);
                if(values.length == 0){
                    continue;
                }
                // multi-valued fields are joined into a single string before fragmenting
                fieldContents = StringUtils.join(values, ' ');
                fragments = getFragmentsWithHighlightedTerms(analyzer, query, fieldName, fieldContents, fragmentNumber, fragmentSize);
                fieldHighlightMap.put(fieldName, fragments);
            }
        } catch (CorruptIndexException e) {
            logger.error("Error on retrieving highlighted fragments", e);
        } catch (IOException e) {
            logger.error("Error on retrieving highlighted fragments", e);
        }

        return fieldHighlightMap;
    }

    /**
     * Generates contextual fragments. Assumes that term vectors are not stored in the index.
     *
     * @param analyzer - analyzer used for both indexing and searching
     * @param query - query object created from user's input
     * @param fieldName - name of the field in the lucene doc containing the text to be fragmented
     * @param fieldContents - contents of fieldName
     * @param fragmentNumber - max number of sentence fragments to return
     * @param fragmentSize - the max number of characters for each fragment
     * @return the highlighted text fragments for the given field
     * @throws IOException
     */
    public String[] getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query,
            String fieldName, String fieldContents, int fragmentNumber, int fragmentSize) throws IOException {

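        // no term vectors are assumed to be stored for this field, so the field contents
        // are re-analyzed with the supplied analyzer to obtain the token stream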
        TokenStream stream = TokenSources.getTokenStream(fieldName, fieldContents, analyzer);
        String[] fragments = getFragmentsWithHighlightedTerms(stream, query, fieldName, fieldContents, fragmentNumber, fragmentSize);

        return fragments;
    }

//    /**
//     * Generates contextual fragments.
//     *
//     * @param termPosVector - Term Position Vector for fieldName
//     * @param query - query object created from user's input
//     * @param fieldName - name of the field containing the text to be fragmented
//     * @param fieldContents - contents of fieldName
//     * @param fragmentNumber - max number of sentence fragments to return
//     * @param fragmentSize - the max number of characters for each fragment
//     * @return
//     * @throws IOException
//     */
//    public String[] getFragmentsWithHighlightedTerms(TermPositionVector termPosVector, Query query,
//            String fieldName, String fieldContents, int fragmentNumber, int fragmentSize) throws IOException {
//
//        TokenStream stream = TokenSources.getTokenStream(termPosVector);
//        String[] fragments = getFragmentsWithHighlightedTerms(stream, query, fieldName, fieldContents, fragmentNumber, fragmentSize);
//
//        return fragments;
//    }
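    // Note: the commented-out variant above reads its tokens from a stored term vector
    // (TermPositionVector) instead of re-analyzing the field contents.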

    /**
     * Generates the highlighted fragments from the given token stream.
     *
     * @param stream - token stream created from fieldContents
     * @param query - query object created from user's input
     * @param fieldName - name of the field containing the text to be fragmented
     * @param fieldContents - contents of fieldName
     * @param fragmentNumber - max number of sentence fragments to return
     * @param fragmentSize - the max number of characters for each fragment
     * @return the highlighted text fragments
     * @throws IOException
     */
    private String[] getFragmentsWithHighlightedTerms(TokenStream stream, Query query, String fieldName, String fieldContents, int fragmentNumber,
            int fragmentSize) throws IOException {

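        // the QueryScorer ranks fragments by how well they match the query terms for the given field;
        // the SimpleFragmenter breaks the text into fragments of roughly fragmentSize characters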
        Scorer scorer = new QueryScorer(query, fieldName);
        Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);
        Highlighter highlighter = new Highlighter(scorer);

        highlighter.setTextFragmenter(fragmenter);
        highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

        String[] fragments = null;
        try {
            fragments = highlighter.getBestFragments(stream, fieldContents, fragmentNumber);
        } catch (InvalidTokenOffsetsException e) {
            // should never happen
            logger.error("InvalidTokenOffsetsException", e);
        }
        return fragments;
    }
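
    /*
     * Usage sketch (illustrative only; the analyzer, query, doc and field name below are
     * hypothetical placeholders): given a query and a lucene document, e.g. obtained from
     * an IndexSearcher hit, and the analyzer that was used at index time, the highlighted
     * fragments per field could be retrieved like this:
     *
     *   SearchResultHighligther highligther = new SearchResultHighligther();
     *   Map<String, String[]> fragmentsPerField = highligther.getFragmentsWithHighlightedTerms(
     *           analyzer, query, new String[]{"titleCache"}, doc, 3, 100);
     *   // fragmentsPerField.get("titleCache") now holds up to 3 fragments of max. 100 characters
     */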

}