Merge branch 'release/4.0.0'
[cdmlib.git] / cdmlib-persistence / src / main / java / eu / etaxonomy / cdm / persistence / dao / hibernate / AlternativeSpellingSuggestionParser.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.persistence.dao.hibernate;
11
12
import java.io.Closeable;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.hibernate.SessionFactory;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.hibernate.search.SearchFactory;
import org.hibernate.search.indexes.IndexReaderAccessor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate3.support.HibernateDaoSupport;

import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.persistence.dao.IAlternativeSpellingSuggestionParser;
48
49
50 /**
51 * @author unknown
52 *
53 * @param <T>
54 * @deprecated Use current methods for alternative spelling suggestions. This class is no longer supported
55 * after migration to hibernate 4.x.
56 */
57 @Deprecated
58 public abstract class AlternativeSpellingSuggestionParser<T extends CdmBase>
59 extends HibernateDaoSupport
60 implements IAlternativeSpellingSuggestionParser {
61 private static Log log = LogFactory.getLog(AlternativeSpellingSuggestionParser.class);
62
63 private String defaultField;
64 protected Directory directory;
65 private final Class<T> type;
66 private Class<? extends T> indexedClasses[];
67
68
69 public AlternativeSpellingSuggestionParser(Class<T> type) {
70 this.type = type;
71 }
72
73 public void setIndexedClasses(Class<? extends T> indexedClasses[]) {
74 this.indexedClasses = indexedClasses;
75 }
76
77 public abstract void setDirectory(Directory directory);
78
79 @Autowired
80 public void setHibernateSessionFactory(SessionFactory sessionFactory) {
81 super.setSessionFactory(sessionFactory);
82 }
83
84 public void setDefaultField(String defaultField) {
85 this.defaultField = defaultField;
86 }
87
88 @Override
89 public Query parse(String queryString) throws ParseException {
90 QueryParser queryParser = new QueryParser(defaultField, new StandardAnalyzer());
91 return queryParser.parse(queryString);
92 }
93
94 @Override
95 public Query suggest(String queryString) throws ParseException {
96 QuerySuggester querySuggester = new QuerySuggester(defaultField, new StandardAnalyzer());
97 Query query = querySuggester.parse(queryString);
98 return querySuggester.hasSuggestedQuery() ? query : null;
99 }
100
101 private class QuerySuggester extends QueryParser {
102 private boolean suggestedQuery = false;
103 public QuerySuggester(String field, Analyzer analyzer) {
104 super(field, analyzer);
105 }
106 @Override
107 protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
108 // Copied from org.apache.lucene.queryParser.QueryParser
109 // replacing construction of TermQuery with call to getTermQuery()
110 // which finds close matches.
111 TokenStream source;
112 source = getAnalyzer().tokenStream(field, new StringReader(queryText));
113 Vector<Object> v = new Vector<Object>();
114 Token t;
115
116 while (true) {
117 try {
118 //OLD
119 // t = source.next();
120
121 //FIXME this is new after Hibernate 4 migration
122 //but completely unchecked and unsure if correct
123 //#3344
124 boolean it = source.incrementToken();
125 t = source.getAttribute(Token.class);
126
127
128
129 } catch (IOException e) {
130 t = null;
131 }
132 if (t == null){
133 break;
134 }
135
136 // OLD v.addElement(t.termText());
137 //FIXME unchecked #3344
138 //FIXME #4716 not sure if this implementation equals the old t.term()
139 String term = new String(t.buffer(), 0, t.length());
140
141 v.addElement(term);
142 }
143 try {
144 source.close();
145 } catch (IOException e) {
146 // ignore
147 }
148
149 if (v.size() == 0) {
150 return null;
151 } else if (v.size() == 1) {
152 return new TermQuery(getTerm(field, (String) v.elementAt(0)));
153 } else {
154 PhraseQuery q = new PhraseQuery();
155 q.setSlop(getPhraseSlop());
156 for (int i = 0; i < v.size(); i++) {
157 q.add(getTerm(field, (String) v.elementAt(i)));
158 }
159 return q;
160 }
161 }
162
163 private Term getTerm(String field, String queryText) throws ParseException {
164
165 try {
166 SpellChecker spellChecker = new SpellChecker(directory);
167 if (spellChecker.exist(queryText)) {
168 return new Term(field, queryText);
169 }
170 String[] similarWords = spellChecker.suggestSimilar(queryText, 1);
171 if (similarWords.length == 0) {
172 return new Term(field, queryText);
173 }
174 suggestedQuery = true;
175 return new Term(field, similarWords[0]);
176 } catch (IOException e) {
177 throw new ParseException(e.getMessage());
178 }
179 }
180 public boolean hasSuggestedQuery() {
181 return suggestedQuery;
182 }
183 }
184
185 @Override
186 public void refresh() {
187 FullTextSession fullTextSession = Search.getFullTextSession(getSession());
188 SearchFactory searchFactory = fullTextSession.getSearchFactory();
189 try {
190 SpellChecker spellChecker = new SpellChecker(directory);
191
192 for(Class<? extends T> indexedClass : indexedClasses) {
193 //OLD
194 // DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(indexedClass)[0];
195 // ReaderProvider readerProvider = searchFactory.getReaderProvider();
196 IndexReaderAccessor ira = searchFactory.getIndexReaderAccessor();
197 // IndexReader indexReader = ira.open(indexedClass);
198 IndexReader indexReader = null;
199
200 try {
201
202 indexReader = ira.open(indexedClass);
203 // indexReader = readerProvider.openIndexReader(); // .openReader(directoryProvider);
204 log.debug("Creating new dictionary for words in " + defaultField + " docs " + indexReader.numDocs());
205
206 Dictionary dictionary = new LuceneDictionary(indexReader, defaultField);
207 if(log.isDebugEnabled()) {
208 BytesRefIterator iterator = dictionary.getEntryIterator();
209 BytesRef bytesRef;
210 while((bytesRef = iterator.next()) != null) {
211 log.debug("Indexing word " + bytesRef);
212 }
213 }
214
215
216 // OLD: spellChecker.indexDictionary(dictionary);
217 //FIXME preliminary for Hibernate 4 migration see # 3344
218 IndexWriterConfig config = new IndexWriterConfig( new StandardAnalyzer());
219 boolean fullMerge = true;
220 spellChecker.indexDictionary(dictionary, config, fullMerge);
221
222 } catch (CorruptIndexException cie) {
223 log.error("Spellings index is corrupted", cie);
224 } finally {
225 if (indexReader != null) {
226 // readerProvider.closeIndexReader(indexReader);
227 ira.close(indexReader);
228 }
229 }
230 }
231 }catch (IOException ioe) {
232 log.error(ioe);
233 }
234 }
235
236 }