-
[cdmlib.git] / cdmlib-persistence / src / main / java / eu / etaxonomy / cdm / persistence / dao / hibernate / AlternativeSpellingSuggestionParser.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.persistence.dao.hibernate;
11
12
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.Version;
import org.hibernate.SessionFactory;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.hibernate.search.SearchFactory;
import org.hibernate.search.indexes.IndexReaderAccessor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate3.support.HibernateDaoSupport;

import eu.etaxonomy.cdm.config.Configuration;
import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.persistence.dao.IAlternativeSpellingSuggestionParser;
50
51
52 /**
53 * @author unknown
54 *
55 * @param <T>
56 * @deprecated Use current methods for alternative spelling suggestions. This class is no longer supported
57 * after migration to hibernate 4.x.
58 */
59 @Deprecated
60 public abstract class AlternativeSpellingSuggestionParser<T extends CdmBase>
61 extends HibernateDaoSupport
62 implements IAlternativeSpellingSuggestionParser {
63 private static Log log = LogFactory.getLog(AlternativeSpellingSuggestionParser.class);
64
65 private String defaultField;
66 protected Directory directory;
67 private final Class<T> type;
68 private Class<? extends T> indexedClasses[];
69
70 private static Version version = Configuration.luceneVersion;
71
72
73 public AlternativeSpellingSuggestionParser(Class<T> type) {
74 this.type = type;
75 }
76
77 public void setIndexedClasses(Class<? extends T> indexedClasses[]) {
78 this.indexedClasses = indexedClasses;
79 }
80
81 public abstract void setDirectory(Directory directory);
82
83 @Autowired
84 public void setHibernateSessionFactory(SessionFactory sessionFactory) {
85 super.setSessionFactory(sessionFactory);
86 }
87
88 public void setDefaultField(String defaultField) {
89 this.defaultField = defaultField;
90 }
91
92 @Override
93 public Query parse(String queryString) throws ParseException {
94 QueryParser queryParser = new QueryParser(version, defaultField, new StandardAnalyzer(version));
95 return queryParser.parse(queryString);
96 }
97
98 @Override
99 public Query suggest(String queryString) throws ParseException {
100 QuerySuggester querySuggester = new QuerySuggester(defaultField, new StandardAnalyzer(version));
101 Query query = querySuggester.parse(queryString);
102 return querySuggester.hasSuggestedQuery() ? query : null;
103 }
104
105 private class QuerySuggester extends QueryParser {
106 private boolean suggestedQuery = false;
107 public QuerySuggester(String field, Analyzer analyzer) {
108 super(version, field, analyzer);
109 }
110 @Override
111 protected Query getFieldQuery(String field, String queryText) throws ParseException {
112 // Copied from org.apache.lucene.queryParser.QueryParser
113 // replacing construction of TermQuery with call to getTermQuery()
114 // which finds close matches.
115 TokenStream source = getAnalyzer().tokenStream(field, new StringReader(queryText));
116 Vector<Object> v = new Vector<Object>();
117 Token t;
118
119 while (true) {
120 try {
121 //OLD
122 // t = source.next();
123
124 //FIXME this is new after Hibernate 4 migration
125 //but completely unchecked and unsure if correct
126 //#3344
127 boolean it = source.incrementToken();
128 t = source.getAttribute(Token.class);
129
130
131
132 } catch (IOException e) {
133 t = null;
134 }
135 if (t == null){
136 break;
137 }
138
139 // OLD v.addElement(t.termText());
140 //FIXME unchecked #3344
141 v.addElement(t.term());
142 }
143 try {
144 source.close();
145 } catch (IOException e) {
146 // ignore
147 }
148
149 if (v.size() == 0) {
150 return null;
151 } else if (v.size() == 1) {
152 return new TermQuery(getTerm(field, (String) v.elementAt(0)));
153 } else {
154 PhraseQuery q = new PhraseQuery();
155 q.setSlop(getPhraseSlop());
156 for (int i = 0; i < v.size(); i++) {
157 q.add(getTerm(field, (String) v.elementAt(i)));
158 }
159 return q;
160 }
161 }
162
163 private Term getTerm(String field, String queryText) throws ParseException {
164
165 try {
166 SpellChecker spellChecker = new SpellChecker(directory);
167 if (spellChecker.exist(queryText)) {
168 return new Term(field, queryText);
169 }
170 String[] similarWords = spellChecker.suggestSimilar(queryText, 1);
171 if (similarWords.length == 0) {
172 return new Term(field, queryText);
173 }
174 suggestedQuery = true;
175 return new Term(field, similarWords[0]);
176 } catch (IOException e) {
177 throw new ParseException(e.getMessage());
178 }
179 }
180 public boolean hasSuggestedQuery() {
181 return suggestedQuery;
182 }
183 }
184
185 @Override
186 public void refresh() {
187 FullTextSession fullTextSession = Search.getFullTextSession(getSession());
188 SearchFactory searchFactory = fullTextSession.getSearchFactory();
189 try {
190 SpellChecker spellChecker = new SpellChecker(directory);
191
192 for(Class<? extends T> indexedClass : indexedClasses) {
193 //OLD
194 // DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(indexedClass)[0];
195 // ReaderProvider readerProvider = searchFactory.getReaderProvider();
196 IndexReaderAccessor ira = searchFactory.getIndexReaderAccessor();
197 // IndexReader indexReader = ira.open(indexedClass);
198 IndexReader indexReader = null;
199
200 try {
201
202 indexReader = ira.open(indexedClass);
203 // indexReader = readerProvider.openIndexReader(); // .openReader(directoryProvider);
204 log.debug("Creating new dictionary for words in " + defaultField + " docs " + indexReader.numDocs());
205
206 Dictionary dictionary = new LuceneDictionary(indexReader, defaultField);
207 if(log.isDebugEnabled()) {
208 BytesRefIterator iterator = dictionary.getWordsIterator();
209 BytesRef bytesRef;
210 while((bytesRef = iterator.next()) != null) {
211 log.debug("Indexing word " + bytesRef);
212 }
213 }
214
215
216 // OLD: spellChecker.indexDictionary(dictionary);
217 //FIXME preliminary for Hibernate 4 migration see # 3344
218 IndexWriterConfig config = new IndexWriterConfig(version, new StandardAnalyzer(version));
219 boolean fullMerge = true;
220 spellChecker.indexDictionary(dictionary, config, fullMerge);
221
222 } catch (CorruptIndexException cie) {
223 log.error("Spellings index is corrupted", cie);
224 } finally {
225 if (indexReader != null) {
226 // readerProvider.closeIndexReader(indexReader);
227 ira.close(indexReader);
228 }
229 }
230 }
231 }catch (IOException ioe) {
232 log.error(ioe);
233 }
234 }
235
236 }