1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.persistence.dao.hibernate;
|
11
|
|
12
|
|
13
|
import java.io.IOException;
|
14
|
import java.io.StringReader;
|
15
|
import java.util.Vector;
|
16
|
|
17
|
import org.apache.commons.logging.Log;
|
18
|
import org.apache.commons.logging.LogFactory;
|
19
|
import org.apache.lucene.analysis.Analyzer;
|
20
|
import org.apache.lucene.analysis.Token;
|
21
|
import org.apache.lucene.analysis.TokenStream;
|
22
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
23
|
import org.apache.lucene.index.CorruptIndexException;
|
24
|
import org.apache.lucene.index.IndexReader;
|
25
|
import org.apache.lucene.index.IndexWriterConfig;
|
26
|
import org.apache.lucene.index.Term;
|
27
|
import org.apache.lucene.queryparser.classic.ParseException;
|
28
|
import org.apache.lucene.queryparser.classic.QueryParser;
|
29
|
import org.apache.lucene.search.PhraseQuery;
|
30
|
import org.apache.lucene.search.Query;
|
31
|
import org.apache.lucene.search.TermQuery;
|
32
|
import org.apache.lucene.search.spell.Dictionary;
|
33
|
import org.apache.lucene.search.spell.LuceneDictionary;
|
34
|
import org.apache.lucene.search.spell.SpellChecker;
|
35
|
import org.apache.lucene.store.Directory;
|
36
|
import org.apache.lucene.util.BytesRef;
|
37
|
import org.apache.lucene.util.BytesRefIterator;
|
38
|
import org.hibernate.SessionFactory;
|
39
|
import org.hibernate.search.FullTextSession;
|
40
|
import org.hibernate.search.Search;
|
41
|
import org.hibernate.search.SearchFactory;
|
42
|
import org.hibernate.search.indexes.IndexReaderAccessor;
|
43
|
import org.springframework.beans.factory.annotation.Autowired;
|
44
|
import org.springframework.orm.hibernate3.support.HibernateDaoSupport;
|
45
|
|
46
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
47
|
import eu.etaxonomy.cdm.persistence.dao.IAlternativeSpellingSuggestionParser;
|
48
|
|
49
|
|
50
|
/**
|
51
|
* @author unknown
|
52
|
*
|
53
|
* @param <T>
|
54
|
* @deprecated Use current methods for alternative spelling suggestions. This class is no longer supported
|
55
|
* after migration to hibernate 4.x.
|
56
|
*/
|
57
|
@Deprecated
|
58
|
public abstract class AlternativeSpellingSuggestionParser<T extends CdmBase>
|
59
|
extends HibernateDaoSupport
|
60
|
implements IAlternativeSpellingSuggestionParser {
|
61
|
private static Log log = LogFactory.getLog(AlternativeSpellingSuggestionParser.class);
|
62
|
|
63
|
private String defaultField;
|
64
|
protected Directory directory;
|
65
|
private final Class<T> type;
|
66
|
private Class<? extends T> indexedClasses[];
|
67
|
|
68
|
|
69
|
public AlternativeSpellingSuggestionParser(Class<T> type) {
|
70
|
this.type = type;
|
71
|
}
|
72
|
|
73
|
public void setIndexedClasses(Class<? extends T> indexedClasses[]) {
|
74
|
this.indexedClasses = indexedClasses;
|
75
|
}
|
76
|
|
77
|
public abstract void setDirectory(Directory directory);
|
78
|
|
79
|
@Autowired
|
80
|
public void setHibernateSessionFactory(SessionFactory sessionFactory) {
|
81
|
super.setSessionFactory(sessionFactory);
|
82
|
}
|
83
|
|
84
|
public void setDefaultField(String defaultField) {
|
85
|
this.defaultField = defaultField;
|
86
|
}
|
87
|
|
88
|
@Override
|
89
|
public Query parse(String queryString) throws ParseException {
|
90
|
QueryParser queryParser = new QueryParser(defaultField, new StandardAnalyzer());
|
91
|
return queryParser.parse(queryString);
|
92
|
}
|
93
|
|
94
|
@Override
|
95
|
public Query suggest(String queryString) throws ParseException {
|
96
|
QuerySuggester querySuggester = new QuerySuggester(defaultField, new StandardAnalyzer());
|
97
|
Query query = querySuggester.parse(queryString);
|
98
|
return querySuggester.hasSuggestedQuery() ? query : null;
|
99
|
}
|
100
|
|
101
|
private class QuerySuggester extends QueryParser {
|
102
|
private boolean suggestedQuery = false;
|
103
|
public QuerySuggester(String field, Analyzer analyzer) {
|
104
|
super(field, analyzer);
|
105
|
}
|
106
|
@Override
|
107
|
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
|
108
|
// Copied from org.apache.lucene.queryParser.QueryParser
|
109
|
// replacing construction of TermQuery with call to getTermQuery()
|
110
|
// which finds close matches.
|
111
|
TokenStream source;
|
112
|
source = getAnalyzer().tokenStream(field, new StringReader(queryText));
|
113
|
Vector<Object> v = new Vector<Object>();
|
114
|
Token t;
|
115
|
|
116
|
while (true) {
|
117
|
try {
|
118
|
//OLD
|
119
|
// t = source.next();
|
120
|
|
121
|
//FIXME this is new after Hibernate 4 migration
|
122
|
//but completely unchecked and unsure if correct
|
123
|
//#3344
|
124
|
boolean it = source.incrementToken();
|
125
|
t = source.getAttribute(Token.class);
|
126
|
|
127
|
|
128
|
|
129
|
} catch (IOException e) {
|
130
|
t = null;
|
131
|
}
|
132
|
if (t == null){
|
133
|
break;
|
134
|
}
|
135
|
|
136
|
// OLD v.addElement(t.termText());
|
137
|
//FIXME unchecked #3344
|
138
|
//FIXME #4716 not sure if this implementation equals the old t.term()
|
139
|
String term = new String(t.buffer(), 0, t.length());
|
140
|
|
141
|
v.addElement(term);
|
142
|
}
|
143
|
try {
|
144
|
source.close();
|
145
|
} catch (IOException e) {
|
146
|
// ignore
|
147
|
}
|
148
|
|
149
|
if (v.size() == 0) {
|
150
|
return null;
|
151
|
} else if (v.size() == 1) {
|
152
|
return new TermQuery(getTerm(field, (String) v.elementAt(0)));
|
153
|
} else {
|
154
|
PhraseQuery q = new PhraseQuery();
|
155
|
q.setSlop(getPhraseSlop());
|
156
|
for (int i = 0; i < v.size(); i++) {
|
157
|
q.add(getTerm(field, (String) v.elementAt(i)));
|
158
|
}
|
159
|
return q;
|
160
|
}
|
161
|
}
|
162
|
|
163
|
private Term getTerm(String field, String queryText) throws ParseException {
|
164
|
|
165
|
try {
|
166
|
SpellChecker spellChecker = new SpellChecker(directory);
|
167
|
if (spellChecker.exist(queryText)) {
|
168
|
return new Term(field, queryText);
|
169
|
}
|
170
|
String[] similarWords = spellChecker.suggestSimilar(queryText, 1);
|
171
|
if (similarWords.length == 0) {
|
172
|
return new Term(field, queryText);
|
173
|
}
|
174
|
suggestedQuery = true;
|
175
|
return new Term(field, similarWords[0]);
|
176
|
} catch (IOException e) {
|
177
|
throw new ParseException(e.getMessage());
|
178
|
}
|
179
|
}
|
180
|
public boolean hasSuggestedQuery() {
|
181
|
return suggestedQuery;
|
182
|
}
|
183
|
}
|
184
|
|
185
|
@Override
|
186
|
public void refresh() {
|
187
|
FullTextSession fullTextSession = Search.getFullTextSession(getSession());
|
188
|
SearchFactory searchFactory = fullTextSession.getSearchFactory();
|
189
|
try {
|
190
|
SpellChecker spellChecker = new SpellChecker(directory);
|
191
|
|
192
|
for(Class<? extends T> indexedClass : indexedClasses) {
|
193
|
//OLD
|
194
|
// DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(indexedClass)[0];
|
195
|
// ReaderProvider readerProvider = searchFactory.getReaderProvider();
|
196
|
IndexReaderAccessor ira = searchFactory.getIndexReaderAccessor();
|
197
|
// IndexReader indexReader = ira.open(indexedClass);
|
198
|
IndexReader indexReader = null;
|
199
|
|
200
|
try {
|
201
|
|
202
|
indexReader = ira.open(indexedClass);
|
203
|
// indexReader = readerProvider.openIndexReader(); // .openReader(directoryProvider);
|
204
|
log.debug("Creating new dictionary for words in " + defaultField + " docs " + indexReader.numDocs());
|
205
|
|
206
|
Dictionary dictionary = new LuceneDictionary(indexReader, defaultField);
|
207
|
if(log.isDebugEnabled()) {
|
208
|
BytesRefIterator iterator = dictionary.getEntryIterator();
|
209
|
BytesRef bytesRef;
|
210
|
while((bytesRef = iterator.next()) != null) {
|
211
|
log.debug("Indexing word " + bytesRef);
|
212
|
}
|
213
|
}
|
214
|
|
215
|
|
216
|
// OLD: spellChecker.indexDictionary(dictionary);
|
217
|
//FIXME preliminary for Hibernate 4 migration see # 3344
|
218
|
IndexWriterConfig config = new IndexWriterConfig( new StandardAnalyzer());
|
219
|
boolean fullMerge = true;
|
220
|
spellChecker.indexDictionary(dictionary, config, fullMerge);
|
221
|
|
222
|
} catch (CorruptIndexException cie) {
|
223
|
log.error("Spellings index is corrupted", cie);
|
224
|
} finally {
|
225
|
if (indexReader != null) {
|
226
|
// readerProvider.closeIndexReader(indexReader);
|
227
|
ira.close(indexReader);
|
228
|
}
|
229
|
}
|
230
|
}
|
231
|
}catch (IOException ioe) {
|
232
|
log.error(ioe);
|
233
|
}
|
234
|
}
|
235
|
|
236
|
}
|