Project

General

Profile

Download (6.22 KB) Statistics
| Branch: | Tag: | Revision:
/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9

    
10
package eu.etaxonomy.cdm.persistence.dao.hibernate;
11

    
12

    
13
import java.io.IOException;
14
import java.io.StringReader;
15
import java.util.Iterator;
16
import java.util.Vector;
17

    
18
import org.apache.commons.logging.Log;
19
import org.apache.commons.logging.LogFactory;
20
import org.apache.lucene.analysis.Analyzer;
21
import org.apache.lucene.analysis.Token;
22
import org.apache.lucene.analysis.TokenStream;
23
import org.apache.lucene.analysis.standard.StandardAnalyzer;
24
import org.apache.lucene.index.CorruptIndexException;
25
import org.apache.lucene.index.IndexReader;
26
import org.apache.lucene.index.Term;
27
import org.apache.lucene.queryParser.ParseException;
28
import org.apache.lucene.queryParser.QueryParser;
29
import org.apache.lucene.search.PhraseQuery;
30
import org.apache.lucene.search.Query;
31
import org.apache.lucene.search.TermQuery;
32
import org.apache.lucene.search.spell.Dictionary;
33
import org.apache.lucene.search.spell.LuceneDictionary;
34
import org.apache.lucene.search.spell.SpellChecker;
35
import org.apache.lucene.store.Directory;
36
import org.hibernate.SessionFactory;
37
import org.hibernate.search.FullTextSession;
38
import org.hibernate.search.Search;
39
import org.hibernate.search.SearchFactory;
40
import org.hibernate.search.reader.ReaderProvider;
41
import org.hibernate.search.store.DirectoryProvider;
42
import org.springframework.beans.factory.annotation.Autowired;
43
import org.springframework.orm.hibernate3.support.HibernateDaoSupport;
44

    
45
import eu.etaxonomy.cdm.model.common.CdmBase;
46
import eu.etaxonomy.cdm.persistence.dao.IAlternativeSpellingSuggestionParser;
47

    
48

    
49
public abstract class AlternativeSpellingSuggestionParser<T extends CdmBase> extends HibernateDaoSupport  implements
50
IAlternativeSpellingSuggestionParser {
51
	private static Log log = LogFactory.getLog(AlternativeSpellingSuggestionParser.class);
52

    
53
	private String defaultField;
54
	protected Directory directory;
55
	private Class<T> type;
56
	private Class<? extends T> indexedClasses[];
57

    
58
	public AlternativeSpellingSuggestionParser(Class<T> type) {
59
		this.type = type;
60
	}
61

    
62
	public void setIndexedClasses(Class<? extends T> indexedClasses[]) {
63
		this.indexedClasses = indexedClasses;
64
	}
65

    
66
	public abstract void setDirectory(Directory directory);
67

    
68
	@Autowired
69
	public void setHibernateSessionFactory(SessionFactory sessionFactory) {
70
		super.setSessionFactory(sessionFactory);
71
	}
72

    
73
	public void setDefaultField(String defaultField) {
74
		this.defaultField = defaultField;
75
	}
76

    
77
	public Query parse(String queryString) throws ParseException {
78
		QueryParser queryParser = new QueryParser(defaultField, new StandardAnalyzer());		
79
		return queryParser.parse(queryString);
80
	}
81

    
82
	public Query suggest(String queryString) throws ParseException {
83
		QuerySuggester querySuggester = new QuerySuggester(defaultField, new StandardAnalyzer());
84
		Query query = querySuggester.parse(queryString);
85
		return querySuggester.hasSuggestedQuery() ? query : null;
86
	}
87

    
88
	private class QuerySuggester extends QueryParser {
89
		private boolean suggestedQuery = false;
90
		public QuerySuggester(String field, Analyzer analyzer) {
91
			super(field, analyzer);
92
		}
93
		protected Query getFieldQuery(String field, String queryText) throws ParseException {
94
			// Copied from org.apache.lucene.queryParser.QueryParser
95
			// replacing construction of TermQuery with call to getTermQuery()
96
			// which finds close matches.
97
			TokenStream source = getAnalyzer().tokenStream(field, new StringReader(queryText));
98
			Vector v = new Vector();
99
			Token t;
100

    
101
			while (true) {
102
				try {
103
					t = source.next();
104
				} catch (IOException e) {
105
					t = null;
106
				}
107
				if (t == null)
108
					break;
109
				v.addElement(t.termText());
110
			}
111
			try {
112
				source.close();
113
			} catch (IOException e) {
114
				// ignore
115
			}
116

    
117
			if (v.size() == 0)
118
				return null;
119
			else if (v.size() == 1)
120
				return new TermQuery(getTerm(field, (String) v.elementAt(0)));
121
			else {
122
				PhraseQuery q = new PhraseQuery();
123
				q.setSlop(getPhraseSlop());
124
				for (int i = 0; i < v.size(); i++) {
125
					q.add(getTerm(field, (String) v.elementAt(i)));
126
				}
127
				return q;
128
			}
129
		}
130

    
131
		private Term getTerm(String field, String queryText) throws ParseException {
132

    
133
			try {
134
				SpellChecker spellChecker = new SpellChecker(directory);
135
				if (spellChecker.exist(queryText)) {
136
					return new Term(field, queryText);
137
				}
138
				String[] similarWords = spellChecker.suggestSimilar(queryText, 1);
139
				if (similarWords.length == 0) {
140
					return new Term(field, queryText);
141
				}			
142
				suggestedQuery = true;
143
				return new Term(field, similarWords[0]);
144
			} catch (IOException e) {
145
				throw new ParseException(e.getMessage());
146
			}
147
		}		
148
		public boolean hasSuggestedQuery() {
149
			return suggestedQuery;
150
		}	
151
	}
152

    
153
	public void refresh() {
154
		FullTextSession fullTextSession = Search.getFullTextSession(getSession());
155
		SearchFactory searchFactory = fullTextSession.getSearchFactory();
156
		try {
157
			SpellChecker spellChecker = new SpellChecker(directory);
158

    
159
			for(Class<? extends T> indexedClass : indexedClasses) {
160
				DirectoryProvider directoryProvider = searchFactory.getDirectoryProviders(indexedClass)[0];
161
				ReaderProvider readerProvider = searchFactory.getReaderProvider();
162
				IndexReader indexReader = null;
163

    
164
				try {
165

    
166
					indexReader = readerProvider.openReader(directoryProvider);
167
					log.debug("Creating new dictionary for words in " + defaultField + " docs " + indexReader.numDocs());
168

    
169
					Dictionary dictionary = new LuceneDictionary(indexReader, defaultField);
170
					if(log.isDebugEnabled()) {
171
						Iterator iterator = dictionary.getWordsIterator();
172
						while(iterator.hasNext()) {
173
							log.debug("Indexing word " + iterator.next());
174
						}
175
					}
176

    
177
					spellChecker.indexDictionary(dictionary);
178
				} catch (CorruptIndexException cie) {
179
					log.error("Spellings index is corrupted", cie);
180
				} finally {
181
					if (indexReader != null) {
182
						readerProvider.closeReader(indexReader);
183
					}
184
				} 
185
			} 
186
		}catch (IOException ioe) {
187
			log.error(ioe);
188
		}
189
	}
190

    
191
}
    (1-1/1)