Project

General

Profile

Download (6.46 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy 
4
 * http://www.e-taxonomy.eu
5
 * 
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.persistence.dao.hibernate;
11

    
12

    
13
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.hibernate.SessionFactory;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.hibernate.search.SearchFactory;
import org.hibernate.search.reader.ReaderProvider;
import org.hibernate.search.store.DirectoryProvider;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate3.support.HibernateDaoSupport;
import org.springmodules.lucene.index.factory.IndexFactory;
import org.springmodules.lucene.index.factory.LuceneIndexWriter;

import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.persistence.dao.IAlternativeSpellingSuggestionParser;
49

    
50

    
51
public abstract class AlternativeSpellingSuggestionParser<T extends CdmBase> extends HibernateDaoSupport  implements
52
		IAlternativeSpellingSuggestionParser {
53
	private static Log log = LogFactory.getLog(AlternativeSpellingSuggestionParser.class);
54
	
55
	private String defaultField;
56
	protected IndexFactory indexFactory;
57
	protected Directory directory;
58
	private Class<T> type;
59
	
60
	public AlternativeSpellingSuggestionParser(Class<T> type) {
61
		this.type = type;
62
	}
63
	
64
	public abstract void setDirectory(Directory directory);
65
	
66
	public abstract void setIndexFactory(IndexFactory indexFactory);
67
	
68
	@Autowired
69
	public void setHibernateSessionFactory(SessionFactory sessionFactory) {
70
		super.setSessionFactory(sessionFactory);
71
	}
72
	
73
	public void setDefaultField(String defaultField) {
74
		this.defaultField = defaultField;
75
	}
76

    
77
	public Query parse(String queryString) throws ParseException {
78
		QueryParser queryParser = new QueryParser(defaultField, new StandardAnalyzer());		
79
		return queryParser.parse(queryString);
80
	}
81

    
82
	public Query suggest(String queryString) throws ParseException {
83
		QuerySuggester querySuggester = new QuerySuggester(defaultField, new StandardAnalyzer());
84
		Query query = querySuggester.parse(queryString);
85
		return querySuggester.hasSuggestedQuery() ? query : null;
86
	}
87
	
88
	private class QuerySuggester extends QueryParser {
89
		private boolean suggestedQuery = false;
90
		public QuerySuggester(String field, Analyzer analyzer) {
91
			super(field, analyzer);
92
		}
93
		protected Query getFieldQuery(String field, String queryText) throws ParseException {
94
			// Copied from org.apache.lucene.queryParser.QueryParser
95
			// replacing construction of TermQuery with call to getTermQuery()
96
			// which finds close matches.
97
		    TokenStream source = getAnalyzer().tokenStream(field, new StringReader(queryText));
98
			Vector v = new Vector();
99
			Token t;
100

    
101
			while (true) {
102
				try {
103
					t = source.next();
104
				} catch (IOException e) {
105
					t = null;
106
				}
107
				if (t == null)
108
					break;
109
				v.addElement(t.termText());
110
			}
111
			try {
112
				source.close();
113
			} catch (IOException e) {
114
				// ignore
115
			}
116

    
117
			if (v.size() == 0)
118
				return null;
119
			else if (v.size() == 1)
120
				return new TermQuery(getTerm(field, (String) v.elementAt(0)));
121
			else {
122
				PhraseQuery q = new PhraseQuery();
123
				q.setSlop(getPhraseSlop());
124
				for (int i = 0; i < v.size(); i++) {
125
					q.add(getTerm(field, (String) v.elementAt(i)));
126
				}
127
				return q;
128
			}
129
		}
130
		
131
		private Term getTerm(String field, String queryText) throws ParseException {
132
						
133
			try {
134
				SpellChecker spellChecker = new SpellChecker(directory);
135
				if (spellChecker.exist(queryText)) {
136
					return new Term(field, queryText);
137
				}
138
				String[] similarWords = spellChecker.suggestSimilar(queryText, 1);
139
				if (similarWords.length == 0) {
140
					return new Term(field, queryText);
141
				}			
142
				suggestedQuery = true;
143
				return new Term(field, similarWords[0]);
144
			} catch (IOException e) {
145
				throw new ParseException(e.getMessage());
146
			}
147
		}		
148
		public boolean hasSuggestedQuery() {
149
			return suggestedQuery;
150
		}	
151
	}
152
	
153
	public void refresh() {
154
		
155
		FullTextSession fullTextSession = Search.createFullTextSession(getSession());
156
		SearchFactory searchFactory = fullTextSession.getSearchFactory();
157
        DirectoryProvider directoryProvider = searchFactory.getDirectoryProviders(type)[0];
158
        
159
        ReaderProvider readerProvider = searchFactory.getReaderProvider();
160
        
161
		IndexReader indexReader = null;
162
		LuceneIndexWriter indexWriter = null;
163
		try {
164
			try {
165
				indexWriter = indexFactory.getIndexWriter();
166
				indexReader = readerProvider.openReader(directoryProvider);
167

    
168
				log.debug("Creating new dictionary for words in " + defaultField + " docs " + indexReader.numDocs());
169
				Dictionary dictionary = new LuceneDictionary(indexReader, defaultField);
170
				
171
				if(log.isDebugEnabled()) {
172
				    Iterator iterator = dictionary.getWordsIterator();
173
				    while(iterator.hasNext()) {
174
					    log.debug("Indexing word " + iterator.next());
175
				    }
176
				}
177
				
178
	            SpellChecker spellChecker = new SpellChecker(directory);
179
	            spellChecker.indexDictionary(dictionary);
180
			} catch (CorruptIndexException cie) {
181
				log.error("Spellings index is corrupted", cie);
182
			} finally {
183
				if (indexReader != null) {
184
					readerProvider.closeReader(indexReader);
185
				}
186
				if(indexWriter != null) {
187
                    indexWriter.close();
188
                }
189
			} 
190
		} catch (IOException ioe) {
191
			log.error(ioe);
192
		}
193
	}
194

    
195
}
    (1-1/1)