cdmlib-persistence/src/main/java/eu/etaxonomy/cdm/persistence/dao/hibernate/AlternativeSpellingSuggestionParser.java

   1 /**
   2  * Copyright (C) 2007 EDIT
   3  * European Distributed Institute of Taxonomy
   4  * http://www.e-taxonomy.eu
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version 1.1
   7  * See LICENSE.TXT at the top of this package for the full license terms.
   8  */
   9
  10 package eu.etaxonomy.cdm.persistence.dao.hibernate;
  11
  12
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.hibernate.SessionFactory;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.hibernate.search.SearchFactory;
import org.hibernate.search.indexes.IndexReaderAccessor;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.orm.hibernate3.support.HibernateDaoSupport;

import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.persistence.dao.IAlternativeSpellingSuggestionParser;
  48
  49
  50 /**
  51  * @author unknown
  52  *
  53  * @param <T>
  54  * @deprecated Use current methods for alternative spelling suggestions. This class is no longer supported
  55  * after migration to hibernate 4.x.
  56  */
  57 @Deprecated
  58 public abstract class AlternativeSpellingSuggestionParser<T extends CdmBase>
  59                 extends HibernateDaoSupport
  60                 implements IAlternativeSpellingSuggestionParser {
  61         private static Log log = LogFactory.getLog(AlternativeSpellingSuggestionParser.class);
  62
  63         private String defaultField;
  64         protected Directory directory;
  65         private final Class<T> type;
  66         private Class<? extends T> indexedClasses[];
  67
  68
  69         public AlternativeSpellingSuggestionParser(Class<T> type) {
  70                 this.type = type;
  71         }
  72
  73         public void setIndexedClasses(Class<? extends T> indexedClasses[]) {
  74                 this.indexedClasses = indexedClasses;
  75         }
  76
  77         public abstract void setDirectory(Directory directory);
  78
  79         @Autowired
  80         public void setHibernateSessionFactory(SessionFactory sessionFactory) {
  81                 super.setSessionFactory(sessionFactory);
  82         }
  83
  84         public void setDefaultField(String defaultField) {
  85                 this.defaultField = defaultField;
  86         }
  87
  88         @Override
  89     public Query parse(String queryString) throws ParseException {
  90                 QueryParser queryParser = new QueryParser(defaultField, new StandardAnalyzer());
  91                 return queryParser.parse(queryString);
  92         }
  93
  94         @Override
  95     public Query suggest(String queryString) throws ParseException {
  96                 QuerySuggester querySuggester = new QuerySuggester(defaultField, new StandardAnalyzer());
  97                 Query query = querySuggester.parse(queryString);
  98                 return querySuggester.hasSuggestedQuery() ? query : null;
  99         }
 100
 101         private class QuerySuggester extends QueryParser {
 102                 private boolean suggestedQuery = false;
 103                 public QuerySuggester(String field, Analyzer analyzer) {
 104                         super(field, analyzer);
 105                 }
 106                 @Override
 107         protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
 108                         // Copied from org.apache.lucene.queryParser.QueryParser
 109                         // replacing construction of TermQuery with call to getTermQuery()
 110                         // which finds close matches.
 111                         TokenStream source;
 112             source = getAnalyzer().tokenStream(field, new StringReader(queryText));
 113                         Vector<Object> v = new Vector<Object>();
 114                         Token t;
 115
 116                         while (true) {
 117                                 try {
 118                                         //OLD
 119 //                                      t = source.next();
 120
 121                                         //FIXME this is new after Hibernate 4 migration
 122                                         //but completely unchecked and unsure if correct
 123                                         //#3344
 124                                         boolean it = source.incrementToken();
 125                                         t = source.getAttribute(Token.class);
 126
 127
 128
 129                                 } catch (IOException e) {
 130                                         t = null;
 131                                 }
 132                                 if (t == null){
 133                                         break;
 134                                 }
 135
 136 //              OLD             v.addElement(t.termText());
 137                                 //FIXME unchecked #3344
 138                                 //FIXME #4716  not sure if this implementation equals the old t.term()
 139                 String term = new String(t.buffer(), 0, t.length());
 140
 141                                 v.addElement(term);
 142                         }
 143                         try {
 144                                 source.close();
 145                         } catch (IOException e) {
 146                                 // ignore
 147                         }
 148
 149                         if (v.size() == 0) {
 150                 return null;
 151             } else if (v.size() == 1) {
 152                 return new TermQuery(getTerm(field, (String) v.elementAt(0)));
 153             } else {
 154                                 PhraseQuery q = new PhraseQuery();
 155                                 q.setSlop(getPhraseSlop());
 156                                 for (int i = 0; i < v.size(); i++) {
 157                                         q.add(getTerm(field, (String) v.elementAt(i)));
 158                                 }
 159                                 return q;
 160                         }
 161                 }
 162
 163                 private Term getTerm(String field, String queryText) throws ParseException {
 164
 165                         try {
 166                                 SpellChecker spellChecker = new SpellChecker(directory);
 167                                 if (spellChecker.exist(queryText)) {
 168                                         return new Term(field, queryText);
 169                                 }
 170                                 String[] similarWords = spellChecker.suggestSimilar(queryText, 1);
 171                                 if (similarWords.length == 0) {
 172                                         return new Term(field, queryText);
 173                                 }
 174                                 suggestedQuery = true;
 175                                 return new Term(field, similarWords[0]);
 176                         } catch (IOException e) {
 177                                 throw new ParseException(e.getMessage());
 178                         }
 179                 }
 180                 public boolean hasSuggestedQuery() {
 181                         return suggestedQuery;
 182                 }
 183         }
 184
 185         @Override
 186     public void refresh() {
 187                 FullTextSession fullTextSession = Search.getFullTextSession(getSession());
 188                 SearchFactory searchFactory = fullTextSession.getSearchFactory();
 189                 try {
 190                         SpellChecker spellChecker = new SpellChecker(directory);
 191
 192                         for(Class<? extends T> indexedClass : indexedClasses) {
 193                                 //OLD
 194 //                              DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(indexedClass)[0];
 195 //                              ReaderProvider readerProvider = searchFactory.getReaderProvider();
 196                                 IndexReaderAccessor ira = searchFactory.getIndexReaderAccessor();
 197 //                              IndexReader indexReader = ira.open(indexedClass);
 198                                 IndexReader indexReader = null;
 199
 200                                 try {
 201
 202                                         indexReader = ira.open(indexedClass);
 203 //                                      indexReader = readerProvider.openIndexReader(); //  .openReader(directoryProvider);
 204                                         log.debug("Creating new dictionary for words in " + defaultField + " docs " + indexReader.numDocs());
 205
 206                                         Dictionary dictionary = new LuceneDictionary(indexReader, defaultField);
 207                                         if(log.isDebugEnabled()) {
 208                                                 BytesRefIterator iterator = dictionary.getEntryIterator();
 209                                                 BytesRef bytesRef;
 210                                                 while((bytesRef = iterator.next())  != null) {
 211                                                         log.debug("Indexing word " + bytesRef);
 212                                                 }
 213                                         }
 214
 215
 216 //                                      OLD: spellChecker.indexDictionary(dictionary);
 217                                         //FIXME preliminary for Hibernate 4 migration see # 3344
 218                                         IndexWriterConfig config = new IndexWriterConfig( new StandardAnalyzer());
 219                                         boolean fullMerge = true;
 220                                         spellChecker.indexDictionary(dictionary, config, fullMerge);
 221
 222                                 } catch (CorruptIndexException cie) {
 223                                         log.error("Spellings index is corrupted", cie);
 224                                 } finally {
 225                                         if (indexReader != null) {
 226 //                                              readerProvider.closeIndexReader(indexReader);
 227                                                 ira.close(indexReader);
 228                                         }
 229                                 }
 230                         }
 231                 }catch (IOException ioe) {
 232                         log.error(ioe);
 233                 }
 234         }
 235
 236 }