fixing stack overflow due to incorrect usage of full-text sessions
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / search / CdmMassIndexer.java
index db4ecf3cee385a3dc96def7461ad74ea1fe45527..6390309cffa4a2bde98f607c58f3842d7a236b12 100644 (file)
@@ -9,7 +9,19 @@
 */
 package eu.etaxonomy.cdm.api.service.search;
 
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
 import org.apache.log4j.Logger;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.search.spell.LuceneDictionary;
+import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.store.Directory;
 import org.hibernate.CacheMode;
 import org.hibernate.FlushMode;
 import org.hibernate.ScrollMode;
@@ -17,6 +29,9 @@ import org.hibernate.ScrollableResults;
 import org.hibernate.Session;
 import org.hibernate.search.FullTextSession;
 import org.hibernate.search.Search;
+import org.hibernate.search.engine.spi.SearchFactoryImplementor;
+import org.hibernate.search.indexes.impl.DirectoryBasedIndexManager;
+import org.hibernate.search.indexes.spi.IndexManager;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.orm.hibernate4.HibernateTransactionManager;
 import org.springframework.stereotype.Component;
@@ -27,8 +42,12 @@ import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
 import eu.etaxonomy.cdm.common.monitor.NullProgressMonitor;
 import eu.etaxonomy.cdm.common.monitor.RestServiceProgressMonitor;
 import eu.etaxonomy.cdm.common.monitor.SubProgressMonitor;
+import eu.etaxonomy.cdm.config.Configuration;
 import eu.etaxonomy.cdm.model.common.CdmBase;
 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
+import eu.etaxonomy.cdm.model.name.NonViralName;
+import eu.etaxonomy.cdm.model.name.TaxonNameBase;
+import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationBase;
 import eu.etaxonomy.cdm.model.taxon.Classification;
 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
 
@@ -43,7 +62,7 @@ public class CdmMassIndexer implements ICdmMassIndexer {
 
     public static final Logger logger = Logger.getLogger(CdmMassIndexer.class);
 
-    private static final int BATCH_SIZE = 100;
+    private static final int BATCH_SIZE = 2000;
 
     public HibernateTransactionManager transactionManager;
 
@@ -70,7 +89,7 @@ public class CdmMassIndexer implements ICdmMassIndexer {
         Long countResult = countEntities(type);
         int numOfBatches = calculateNumOfBatches(countResult);
 
-        SubProgressMonitor subMonitor = new SubProgressMonitor(monitor, numOfBatches);
+        SubProgressMonitor subMonitor = new SubProgressMonitor(monitor, 1);
         subMonitor.beginTask("Indexing " + type.getSimpleName(), numOfBatches);
 
         // Scrollable results will avoid loading too many objects in memory
@@ -86,9 +105,8 @@ public class CdmMassIndexer implements ICdmMassIndexer {
                     batchesWorked++;
                     fullTextSession.flushToIndexes(); // apply changes to indexes
                     fullTextSession.clear(); // clear since the queue is processed
-                    //                calculateNumOfBatches(index == countResult ? countResult : index);
+                    subMonitor.worked(1);
                     logger.info("\tbatch " + batchesWorked + "/" + numOfBatches + " processed");
-                    subMonitor.internalWorked(1);
                     //if(index / BATCH_SIZE > 10 ) break;
                 }
             }
@@ -102,6 +120,69 @@ public class CdmMassIndexer implements ICdmMassIndexer {
         subMonitor.done();
     }
 
+    /**
+     * Builds a spell-check dictionary for the given type, stored in the directory of the type's index manager,
+     * by feeding each indexed declared field to a SpellChecker; returns at once if the type is not {@code @Indexed}.
+     * @param type the class whose declared {@code @Field}/{@code @Fields} fields are used as dictionary sources
+     * @param monitor parent progress monitor; it also receives STOPPED_WORK_INDICATOR when an exception is caught
+     */
+    protected <T extends CdmBase> void createDictionary(Class<T> type, IProgressMonitor monitor)  {
+        String indexName = null;
+        if(type.isAnnotationPresent(org.hibernate.search.annotations.Indexed.class)) {
+               indexName = type.getAnnotation(org.hibernate.search.annotations.Indexed.class).index();
+        } else {
+               //TODO: give some indication that this class is in fact not indexed
+               return;
+        }
+        SearchFactoryImplementor searchFactory = (SearchFactoryImplementor)Search.getFullTextSession(getSession()).getSearchFactory();
+        IndexManager indexManager = searchFactory.getAllIndexesManager().getIndexManager(indexName);
+        IndexReader indexReader = searchFactory.getIndexReaderAccessor().open(type);
+       List<String> idFields = getIndexedDeclaredFields(type);
+
+        monitor.subTask("creating dictionary " + type.getSimpleName());
+
+        SubProgressMonitor subMonitor = new SubProgressMonitor(monitor, 1);
+        subMonitor.beginTask("Creating dictionary " + type.getSimpleName(), 1);
+
+        Directory directory = ((DirectoryBasedIndexManager) indexManager).getDirectoryProvider().getDirectory();
+        SpellChecker spellChecker = null;
+       try {
+               spellChecker = new SpellChecker(directory);
+               Iterator<String> itr = idFields.iterator();
+               while(itr.hasNext()) {
+                       String indexedField = itr.next();
+                       logger.info("creating dictionary for field " + indexedField);
+                       Dictionary dictionary = new LuceneDictionary(indexReader, indexedField);
+                       IndexWriterConfig iwc = new IndexWriterConfig(Configuration.luceneVersion, searchFactory.getAnalyzer(type));
+                       spellChecker.indexDictionary(dictionary, iwc, true);
+               }
+               subMonitor.internalWorked(1);
+       } catch (IOException e) {
+               logger.error("IOException when creating dictionary", e);
+               //TODO better means to notify that the process has been stopped, using the STOPPED_WORK_INDICATOR is only a hack
+            monitor.worked(RestServiceProgressMonitor.STOPPED_WORK_INDICATOR);
+            monitor.done();
+       } catch (RuntimeException e) {
+               logger.error("RuntimeException when creating dictionary", e);
+               //TODO better means to notify that the process has been stopped, using the STOPPED_WORK_INDICATOR is only a hack
+            monitor.worked(RestServiceProgressMonitor.STOPPED_WORK_INDICATOR);
+            monitor.done();
+       } finally {
+               searchFactory.getIndexReaderAccessor().close(indexReader);
+       }
+       if (spellChecker != null) {
+               try {
+                       logger.info("closing spellchecker ");
+                       spellChecker.close();
+               } catch (IOException e) {
+                       logger.error("IOException when closing spellchecker", e);
+               }
+       }
+
+       logger.info("end creating dictionary " + type.getName());
+       subMonitor.done();
+    }
+
     /**
      * @param countResult
      * @return
@@ -126,6 +207,30 @@ public class CdmMassIndexer implements ICdmMassIndexer {
         FullTextSession fullTextSession = Search.getFullTextSession(getSession());
         logger.info("purging " + type.getName());
         fullTextSession.purgeAll(type);
+
+
+        SearchFactoryImplementor searchFactory = (SearchFactoryImplementor)fullTextSession.getSearchFactory();
+        IndexManager indexManager = searchFactory.getAllIndexesManager().getIndexManager(type.getName());
+        Directory directory = ((DirectoryBasedIndexManager) indexManager).getDirectoryProvider().getDirectory();
+        SpellChecker spellChecker = null;
+       try {
+               spellChecker = new SpellChecker(directory);
+               spellChecker.clearIndex();
+       } catch (IOException e) {
+               logger.error("IOException when creating dictionary", e);
+               //TODO better means to notify that the process has been stopped, using the STOPPED_WORK_INDICATOR is only a hack
+            monitor.worked(RestServiceProgressMonitor.STOPPED_WORK_INDICATOR);
+            monitor.done();
+       }
+
+       if (spellChecker != null) {
+               try {
+                       logger.info("closing spellchecker ");
+                       spellChecker.close();
+               } catch (IOException e) {
+                       logger.error("IOException when closing spellchecker", e);
+               }
+       }
     }
 
 
@@ -140,26 +245,44 @@ public class CdmMassIndexer implements ICdmMassIndexer {
         }
 
         monitor.setTaskName("CdmMassIndexer");
-        int steps = totalBatchCount() + 1; // +1 for optimize
+        int steps = indexedClasses().length + 1; // +1 for optimize
         monitor.beginTask("Reindexing " + indexedClasses().length + " classes", steps);
 
-        for(Class type : indexedClasses()){
+        for(Class<? extends CdmBase> type : indexedClasses()){
             reindex(type, monitor);
+            // clear the session after each class to free memory
+            getSession().clear();
         }
-        optimize(monitor);
+
+        optimize();
+        monitor.worked(1);
+
         monitor.done();
     }
 
-    protected void optimize(IProgressMonitor monitor) {
+       @Override
+       public void createDictionary(IProgressMonitor monitor) {
+        if(monitor == null){
+            monitor = new NullProgressMonitor();
+        }
 
-        monitor.subTask("optimizing");
-        SubProgressMonitor subMonitor = new SubProgressMonitor(monitor, 1);
+        monitor.setTaskName("CdmMassIndexer_Dictionary");
+        int steps = dictionaryClasses().length; // one step per dictionary class; no optimize step here
+        monitor.beginTask("Creating Dictionary " + dictionaryClasses().length + " classes", steps);
+
+        for(Class type : dictionaryClasses()){
+               createDictionary(type, monitor);
+        }
+
+        monitor.done();
+
+       }
+    protected void optimize() {
 
         FullTextSession fullTextSession = Search.getFullTextSession(getSession());
         fullTextSession.getSearchFactory().optimize();
-
-        subMonitor.beginTask("optimizing", 1);
-        subMonitor.done();
+        fullTextSession.flushToIndexes();
+        fullTextSession.clear();
     }
 
     /**
@@ -187,31 +310,71 @@ public class CdmMassIndexer implements ICdmMassIndexer {
         int steps = indexedClasses().length + 1; // +1 for optimize
         monitor.beginTask("Purging " + indexedClasses().length + " classes", steps);
 
-        for(Class type : indexedClasses()){
+        for(Class<? extends CdmBase> type : indexedClasses()){
             purge(type, monitor);
             monitor.worked(1);
         }
+        // need to flush to the index before optimizing
+        // the purge method is not doing the flushing by itself
+        FullTextSession fullTextSession = Search.getFullTextSession(getSession());
+        fullTextSession.flushToIndexes();
 
-//        // need to commit and start new transaction before optimizing
-//        FullTextSession fullTextSession = Search.getFullTextSession(getSession());
-//        Transaction tx = fullTextSession.getTransaction();
-//        tx.commit();
-//        fullTextSession.beginTransaction(); // will be committed automatically at the end of this method since this class is transactional
-
-//        optimize(monitor);
+        // optimize
+        optimize();
+        monitor.worked(1);
 
+        // done
         monitor.done();
     }
 
+
     /**
+     * Returns the names of the fields declared directly on a class that are marked for indexing, found via reflection.
+     * A field qualifies if it carries {@code @Field} or {@code @Fields}; the list is empty if the class is not {@code @Indexed}.
+     * @param clazz the class whose declared fields are inspected; annotated methods are not looked at
      * @return
      */
+    private List<String> getIndexedDeclaredFields(Class clazz) {
+       List<String> idFields = new ArrayList<String>();
+       if(clazz.isAnnotationPresent(org.hibernate.search.annotations.Indexed.class)) {
+               Field[] declaredFields = clazz.getDeclaredFields();
+               for(int i=0;i<declaredFields.length;i++ ) {
+                       logger.info("checking field " + declaredFields[i].getName());
+                       if(declaredFields[i].isAnnotationPresent(org.hibernate.search.annotations.Field.class) ||
+                                       declaredFields[i].isAnnotationPresent(org.hibernate.search.annotations.Fields.class)) {
+                               idFields.add(declaredFields[i].getName());
+                               logger.info("adding field " + declaredFields[i].getName());
+                       }
+               }
+       }
+       return idFields;
+    }
+    /**
+     * @return the classes that the reindex and purge runs of this indexer iterate over
+     */
+    @SuppressWarnings("unchecked")
     @Override
-    public Class[] indexedClasses() {
+    public Class<? extends CdmBase>[] indexedClasses() {
         return new Class[] {
                 DescriptionElementBase.class,
                 Classification.class,
-                TaxonBase.class
+                TaxonBase.class,
+                TaxonNameBase.class,
+                SpecimenOrObservationBase.class
                 };
     }
+
+    /**
+     * @return the classes for which {@link #createDictionary(IProgressMonitor)} builds a dictionary
+     */
+    @Override
+    public Class[] dictionaryClasses() {
+        return new Class[] {
+                       NonViralName.class
+                };
+    }
+
+
+
+
 }
\ No newline at end of file