From f1fd0785b079fa5cdd0f41c5ce539a5070595142 Mon Sep 17 00:00:00 2001 From: Andreas Kohlbecker Date: Wed, 25 Sep 2013 09:06:56 +0000 Subject: [PATCH] refactoring for a lucene cross index search --- .gitattributes | 2 + .../cdm/api/service/NameServiceImpl.java | 179 +++++++++--------- .../api/service/OccurrenceServiceImpl.java | 9 +- .../cdm/api/service/TaxonServiceImpl.java | 86 +++++---- .../search/ILuceneIndexToolProvider.java | 66 +++++++ .../search/LuceneIndexToolProviderImpl.java | 139 ++++++++++++++ .../api/service/search/LuceneMultiSearch.java | 21 +- .../cdm/api/service/search/LuceneSearch.java | 92 +++------ .../cdm/api/service/search/QueryFactory.java | 43 +++-- 9 files changed, 410 insertions(+), 227 deletions(-) create mode 100644 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/ILuceneIndexToolProvider.java create mode 100644 cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneIndexToolProviderImpl.java diff --git a/.gitattributes b/.gitattributes index b12ff1382a..1861093a78 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2117,7 +2117,9 @@ cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/pager/impl/StringLabe cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/CdmMassIndexer.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/DocumentSearchResult.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/ICdmMassIndexer.java -text +cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/ILuceneIndexToolProvider.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/ISearchResultBuilder.java -text +cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneIndexToolProviderImpl.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneMultiSearch.java -text cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneMultiSearchException.java -text 
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneSearch.java -text diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NameServiceImpl.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NameServiceImpl.java index ca590b0fe9..8c908fe8c0 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NameServiceImpl.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/NameServiceImpl.java @@ -41,6 +41,7 @@ import eu.etaxonomy.cdm.api.service.pager.Pager; import eu.etaxonomy.cdm.api.service.pager.impl.AbstractPagerImpl; import eu.etaxonomy.cdm.api.service.pager.impl.DefaultPagerImpl; import eu.etaxonomy.cdm.api.service.search.DocumentSearchResult; +import eu.etaxonomy.cdm.api.service.search.ILuceneIndexToolProvider; import eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder; import eu.etaxonomy.cdm.api.service.search.LuceneSearch; import eu.etaxonomy.cdm.api.service.search.QueryFactory; @@ -106,6 +107,8 @@ public class NameServiceImpl extends IdentifiableServiceBase clazz, - NonViralName nvn, - float accuracy, - int maxNoOfResults, - List languages, - boolean highlightFragments) { - String similarity = Float.toString(accuracy); - String searchSuffix = "~" + similarity; + NonViralName nvn, + float accuracy, + int maxNoOfResults, + List languages, + boolean highlightFragments) { + String similarity = Float.toString(accuracy); + String searchSuffix = "~" + similarity; - BooleanQuery finalQuery = new BooleanQuery(false); - BooleanQuery textQuery = new BooleanQuery(false); + BooleanQuery finalQuery = new BooleanQuery(false); + BooleanQuery textQuery = new BooleanQuery(false); - LuceneSearch luceneSearch = new LuceneSearch(getSession(), TaxonNameBase.class); - QueryFactory queryFactory = new QueryFactory(luceneSearch); + LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, TaxonNameBase.class); + QueryFactory queryFactory = 
luceneIndexToolProvider.newQueryFactoryFor(TaxonNameBase.class); // SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING, false)}; // luceneSearch.setSortFields(sortFields); - // ---- search criteria - luceneSearch.setClazz(clazz); + // ---- search criteria + luceneSearch.setCdmTypRestriction(clazz); - FuzzyLikeThisQuery fltq = new FuzzyLikeThisQuery(maxNoOfResults, luceneSearch.getAnalyzer()); - if(nvn.getGenusOrUninomial() != null && !nvn.getGenusOrUninomial().equals("")) { - fltq.addTerms(nvn.getGenusOrUninomial().toLowerCase(), "genusOrUninomial", accuracy, 3); - } else { - //textQuery.add(new RegexQuery (new Term ("genusOrUninomial", "^[a-zA-Z]*")), Occur.MUST_NOT); - textQuery.add(queryFactory.newTermQuery("genusOrUninomial", "_null_", false), Occur.MUST); - } + FuzzyLikeThisQuery fltq = new FuzzyLikeThisQuery(maxNoOfResults, luceneSearch.getAnalyzer()); + if(nvn.getGenusOrUninomial() != null && !nvn.getGenusOrUninomial().equals("")) { + fltq.addTerms(nvn.getGenusOrUninomial().toLowerCase(), "genusOrUninomial", accuracy, 3); + } else { + //textQuery.add(new RegexQuery (new Term ("genusOrUninomial", "^[a-zA-Z]*")), Occur.MUST_NOT); + textQuery.add(queryFactory.newTermQuery("genusOrUninomial", "_null_", false), Occur.MUST); + } - if(nvn.getInfraGenericEpithet() != null && !nvn.getInfraGenericEpithet().equals("")){ - fltq.addTerms(nvn.getInfraGenericEpithet().toLowerCase(), "infraGenericEpithet", accuracy, 3); - } else { - //textQuery.add(new RegexQuery (new Term ("infraGenericEpithet", "^[a-zA-Z]*")), Occur.MUST_NOT); - textQuery.add(queryFactory.newTermQuery("infraGenericEpithet", "_null_", false), Occur.MUST); - } + if(nvn.getInfraGenericEpithet() != null && !nvn.getInfraGenericEpithet().equals("")){ + fltq.addTerms(nvn.getInfraGenericEpithet().toLowerCase(), "infraGenericEpithet", accuracy, 3); + } else { + //textQuery.add(new RegexQuery (new Term ("infraGenericEpithet", "^[a-zA-Z]*")), 
Occur.MUST_NOT); + textQuery.add(queryFactory.newTermQuery("infraGenericEpithet", "_null_", false), Occur.MUST); + } - if(nvn.getSpecificEpithet() != null && !nvn.getSpecificEpithet().equals("")){ - fltq.addTerms(nvn.getSpecificEpithet().toLowerCase(), "specificEpithet", accuracy, 3); - } else { - //textQuery.add(new RegexQuery (new Term ("specificEpithet", "^[a-zA-Z]*")), Occur.MUST_NOT); - textQuery.add(queryFactory.newTermQuery("specificEpithet", "_null_", false), Occur.MUST); - } + if(nvn.getSpecificEpithet() != null && !nvn.getSpecificEpithet().equals("")){ + fltq.addTerms(nvn.getSpecificEpithet().toLowerCase(), "specificEpithet", accuracy, 3); + } else { + //textQuery.add(new RegexQuery (new Term ("specificEpithet", "^[a-zA-Z]*")), Occur.MUST_NOT); + textQuery.add(queryFactory.newTermQuery("specificEpithet", "_null_", false), Occur.MUST); + } - if(nvn.getInfraSpecificEpithet() != null && !nvn.getInfraSpecificEpithet().equals("")){ - fltq.addTerms(nvn.getInfraSpecificEpithet().toLowerCase(), "infraSpecificEpithet", accuracy, 3); - } else { - //textQuery.add(new RegexQuery (new Term ("infraSpecificEpithet", "^[a-zA-Z]*")), Occur.MUST_NOT); - textQuery.add(queryFactory.newTermQuery("infraSpecificEpithet", "_null_", false), Occur.MUST); - } + if(nvn.getInfraSpecificEpithet() != null && !nvn.getInfraSpecificEpithet().equals("")){ + fltq.addTerms(nvn.getInfraSpecificEpithet().toLowerCase(), "infraSpecificEpithet", accuracy, 3); + } else { + //textQuery.add(new RegexQuery (new Term ("infraSpecificEpithet", "^[a-zA-Z]*")), Occur.MUST_NOT); + textQuery.add(queryFactory.newTermQuery("infraSpecificEpithet", "_null_", false), Occur.MUST); + } - if(nvn.getAuthorshipCache() != null && !nvn.getAuthorshipCache().equals("")){ - fltq.addTerms(nvn.getAuthorshipCache().toLowerCase(), "authorshipCache", accuracy, 3); - } else { - //textQuery.add(new RegexQuery (new Term ("authorshipCache", "^[a-zA-Z]*")), Occur.MUST_NOT); - } + if(nvn.getAuthorshipCache() != null && 
!nvn.getAuthorshipCache().equals("")){ + fltq.addTerms(nvn.getAuthorshipCache().toLowerCase(), "authorshipCache", accuracy, 3); + } else { + //textQuery.add(new RegexQuery (new Term ("authorshipCache", "^[a-zA-Z]*")), Occur.MUST_NOT); + } - textQuery.add(fltq, Occur.MUST); + textQuery.add(fltq, Occur.MUST); - finalQuery.add(textQuery, Occur.MUST); + finalQuery.add(textQuery, Occur.MUST); - luceneSearch.setQuery(finalQuery); + luceneSearch.setQuery(finalQuery); - if(highlightFragments){ - luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray()); - } - return luceneSearch; + if(highlightFragments){ + luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray()); + } + return luceneSearch; } protected LuceneSearch prepareFindByFuzzyNameCacheSearch(Class clazz, - String name, - float accuracy, - int maxNoOfResults, - List languages, + String name, + float accuracy, + int maxNoOfResults, + List languages, boolean highlightFragments) { - LuceneSearch luceneSearch = new LuceneSearch(getSession(), TaxonNameBase.class); - QueryFactory queryFactory = new QueryFactory(luceneSearch); + LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, TaxonNameBase.class); + QueryFactory queryFactory = luceneIndexToolProvider.newQueryFactoryFor(TaxonNameBase.class); // SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING, false)}; // luceneSearch.setSortFields(sortFields); // ---- search criteria - luceneSearch.setClazz(clazz); + luceneSearch.setCdmTypRestriction(clazz); FuzzyLikeThisQuery fltq = new FuzzyLikeThisQuery(maxNoOfResults, luceneSearch.getAnalyzer()); fltq.addTerms(name, "nameCache", accuracy, 3); - BooleanQuery finalQuery = new BooleanQuery(false); + BooleanQuery finalQuery = new BooleanQuery(false); - finalQuery.add(fltq, Occur.MUST); + finalQuery.add(fltq, Occur.MUST); luceneSearch.setQuery(finalQuery); @@ -657,28 +660,28 @@ public class NameServiceImpl extends 
IdentifiableServiceBase clazz, - String name, - boolean wildcard, - List languages, + String name, + boolean wildcard, + List languages, boolean highlightFragments) { BooleanQuery finalQuery = new BooleanQuery(); BooleanQuery textQuery = new BooleanQuery(); - LuceneSearch luceneSearch = new LuceneSearch(getSession(), TaxonNameBase.class); - QueryFactory queryFactory = new QueryFactory(luceneSearch); + LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, TaxonNameBase.class); + QueryFactory queryFactory = luceneIndexToolProvider.newQueryFactoryFor(TaxonNameBase.class); // SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING, false)}; // luceneSearch.setSortFields(sortFields); // ---- search criteria - luceneSearch.setClazz(clazz); + luceneSearch.setCdmTypRestriction(clazz); if(name != null && !name.equals("")) { - if(wildcard) { - textQuery.add(new WildcardQuery(new Term("nameCache", name + "*")), Occur.MUST); - } else { - textQuery.add(queryFactory.newTermQuery("nameCache", name, false), Occur.MUST); - } + if(wildcard) { + textQuery.add(new WildcardQuery(new Term("nameCache", name + "*")), Occur.MUST); + } else { + textQuery.add(queryFactory.newTermQuery("nameCache", name, false), Occur.MUST); + } } luceneSearch.setQuery(textQuery); @@ -698,13 +701,13 @@ public class NameServiceImpl extends IdentifiableServiceBase propertyPaths, int maxNoOfResults) throws CorruptIndexException, IOException, ParseException { - logger.info("Name to fuzzy search for : " + name); - // parse the input name - NonViralNameParserImpl parser = new NonViralNameParserImpl(); - NonViralName nvn = parser.parseFullName(name); - if(name != null && !name.equals("") && nvn == null) { - throw new ParseException("Could not parse name " + name); - } + logger.info("Name to fuzzy search for : " + name); + // parse the input name + NonViralNameParserImpl parser = new NonViralNameParserImpl(); + NonViralName nvn = 
parser.parseFullName(name); + if(name != null && !name.equals("") && nvn == null) { + throw new ParseException("Could not parse name " + name); + } LuceneSearch luceneSearch = prepareFindByFuzzyNameSearch(null, nvn, accuracy, maxNoOfResults, languages, highlightFragments); // --- execute search @@ -733,13 +736,13 @@ public class NameServiceImpl extends IdentifiableServiceBase findByFuzzyNameCacheSearch( String name, - float accuracy, + float accuracy, List languages, boolean highlightFragments, int maxNoOfResults) throws CorruptIndexException, IOException, ParseException { - logger.info("Name to fuzzy search for : " + name); + logger.info("Name to fuzzy search for : " + name); LuceneSearch luceneSearch = prepareFindByFuzzyNameCacheSearch(null, name, accuracy, maxNoOfResults, languages, highlightFragments); @@ -789,9 +792,9 @@ public class NameServiceImpl extends IdentifiableServiceBase 0){ // - http://www.javaranch.com/journal/2009/02/filtering-a-lucene-search.html @@ -1572,11 +1574,9 @@ public class TaxonServiceImpl extends IdentifiableServiceBase 0 ){ - finalQuery.add(queryFactory.newEntityUuidsQuery("feature.uuid", features), Occur.MUST); + finalQuery.add(descriptionElementQueryFactory.newEntityUuidsQuery("feature.uuid", features), Occur.MUST); } // the description must be associated with a taxon - finalQuery.add(queryFactory.newIsNotNullQuery("inDescription.taxon.id"), Occur.MUST); + finalQuery.add(descriptionElementQueryFactory.newIsNotNullQuery("inDescription.taxon.id"), Occur.MUST); logger.info("prepareByDescriptionElementFullTextSearch() query: " + finalQuery.toString()); luceneSearch.setQuery(finalQuery); if(highlightFragments){ - luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray()); + luceneSearch.setHighlightFields(descriptionElementQueryFactory.getTextFieldNamesAsArray()); } return luceneSearch; } diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/ILuceneIndexToolProvider.java 
b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/ILuceneIndexToolProvider.java new file mode 100644 index 0000000000..a699d0f887 --- /dev/null +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/ILuceneIndexToolProvider.java @@ -0,0 +1,66 @@ +// $Id$ +/** + * Copyright (C) 2013 EDIT + * European Distributed Institute of Taxonomy + * http://www.e-taxonomy.eu + * + * The contents of this file are subject to the Mozilla Public License Version 1.1 + * See LICENSE.TXT at the top of this package for the full license terms. + */ +package eu.etaxonomy.cdm.api.service.search; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryParser.QueryParser; +import org.hibernate.search.indexes.IndexReaderAccessor; + +import eu.etaxonomy.cdm.model.common.CdmBase; + +/** + * @author a.kohlbecker + * @date Sep 18, 2013 + * + */ +public interface ILuceneIndexToolProvider { + + /** + * @return the IndexReader suitable for the lucene index of the given + * clazz + */ + public abstract IndexReader getIndexReaderFor(Class clazz); + + /** + * Either creates a new QueryParser or returns the QueryParser which has + * been created before for the specified class. The QueryParsers per CdmBase + * type are cached in a Map. + * + * @return the QueryParser suitable for the lucene index of the given + * clazz + */ + public abstract QueryParser getQueryParserFor(Class clazz); + + /** + * WARING The implementation of this method might return an Analyzer + * which is not suitable for all fields of the lucene document. This method + * internally uses the simplified method from {@link { + * @link org.hibernate.search.SearchFactory#getAnalyzer(Class)} + * + * @return the Analyzer suitable for the lucene index of the given + * clazz + */ + public abstract Analyzer getAnalyzerFor(Class clazz); + + /** + * Creates new QueryFactory for the specified Cdm type. 
+ * + * @return A new QueryFactory suitable for the lucene index of the given + * clazz + */ + public abstract QueryFactory newQueryFactoryFor(Class clazz); + + /** + * @return the IndexReaderAccessor from the SearchFactory + */ + public abstract IndexReaderAccessor getIndexReaderAccessor(); + +} \ No newline at end of file diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneIndexToolProviderImpl.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneIndexToolProviderImpl.java new file mode 100644 index 0000000000..3fb815dd42 --- /dev/null +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneIndexToolProviderImpl.java @@ -0,0 +1,139 @@ +// $Id$ +/** + * Copyright (C) 2013 EDIT + * European Distributed Institute of Taxonomy + * http://www.e-taxonomy.eu + * + * The contents of this file are subject to the Mozilla Public License Version 1.1 + * See LICENSE.TXT at the top of this package for the full license terms. 
+ */ +package eu.etaxonomy.cdm.api.service.search; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queryParser.QueryParser; +import org.hibernate.SessionFactory; +import org.hibernate.search.Search; +import org.hibernate.search.SearchFactory; +import org.hibernate.search.indexes.IndexReaderAccessor; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import eu.etaxonomy.cdm.config.Configuration; +import eu.etaxonomy.cdm.model.common.CdmBase; +import eu.etaxonomy.cdm.model.description.DescriptionElementBase; +import eu.etaxonomy.cdm.model.description.TextData; +import eu.etaxonomy.cdm.model.name.NonViralName; +import eu.etaxonomy.cdm.model.name.TaxonNameBase; +import eu.etaxonomy.cdm.model.taxon.Taxon; +import eu.etaxonomy.cdm.model.taxon.TaxonBase; + +/** + * @author a.kohlbecker + * @date Sep 18, 2013 + * + */ +@Component +public class LuceneIndexToolProviderImpl implements ILuceneIndexToolProvider { + + private final static String DEFAULT_QURERY_FIELD_NAME = "titleCache"; + + @Autowired + private SessionFactory sessionFactory; + + private final Map, QueryParser> queryParsers = new HashMap, QueryParser>(); + + /** + * @param sessionfactory + * @return + */ + private SearchFactory getCurrentSearchFactory() { + return Search.getFullTextSession(sessionFactory.getCurrentSession()).getSearchFactory(); + } + + + /** + * TODO the abstract base class DescriptionElementBase can not be used, so + * we are using an arbitrary subclass to find the DirectoryProvider, future + * versions of hibernate search my allow using abstract base classes see + * {@link http://stackoverflow.com/questions/492184/how-do-you-find-all-subclasses-of-a-given-class-in-java} + * + * @param type must not be null + * @return + */ + protected Class pushAbstractBaseTypeDown(Class type) { + if(type == null) { + throw 
new NullPointerException("parameter type must not be null"); + } + if (type.equals(DescriptionElementBase.class)) { + return TextData.class; + } + if (type.equals(TaxonBase.class)) { + return Taxon.class; + } + if (type.equals(TaxonNameBase.class)) { + return NonViralName.class; + } + return type; + } + + /* (non-Javadoc) + * @see eu.etaxonomy.cdm.api.service.search.ILuceneIndexToolProvider#getIndexReaderFor(java.lang.Class) + */ + @Override + public IndexReader getIndexReaderFor(Class clazz) { + IndexReader reader = getCurrentSearchFactory().getIndexReaderAccessor().open(pushAbstractBaseTypeDown(clazz)); + return reader; + } + + /* (non-Javadoc) + * @see eu.etaxonomy.cdm.api.service.search.ILuceneIndexToolProvider#getQueryParserFor(java.lang.Class) + */ + @Override + public QueryParser getQueryParserFor(Class clazz) { + if(!queryParsers.containsKey(clazz)){ + Analyzer analyzer = getAnalyzerFor(clazz); + QueryParser parser = new QueryParser(Configuration.luceneVersion, DEFAULT_QURERY_FIELD_NAME, analyzer); + queryParsers.put(clazz, parser); + } + return queryParsers.get(clazz); + } + + /** + * WARING This method might return an Analyzer + * which is not suitable for all fields of the lucene document. This method + * internally uses the simplified method from {@link { + * @link org.hibernate.search.SearchFactory#getAnalyzer(Class)} + * + * TODO implement method which allows to retrieve the correct Analyzer + * per document field, this method will have another signature. 
+ * + * @return the Analyzer suitable for the lucene index of the given + * clazz + */ + @Override + public Analyzer getAnalyzerFor(Class clazz) { + Analyzer analyzer = getCurrentSearchFactory().getAnalyzer(pushAbstractBaseTypeDown(clazz)); + return analyzer; + } + + /* (non-Javadoc) + * @see eu.etaxonomy.cdm.api.service.search.ILuceneIndexToolProvider#getQueryFactoryFor(java.lang.Class) + */ + @Override + public QueryFactory newQueryFactoryFor(Class clazz){ + return new QueryFactory(this, pushAbstractBaseTypeDown(clazz)); + } + + /* (non-Javadoc) + * @see eu.etaxonomy.cdm.api.service.search.ILuceneIndexToolProvider#getIndexReaderAccessor() + */ + @Override + public IndexReaderAccessor getIndexReaderAccessor(){ + return getCurrentSearchFactory().getIndexReaderAccessor(); + } + +} \ No newline at end of file diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneMultiSearch.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneMultiSearch.java index 6bf69dae90..63e1187d27 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneMultiSearch.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneMultiSearch.java @@ -23,8 +23,6 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.SortField; -import org.hibernate.search.Search; -import org.hibernate.search.SearchFactory; import org.hibernate.search.indexes.IndexReaderAccessor; import eu.etaxonomy.cdm.model.common.CdmBase; @@ -48,9 +46,9 @@ public class LuceneMultiSearch extends LuceneSearch { * @param luceneSearch the searches to execute together as a union like search * @throws Exception */ - public LuceneMultiSearch(LuceneSearch... luceneSearch) throws LuceneMultiSearchException { + public LuceneMultiSearch(ILuceneIndexToolProvider toolProvider, LuceneSearch... 
luceneSearch) throws LuceneMultiSearchException { - session = luceneSearch[0].session; + this.toolProvider = toolProvider; groupByField = null; //reset BooleanQuery query = new BooleanQuery(); @@ -66,13 +64,13 @@ public class LuceneMultiSearch extends LuceneSearch { highlightFields.addAll(Arrays.asList(search.getHighlightFields())); // set the class for each of the sub searches - if(search.clazz != null){ - if(clazz != null && !clazz.equals(search.clazz)){ + if(search.cdmTypRestriction != null){ + if(cdmTypRestriction != null && !cdmTypRestriction.equals(search.cdmTypRestriction)){ throw new LuceneMultiSearchException( "LuceneMultiSearch can only handle once class restriction, but multiple given: " + - getClazz() + ", " + search.getClazz()); + getCdmTypRestriction() + ", " + search.getCdmTypRestriction()); } - setClazz(search.getClazz()); + setCdmTypRestriction(search.getCdmTypRestriction()); } // set the groupByField for each of the sub searches @@ -106,8 +104,6 @@ public class LuceneMultiSearch extends LuceneSearch { public IndexSearcher getSearcher() { if(searcher == null){ - - SearchFactory searchFactory = Search.getFullTextSession(session).getSearchFactory(); List readers = new ArrayList(); for(Class type : directorySelectClasses){ //OLD @@ -115,7 +111,7 @@ public class LuceneMultiSearch extends LuceneSearch { // logger.info(directoryProviders[0].getDirectory().toString()); // ReaderProvider readerProvider = searchFactory.getReaderProvider(); - IndexReaderAccessor ira = searchFactory.getIndexReaderAccessor(); + IndexReaderAccessor ira = toolProvider.getIndexReaderAccessor(); IndexReader reader = ira.open(type); // readers.add(readerProvider.openReader(directoryProviders[0])); readers.add(reader); @@ -140,10 +136,9 @@ public class LuceneMultiSearch extends LuceneSearch { */ @Override public Analyzer getAnalyzer() { - SearchFactory searchFactory = Search.getFullTextSession(session).getSearchFactory(); Analyzer analyzer = null; for(Class type : 
directorySelectClasses){ - Analyzer a = searchFactory.getAnalyzer(type); + Analyzer a = toolProvider.getAnalyzerFor(type); if(isEqual(analyzer, a)){ throw new RuntimeException("The LuceneMultiSearch must only be used on indexes which are using the same Analyzer."); } diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneSearch.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneSearch.java index a083460972..2ca3715d2c 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneSearch.java +++ b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneSearch.java @@ -14,10 +14,8 @@ import java.util.Collection; import org.apache.log4j.Logger; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; -import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Filter; @@ -34,12 +32,8 @@ import org.apache.lucene.search.grouping.TermAllGroupsCollector; import org.apache.lucene.search.grouping.TermFirstPassGroupingCollector; import org.apache.lucene.search.grouping.TermSecondPassGroupingCollector; import org.apache.lucene.search.grouping.TopGroups; -import org.hibernate.Session; import org.hibernate.search.ProjectionConstants; -import org.hibernate.search.Search; -import org.hibernate.search.SearchFactory; -import eu.etaxonomy.cdm.config.Configuration; import eu.etaxonomy.cdm.model.common.CdmBase; import eu.etaxonomy.cdm.model.description.DescriptionElementBase; import eu.etaxonomy.cdm.model.description.TextData; @@ -62,7 +56,7 @@ public class LuceneSearch { public static final Logger logger = Logger.getLogger(LuceneSearch.class); - protected Session session; + protected ILuceneIndexToolProvider toolProvider; protected IndexSearcher searcher; 
@@ -79,11 +73,11 @@ public class LuceneSearch { /** * classFilter */ - protected Class clazz; + protected Class cdmTypRestriction; - public Class getClazz() { - return clazz; + public Class getCdmTypRestriction() { + return cdmTypRestriction; } /** @@ -105,7 +99,7 @@ public class LuceneSearch { * directorySelectClass the Class is set to null * @param clazz */ - public void setClazz(Class clazz) { + public void setCdmTypRestriction(Class clazz) { /* * NOTE: @@ -115,7 +109,7 @@ public class LuceneSearch { if(clazz != null && clazz.equals(directorySelectClass)){ clazz = null; } - this.clazz = clazz; + this.cdmTypRestriction = clazz; } /** @@ -144,40 +138,41 @@ public class LuceneSearch { /** * @param session */ - public LuceneSearch(Session session, Class directorySelectClass) { - this.session = session; + public LuceneSearch(ILuceneIndexToolProvider toolProvider, Class directorySelectClass) { + this.toolProvider = toolProvider; this.directorySelectClass = directorySelectClass; } /** * @param session */ - public LuceneSearch(Session session, String groupByField, Class directorySelectClass) { - this.session = session; - this.directorySelectClass = directorySelectClass; - this.groupByField = groupByField; + public LuceneSearch(ILuceneIndexToolProvider toolProvider, String groupByField, Class directorySelectClass) { + this.toolProvider = toolProvider; + this.directorySelectClass = directorySelectClass; + this.groupByField = groupByField; } /** * TODO the abstract base class DescriptionElementBase can not be used, so - * we are using an arbitraty subclass to find the DirectoryProvider, future + * we are using an arbitrary subclass to find the DirectoryProvider, future * versions of hibernate search my allow using abstract base classes see * {@link http://stackoverflow.com/questions/492184/how-do-you-find-all-subclasses-of-a-given-class-in-java} * * @param type must not be null * @return */ - protected Class pushAbstractBaseTypeDown(Class type) { + private Class 
pushAbstractBaseTypeDown(Class type) { + Class returnType = type; if (type.equals(DescriptionElementBase.class)) { - type = TextData.class; + returnType = TextData.class; } if (type.equals(TaxonBase.class)) { - type = Taxon.class; + returnType = Taxon.class; } if (type.equals(TaxonNameBase.class)) { - type = NonViralName.class; + returnType = NonViralName.class; } - return type; + return returnType; } protected LuceneSearch() { @@ -189,51 +184,26 @@ public class LuceneSearch { */ public IndexSearcher getSearcher() { if(searcher == null){ - searcher = new IndexSearcher(getIndexReader()); + searcher = new IndexSearcher(toolProvider.getIndexReaderFor(directorySelectClass)); searcher.setDefaultFieldSortScoring(true, true); } return searcher; } /** - * @return - */ - public IndexReader getIndexReader() { - SearchFactory searchFactory = Search.getFullTextSession(session).getSearchFactory(); - IndexReader reader = searchFactory.getIndexReaderAccessor().open(getDirectorySelectClass()); - return reader; - } - - /** - * @return - */ - public IndexReader getIndexReaderFor(Class clazz) { - SearchFactory searchFactory = Search.getFullTextSession(session).getSearchFactory(); - IndexReader reader = searchFactory.getIndexReaderAccessor().open(pushAbstractBaseTypeDown(clazz)); - return reader; - } - - /** - * @return - */ - public QueryParser getQueryParser() { - Analyzer analyzer = getAnalyzer(); - QueryParser parser = new QueryParser(Configuration.luceneVersion, "titleCache", analyzer); - return parser; - } - - /** - * @return + * Convenience method which delegated the call to the available + * {@link ILuceneIndexToolProvider#getAnalyzerFor(Class)} method. 
+ * + * @return the Analyzer suitable for the directorySelectClass + * of the LuceneSearch */ public Analyzer getAnalyzer() { - SearchFactory searchFactory = Search.getFullTextSession(session).getSearchFactory(); - Analyzer analyzer = searchFactory.getAnalyzer(getDirectorySelectClass()); - return analyzer; + return toolProvider.getAnalyzerFor(directorySelectClass); } /** * @param luceneQueryString - * @param clazz the type as additional filter criterion + * @param cdmTypRestriction the type as additional filter criterion * @param pageSize if the page size is null or in an invalid range it will be set to MAX_HITS_ALLOWED * @param pageNumber a 0-based index of the page to return, will default to 0 if null or negative. * @return @@ -255,7 +225,7 @@ public class LuceneSearch { */ public Query parse(String luceneQueryString) throws ParseException { logger.debug("luceneQueryString to be parsed: " + luceneQueryString); - Query luceneQuery = getQueryParser().parse(luceneQueryString); + Query luceneQuery = toolProvider.getQueryParserFor(directorySelectClass).parse(luceneQueryString); return luceneQuery; } @@ -345,15 +315,15 @@ public class LuceneSearch { } /** - * @param clazz + * @param cdmTypRestriction */ protected Query expandQuery() { Query fullQuery; - if(clazz != null){ + if(cdmTypRestriction != null){ BooleanQuery filteredQuery = new BooleanQuery(); BooleanQuery classFilter = new BooleanQuery(); - Term t = new Term(ProjectionConstants.OBJECT_CLASS, clazz.getName()); + Term t = new Term(ProjectionConstants.OBJECT_CLASS, cdmTypRestriction.getName()); TermQuery termQuery = new TermQuery(t); classFilter.setBoost(0); diff --git a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/QueryFactory.java b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/QueryFactory.java index 6fa781512a..5053d79b4e 100644 --- a/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/QueryFactory.java +++ 
b/cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/QueryFactory.java @@ -45,6 +45,21 @@ import eu.etaxonomy.cdm.model.common.IdentifiableEntity; import eu.etaxonomy.cdm.model.common.Language; /** + * QueryFactory creates queries for a specific lucene index, that is, queries + * specific to the various CDM base types. Therefore the QueryFactory holds a + * reference to an {@link ILuceneIndexToolProvider} and to the CDM base type it was created for.<br>
+ * The field names used in queries created on free text fields are remembered + * and can be accessed by {@link #getTextFieldNames()} or {@link #getTextFieldNamesAsArray()}. + * This is useful for highlighting the matches with {@link LuceneSearch#setHighlightFields(String[])}. + * <p>
+ * The index-specific methods of {@link ILuceneIndexToolProvider} which are + * used directly by QueryFactory are: + * <ul>
+ * <li>{@link ILuceneIndexToolProvider#getQueryParserFor(Class)}</li>
+ * <li>{@link ILuceneIndexToolProvider#getIndexReaderFor(Class)}</li>
+ * </ul>
+ * + * * @author a.kohlbecker * @date Sep 14, 2012 * @@ -53,13 +68,13 @@ public class QueryFactory { public static final Logger logger = Logger.getLogger(QueryFactory.class); - private final LuceneSearch luceneSearch; + protected ILuceneIndexToolProvider toolProvider; Set textFieldNames = new HashSet(); Map, IndexSearcher> indexSearcherMap = new HashMap, IndexSearcher>(); - private BooleanQuery finalQuery; + private final Class cdmBaseType; public Set getTextFieldNames() { return textFieldNames; @@ -69,9 +84,9 @@ public class QueryFactory { return textFieldNames.toArray(new String[textFieldNames.size()]); } - - public QueryFactory(LuceneSearch luceneSearch){ - this.luceneSearch = luceneSearch; + public QueryFactory(ILuceneIndexToolProvider toolProvider, Class cdmBaseType){ + this.cdmBaseType = cdmBaseType; + this.toolProvider = toolProvider; } /** @@ -96,7 +111,7 @@ public class QueryFactory { // in order to support the full query syntax we must use the parser // here try { - return luceneSearch.parse(luceneQueryString); + return toolProvider.getQueryParserFor(cdmBaseType).parse(luceneQueryString); } catch (ParseException e) { logger.error(e); } @@ -219,7 +234,7 @@ public class QueryFactory { * @param entity * @return */ - private Query newEntityUuidQuery(String uuidFieldName, IdentifiableEntity entity) { + public Query newEntityUuidQuery(String uuidFieldName, IdentifiableEntity entity) { return newTermQuery(uuidFieldName, entity.getUuid().toString(), false); } @@ -261,18 +276,6 @@ public class QueryFactory { return uuidInQuery; } - public void setFinalQuery(BooleanQuery finalQuery) { - this.finalQuery = finalQuery; - } - - public BooleanQuery getFinalQuery(){ - return finalQuery; - } - - public LuceneSearch getLuceneSearch() { - return luceneSearch; - } - /** * Returns a Lucene Query which rely on double numeric range query @@ -341,7 +344,7 @@ public class QueryFactory { private IndexSearcher indexSearcherFor(Class clazz) { if(indexSearcherMap.get(clazz) == 
null){ - IndexReader indexReader = luceneSearch.getIndexReaderFor(clazz); + IndexReader indexReader = toolProvider.getIndexReaderFor(clazz); IndexSearcher searcher = new IndexSearcher(indexReader); searcher.setDefaultFieldSortScoring(true, true); indexSearcherMap.put(clazz, searcher); -- 2.34.1