#4716 removing deprecated usage of new BooleanQuery, using BooleanQuery.Builder instead
[cdmlib.git] / cdmlib-services / src / main / java / eu / etaxonomy / cdm / api / service / search / LuceneSearch.java
index 750fc2e4911a7809288c5b534771052525c3e0d8..57d338a00bb7e4e817f2e2c333137b8122ecc00c 100644 (file)
 package eu.etaxonomy.cdm.api.service.search;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
 
 import org.apache.log4j.Logger;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BooleanQuery.Builder;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MultiCollector;
@@ -24,10 +28,11 @@ import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.grouping.GroupDocs;
 import org.apache.lucene.search.grouping.SearchGroup;
-import org.apache.lucene.search.grouping.TermAllGroupsCollector;
-import org.apache.lucene.search.grouping.TermFirstPassGroupingCollector;
-import org.apache.lucene.search.grouping.TermSecondPassGroupingCollector;
 import org.apache.lucene.search.grouping.TopGroups;
+import org.apache.lucene.search.grouping.term.TermAllGroupsCollector;
+import org.apache.lucene.search.grouping.term.TermFirstPassGroupingCollector;
+import org.apache.lucene.search.grouping.term.TermSecondPassGroupingCollector;
+import org.apache.lucene.util.BytesRef;
 
 import eu.etaxonomy.cdm.model.common.CdmBase;
 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
@@ -115,7 +120,7 @@ public class LuceneSearch {
      */
     public final int MAX_HITS_ALLOWED = 10000;
 
-    protected Query query;
+    protected BooleanQuery query;
 
     protected String[] highlightFields = new String[0];
 
@@ -180,7 +185,7 @@ public class LuceneSearch {
     public IndexSearcher getSearcher() {
         if(searcher == null){
             searcher = new IndexSearcher(toolProvider.getIndexReaderFor(directorySelectClass));
-            searcher.setDefaultFieldSortScoring(true, true);
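+//            searcher.setDefaultFieldSortScoring(true, true); // NOTE(review): disabled during the Lucene upgrade, presumably because IndexSearcher no longer offers this method - confirm, then delete this line instead of keeping dead code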
         }
         return searcher;
     }
@@ -208,8 +213,7 @@ public class LuceneSearch {
     public TopGroupsWithMaxScore executeSearch(String luceneQueryString, Integer pageSize, Integer pageNumber) throws ParseException, IOException {
 
         Query luceneQuery = parse(luceneQueryString);
-        this.query = luceneQuery;
-
+        setQuery(luceneQuery);
         return executeSearch(pageSize, pageNumber);
     }
 
@@ -264,9 +268,6 @@ public class LuceneSearch {
         Sort groupSort = null;
         Sort withinGroupSort = Sort.RELEVANCE;
         if(sortFields != null && sortFields.length > 0){
-            if(sortFields[0] != SortField.FIELD_SCORE){
-                throw new RuntimeException("Fist sort field must be SortField.FIELD_SCORE");
-            }
             groupSort = new Sort(sortFields);
         } else {
             groupSort = Sort.RELEVANCE; // == SortField.FIELD_SCORE !!
@@ -274,37 +275,46 @@ public class LuceneSearch {
 
         // perform the search (needs two passes for grouping)
         if(logger.isDebugEnabled()){
-            logger.debug("Grouping: sortFields=" + sortFields + ", groupByField=" + groupByField +
+            logger.debug("Grouping: sortFields=" + Arrays.toString(sortFields) + ", groupByField=" + groupByField +
                     ", groupSort=" + groupSort + ", withinGroupSort=" + withinGroupSort + ", limit=" + limit + ", maxDocsPerGroup="+ maxDocsPerGroup);
         }
         // - first pass
-        TermFirstPassGroupingCollector firstPassCollector = new TermFirstPassGroupingCollector(groupByField, withinGroupSort, limit);
+        TermFirstPassGroupingCollector firstPassCollector = new TermFirstPassGroupingCollector(
+                groupByField, groupSort, limit);
 
         getSearcher().search(fullQuery, filter , firstPassCollector);
-        Collection<SearchGroup<String>> topGroups = firstPassCollector.getTopGroups(0, true); // no offset here since we need the first item for the max score
+        Collection<SearchGroup<BytesRef>> topGroups = firstPassCollector.getTopGroups(0, true); // no offset here since we need the first item for the max score
 
         if (topGroups == null) {
               return null;
         }
-        // - second pass
-        boolean getScores = true;
+        // - flags for second pass
+        boolean getScores = false;
         boolean getMaxScores = true;
+        if(groupSort.getSort()[0] != SortField.FIELD_SCORE){
+            getMaxScores = false;
+            // see inner class TopGroupsWithMaxScore
+            logger.error("First sort field must be SortField.FIELD_SCORE otherwise the max score value will not be correct! MaxScore calculation will be skipped");
+        }
         boolean fillFields = true;
         TermAllGroupsCollector allGroupsCollector = new TermAllGroupsCollector(groupByField);
         TermSecondPassGroupingCollector secondPassCollector = new TermSecondPassGroupingCollector(
-                groupByField, topGroups, groupSort, withinGroupSort, maxDocsPerGroup , getScores, getMaxScores, fillFields
+                groupByField, topGroups, groupSort, withinGroupSort, maxDocsPerGroup , getScores,
+                getMaxScores, fillFields
                 );
         getSearcher().search(fullQuery, filter, MultiCollector.wrap(secondPassCollector, allGroupsCollector));
 
-        TopGroups<String> groupsResult = secondPassCollector.getTopGroups(0); // no offset here since we need the first item for the max score
+        TopGroups<BytesRef> groupsResult = secondPassCollector.getTopGroups(0); // no offset here since we need the first item for the max score
 
         // get max score from very first result
         float maxScore = groupsResult.groups[0].maxScore;
         if(logger.isDebugEnabled()){
             logger.debug("TopGroups: maxScore=" + maxScore + ", offset=" + offset +
-                    ", totalGroupCount=" + allGroupsCollector.getGroupCount() + ", totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
+                    ", totalGroupCount=" + allGroupsCollector.getGroupCount() +
+                    ", totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
         }
-        TopGroupsWithMaxScore topGroupsWithMaxScore = new TopGroupsWithMaxScore(groupsResult, offset, allGroupsCollector.getGroupCount(), maxScore);
+        TopGroupsWithMaxScore topGroupsWithMaxScore = new TopGroupsWithMaxScore(groupsResult,
+                offset, allGroupsCollector.getGroupCount(), maxScore);
 
         return topGroupsWithMaxScore;
     }
@@ -314,7 +324,7 @@ public class LuceneSearch {
      * <code>cdmTypeRestriction</code> is not <code>NULL</code>
      */
     protected Query expandQuery() {
-        Query fullQuery;
+        BooleanQuery fullQuery;
         if(cdmTypeRestriction != null){
             fullQuery = QueryFactory.addTypeRestriction(query, cdmTypeRestriction);
         } else {
@@ -324,10 +334,15 @@ public class LuceneSearch {
     }
 
     public void setQuery(Query query) {
-        this.query = query;
+        if( query instanceof BooleanQuery) {
+            this.query = (BooleanQuery)query;
+        } else {
+            Builder builder = new Builder();
+            this.query = builder.add(query, Occur.MUST).build();
+        }
     }
 
-    public Query getQuery() {
+    public BooleanQuery getQuery() {
         return query;
     }
 
@@ -360,27 +375,28 @@ public class LuceneSearch {
      *
      */
     public class TopGroupsWithMaxScore{
-        public TopGroups<String> topGroups;
+        public TopGroups<BytesRef> topGroups;
         public float maxScore = Float.NaN;
 
-        TopGroupsWithMaxScore(TopGroups<String> topGroups, int offset, int totalGroupCount, float maxScore){
+        TopGroupsWithMaxScore(TopGroups<BytesRef> topGroups, int offset, int totalGroupCount, float maxScore){
             this.maxScore = maxScore;
-            TopGroups<String> newTopGroups;
+            TopGroups<BytesRef> newTopGroups;
             if(offset > 0){
-                GroupDocs<String>[] newGroupDocs = new GroupDocs[topGroups.groups.length - offset];
+                GroupDocs<BytesRef>[] newGroupDocs = new GroupDocs[topGroups.groups.length - offset];
                 for(int i = offset; i < topGroups.groups.length; i++){
                     newGroupDocs[i - offset] = topGroups.groups[i];
                 }
-                newTopGroups = new TopGroups<String>(
+                newTopGroups = new TopGroups<BytesRef>(
                             topGroups.groupSort,
                             topGroups.withinGroupSort,
                             topGroups.totalHitCount,
                             topGroups.totalGroupedHitCount,
-                            newGroupDocs);
+                            newGroupDocs,
+                            maxScore);
             } else {
                 newTopGroups = topGroups;
             }
-            this.topGroups = new TopGroups<String>(newTopGroups, totalGroupCount);
+            this.topGroups = new TopGroups<BytesRef>(newTopGroups, totalGroupCount);
         }
 
     }