package eu.etaxonomy.cdm.api.service.search;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Collection;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.grouping.GroupDocs;
import org.apache.lucene.search.grouping.SearchGroup;
-import org.apache.lucene.search.grouping.TermAllGroupsCollector;
-import org.apache.lucene.search.grouping.TermFirstPassGroupingCollector;
-import org.apache.lucene.search.grouping.TermSecondPassGroupingCollector;
import org.apache.lucene.search.grouping.TopGroups;
+import org.apache.lucene.search.grouping.term.TermAllGroupsCollector;
+import org.apache.lucene.search.grouping.term.TermFirstPassGroupingCollector;
+import org.apache.lucene.search.grouping.term.TermSecondPassGroupingCollector;
+import org.apache.lucene.util.BytesRef;
import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
*/
public final int MAX_HITS_ALLOWED = 10000;
- protected Query query;
+ protected BooleanQuery query;
// held as a BooleanQuery: setQuery() casts a BooleanQuery argument and wraps any other Query in one
// starts out as an empty array, so it is non-null by default
protected String[] highlightFields = new String[0];
public IndexSearcher getSearcher() {
// Lazily creates the searcher: the first call builds an IndexSearcher on the
// index reader that toolProvider supplies for directorySelectClass and stores
// it in the searcher field; later calls return that stored instance.
if(searcher == null){
searcher = new IndexSearcher(toolProvider.getIndexReaderFor(directorySelectClass));
- searcher.setDefaultFieldSortScoring(true, true);
+// searcher.setDefaultFieldSortScoring(true, true);
// NOTE(review): the call above is only commented out, not replaced — presumably
// the method no longer exists in the targeted Lucene version; confirm that score
// tracking for field-sorted searches is requested elsewhere if it is still needed.
}
return searcher;
}
public TopGroupsWithMaxScore executeSearch(String luceneQueryString, Integer pageSize, Integer pageNumber) throws ParseException, IOException {
// Convenience overload: parses the query string, stores the parsed query on
// this instance and then delegates to executeSearch(pageSize, pageNumber).
// ParseException and IOException are declared on the signature and reach the caller.
Query luceneQuery = parse(luceneQueryString);
// the query is stored through setQuery() rather than by assigning the field directly:
// the field is typed BooleanQuery, while the parsed value is held as a plain Query
- this.query = luceneQuery;
-
+ setQuery(luceneQuery);
return executeSearch(pageSize, pageNumber);
}
Sort groupSort = null;
Sort withinGroupSort = Sort.RELEVANCE;
if(sortFields != null && sortFields.length > 0){
- if(sortFields[0] != SortField.FIELD_SCORE){
- throw new RuntimeException("Fist sort field must be SortField.FIELD_SCORE");
- }
groupSort = new Sort(sortFields);
} else {
groupSort = Sort.RELEVANCE; // == SortField.FIELD_SCORE !!
// perform the search (needs two passes for grouping)
if(logger.isDebugEnabled()){
- logger.debug("Grouping: sortFields=" + sortFields + ", groupByField=" + groupByField +
+ logger.debug("Grouping: sortFields=" + Arrays.toString(sortFields) + ", groupByField=" + groupByField +
", groupSort=" + groupSort + ", withinGroupSort=" + withinGroupSort + ", limit=" + limit + ", maxDocsPerGroup="+ maxDocsPerGroup);
}
// - first pass
- TermFirstPassGroupingCollector firstPassCollector = new TermFirstPassGroupingCollector(groupByField, withinGroupSort, limit);
+ TermFirstPassGroupingCollector firstPassCollector = new TermFirstPassGroupingCollector(
+ groupByField, groupSort, limit);
getSearcher().search(fullQuery, filter , firstPassCollector);
- Collection<SearchGroup<String>> topGroups = firstPassCollector.getTopGroups(0, true); // no offset here since we need the first item for the max score
+ Collection<SearchGroup<BytesRef>> topGroups = firstPassCollector.getTopGroups(0, true); // no offset here since we need the first item for the max score
if (topGroups == null) {
return null;
}
- // - second pass
- boolean getScores = true;
+ // - flags for second pass
+ boolean getScores = false;
boolean getMaxScores = true;
+ if(groupSort.getSort()[0] != SortField.FIELD_SCORE){
+ getMaxScores = false;
+ // see inner class TopGroupsWithMaxScore
+ logger.error("Fist sort field must be SortField.FIELD_SCORE otherwise the max score value will not be correct! MaxScore calculation will be skipped");
+ }
boolean fillFields = true;
TermAllGroupsCollector allGroupsCollector = new TermAllGroupsCollector(groupByField);
TermSecondPassGroupingCollector secondPassCollector = new TermSecondPassGroupingCollector(
- groupByField, topGroups, groupSort, withinGroupSort, maxDocsPerGroup , getScores, getMaxScores, fillFields
+ groupByField, topGroups, groupSort, withinGroupSort, maxDocsPerGroup , getScores,
+ getMaxScores, fillFields
);
getSearcher().search(fullQuery, filter, MultiCollector.wrap(secondPassCollector, allGroupsCollector));
- TopGroups<String> groupsResult = secondPassCollector.getTopGroups(0); // no offset here since we need the first item for the max score
+ TopGroups<BytesRef> groupsResult = secondPassCollector.getTopGroups(0); // no offset here since we need the first item for the max score
// get max score from very first result
float maxScore = groupsResult.groups[0].maxScore;
if(logger.isDebugEnabled()){
logger.debug("TopGroups: maxScore=" + maxScore + ", offset=" + offset +
- ", totalGroupCount=" + allGroupsCollector.getGroupCount() + ", totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
+ ", totalGroupCount=" + allGroupsCollector.getGroupCount() +
+ ", totalGroupedHitCount=" + groupsResult.totalGroupedHitCount);
}
- TopGroupsWithMaxScore topGroupsWithMaxScore = new TopGroupsWithMaxScore(groupsResult, offset, allGroupsCollector.getGroupCount(), maxScore);
+ TopGroupsWithMaxScore topGroupsWithMaxScore = new TopGroupsWithMaxScore(groupsResult,
+ offset, allGroupsCollector.getGroupCount(), maxScore);
return topGroupsWithMaxScore;
}
* <code>cdmTypeRestriction</code> is not <code>NULL</code>
*/
protected Query expandQuery() {
- Query fullQuery;
+ BooleanQuery fullQuery;
if(cdmTypeRestriction != null){
fullQuery = QueryFactory.addTypeRestriction(query, cdmTypeRestriction);
} else {
}
public void setQuery(Query query) {
// Stores the given query in the BooleanQuery-typed query field:
//  - null clears the field, as the plain assignment did before the field was retyped,
//  - a BooleanQuery is stored as-is,
//  - any other Query becomes the single mandatory (Occur.MUST) clause of a new BooleanQuery.
- this.query = query;
+ if(query == null) {
// without this guard null would fall into the wrapping branch and be handed to Builder.add()
+ this.query = null;
+ } else if( query instanceof BooleanQuery) {
+ this.query = (BooleanQuery)query;
+ } else {
+ Builder builder = new Builder();
+ this.query = builder.add(query, Occur.MUST).build();
+ }
}
- public Query getQuery() {
+ public BooleanQuery getQuery() {
// Plain accessor: returns the current value of the query field without copying it.
// The return type follows the field's BooleanQuery type; conversion of other
// Query implementations happens in setQuery(), not here.
return query;
}
*
*/
public class TopGroupsWithMaxScore{
// Result holder: a TopGroups (trimmed by the paging offset where one is given)
// together with the maximum score value handed to the constructor.
- public TopGroups<String> topGroups;
+ public TopGroups<BytesRef> topGroups;
// NaN until the constructor overwrites it with the value it is given
public float maxScore = Float.NaN;
// Constructor parameters:
//   topGroups       grouping result whose groups array still contains the leading groups to be skipped
//   offset          number of leading groups to drop; values <= 0 leave the groups untouched
//   totalGroupCount total number of groups, attached to the resulting TopGroups
//   maxScore        maximum score to expose through the maxScore field
- TopGroupsWithMaxScore(TopGroups<String> topGroups, int offset, int totalGroupCount, float maxScore){
+ TopGroupsWithMaxScore(TopGroups<BytesRef> topGroups, int offset, int totalGroupCount, float maxScore){
this.maxScore = maxScore;
- TopGroups<String> newTopGroups;
+ TopGroups<BytesRef> newTopGroups;
if(offset > 0){
// NOTE(review): if offset is larger than topGroups.groups.length the array size below
// is negative and the allocation throws NegativeArraySizeException — confirm that
// callers never page beyond the available groups.
- GroupDocs<String>[] newGroupDocs = new GroupDocs[topGroups.groups.length - offset];
+ GroupDocs<BytesRef>[] newGroupDocs = new GroupDocs[topGroups.groups.length - offset];
// copy the groups from index offset onwards to the front of the new array
for(int i = offset; i < topGroups.groups.length; i++){
newGroupDocs[i - offset] = topGroups.groups[i];
}
// rebuild the TopGroups around the shortened array; sorts and hit counts are carried over unchanged
- newTopGroups = new TopGroups<String>(
+ newTopGroups = new TopGroups<BytesRef>(
topGroups.groupSort,
topGroups.withinGroupSort,
topGroups.totalHitCount,
topGroups.totalGroupedHitCount,
- newGroupDocs);
+ newGroupDocs,
+ maxScore);
} else {
// nothing to skip: reuse the given instance
newTopGroups = topGroups;
}
// wrap once more so that the total group count is attached to the exposed result
- this.topGroups = new TopGroups<String>(newTopGroups, totalGroupCount);
+ this.topGroups = new TopGroups<BytesRef>(newTopGroups, totalGroupCount);
}
}