Revision c80938b2
almost 100% fixing #2950 (Allow grouping of lucene (hibernate search) results) - only max score is not yet available
.gitattributes | ||
---|---|---|
606 | 606 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/DateTimeBridge.java -text |
607 | 607 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/DefinedTermBaseClassBridge.java -text |
608 | 608 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/DescriptionBaseClassBridge.java -text |
609 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/GroupByTaxonClassBridge.java -text |
|
609 | 610 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/IdFieldOptions.java -text |
610 | 611 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/MultilanguageTextFieldBridge.java -text |
611 | 612 |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/NotNullAwareIdBridge.java -text |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/AbstractClassBridge.java | ||
---|---|---|
17 | 17 |
protected final static NotNullAwareIdBridge idFieldBridge = new NotNullAwareIdBridge(); |
18 | 18 |
|
19 | 19 |
public static LuceneOptions idFieldOptions = new IdFieldOptions(); |
20 |
|
|
20 | 21 |
public static LuceneOptions sortFieldOptions = new SortFieldOptions(); |
21 | 22 |
|
22 | 23 |
|
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/DescriptionBaseClassBridge.java | ||
---|---|---|
50 | 50 |
*/ |
51 | 51 |
public void set(String name, Object entity, Document document, LuceneOptions luceneOptions) { |
52 | 52 |
|
53 |
|
|
54 | 53 |
if (entity instanceof TaxonDescription) { |
55 |
Taxon taxon = ((TaxonDescription) entity).getTaxon(); |
|
54 |
|
|
55 |
Taxon taxon = ((TaxonDescription)entity).getTaxon(); |
|
56 | 56 |
|
57 | 57 |
if (taxon != null) { |
58 | 58 |
|
... | ... | |
86 | 86 |
} |
87 | 87 |
} |
88 | 88 |
|
89 |
|
|
89 | 90 |
} |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/GroupByTaxonClassBridge.java | ||
---|---|---|
1 |
package eu.etaxonomy.cdm.hibernate.search; |
|
2 |
|
|
3 |
import org.apache.lucene.document.Document; |
|
4 |
import org.hibernate.search.annotations.ClassBridge; |
|
5 |
import org.hibernate.search.bridge.LuceneOptions; |
|
6 |
|
|
7 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
|
8 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
|
9 |
|
|
10 |
/** |
|
11 |
* The <code>GroupByTaxonClassBridge</code> adds the field |
|
12 |
* <code>groupby_taxon.id</code> to the lucene document which can be used to |
|
13 |
* group search results based on the taxon which is associated with the indexed |
|
14 |
* cdm entity. So any cdm class which is involved in querying for taxa must |
|
15 |
* use this class bridge, e.g.: |
|
16 |
* |
|
17 |
* <pre> |
|
18 |
@ClassBridge(impl=GroupByTaxonClassBridge.class) |
|
19 |
</pre> |
|
20 |
* |
|
21 |
* or |
|
22 |
* <pre> |
|
23 |
@ClassBridges({ |
|
24 |
@ClassBridge(impl=GroupByTaxonClassBridge.class), |
|
25 |
@ClassBridge(impl=DescriptionBaseClassBridge.class), |
|
26 |
}) |
|
27 |
} |
|
28 |
* </pre> |
|
29 |
* |
|
30 |
* @author a.kohlbecker |
|
31 |
* @date Oct 4, 2012 |
|
32 |
* |
|
33 |
*/ |
|
34 |
public class GroupByTaxonClassBridge extends AbstractClassBridge { |
|
35 |
|
|
36 |
public static final String GROUPBY_TAXON_FIELD = "groupby_taxon.id"; |
|
37 |
|
|
38 |
public GroupByTaxonClassBridge() { |
|
39 |
super(); |
|
40 |
} |
|
41 |
|
|
42 |
/** |
|
43 |
* @param entity the cdm entity being indexed; must be a TaxonDescription or a Taxon |
|
44 |
* @return the taxon of a TaxonDescription (may be null), or the entity itself if it is a Taxon |
|
45 |
*/ |
|
46 |
protected Taxon getAssociatedTaxon(Object entity) { |
|
47 |
|
|
48 |
if (entity instanceof TaxonDescription) { |
|
49 |
return ((TaxonDescription) entity).getTaxon(); |
|
50 |
} |
|
51 |
if (entity instanceof Taxon) { |
|
52 |
return (Taxon)entity; |
|
53 |
} |
|
54 |
|
|
55 |
throw new RuntimeException("CDM class " + entity.getClass() + " not yet supported"); |
|
56 |
} |
|
57 |
|
|
58 |
@Override |
|
59 |
public void set(String name, Object value, Document document, LuceneOptions luceneOptions) { |
|
60 |
|
|
61 |
Taxon taxon = getAssociatedTaxon(value); |
|
62 |
if(taxon != null){ |
|
63 |
idFieldBridge.set(GROUPBY_TAXON_FIELD, taxon.getId(), document, idFieldOptions); |
|
64 |
} |
|
65 |
} |
|
66 |
|
|
67 |
} |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/description/DescriptionBase.java | ||
---|---|---|
34 | 34 |
import org.hibernate.annotations.CascadeType; |
35 | 35 |
import org.hibernate.envers.Audited; |
36 | 36 |
import org.hibernate.search.annotations.ClassBridge; |
37 |
import org.hibernate.search.annotations.ClassBridges; |
|
37 | 38 |
import org.hibernate.search.annotations.ContainedIn; |
38 | 39 |
import org.hibernate.search.annotations.Field; |
39 | 40 |
|
40 | 41 |
import eu.etaxonomy.cdm.hibernate.search.DescriptionBaseClassBridge; |
42 |
import eu.etaxonomy.cdm.hibernate.search.GroupByTaxonClassBridge; |
|
41 | 43 |
import eu.etaxonomy.cdm.model.common.IdentifiableEntity; |
42 | 44 |
import eu.etaxonomy.cdm.model.name.NameRelationship; |
43 | 45 |
import eu.etaxonomy.cdm.model.name.TypeDesignationBase; |
... | ... | |
73 | 75 |
@Entity |
74 | 76 |
@Audited |
75 | 77 |
@Inheritance(strategy=InheritanceType.SINGLE_TABLE) |
76 |
@ClassBridge(impl=DescriptionBaseClassBridge.class) |
|
78 |
@ClassBridges({ |
|
79 |
@ClassBridge(impl=DescriptionBaseClassBridge.class), |
|
80 |
@ClassBridge(impl=GroupByTaxonClassBridge.class) |
|
81 |
}) |
|
77 | 82 |
public abstract class DescriptionBase<S extends IIdentifiableEntityCacheStrategy> extends IdentifiableEntity<S> { |
78 | 83 |
private static final long serialVersionUID = 5504218413819040193L; |
79 | 84 |
private static final Logger logger = Logger.getLogger(DescriptionBase.class); |
cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/taxon/Taxon.java | ||
---|---|---|
40 | 40 |
import org.hibernate.annotations.Cascade; |
41 | 41 |
import org.hibernate.annotations.CascadeType; |
42 | 42 |
import org.hibernate.envers.Audited; |
43 |
import org.hibernate.search.annotations.ClassBridge; |
|
43 | 44 |
import org.hibernate.search.annotations.ContainedIn; |
44 | 45 |
import org.hibernate.search.annotations.Indexed; |
45 | 46 |
import org.hibernate.search.annotations.IndexedEmbedded; |
46 | 47 |
import org.springframework.beans.factory.annotation.Configurable; |
47 | 48 |
import org.springframework.util.ReflectionUtils; |
48 | 49 |
|
50 |
import eu.etaxonomy.cdm.hibernate.search.GroupByTaxonClassBridge; |
|
49 | 51 |
import eu.etaxonomy.cdm.model.common.IRelated; |
50 | 52 |
import eu.etaxonomy.cdm.model.common.RelationshipBase; |
51 | 53 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
... | ... | |
82 | 84 |
@Indexed(index = "eu.etaxonomy.cdm.model.taxon.TaxonBase") |
83 | 85 |
@Audited |
84 | 86 |
@Configurable |
87 |
@ClassBridge(impl=GroupByTaxonClassBridge.class) |
|
85 | 88 |
public class Taxon extends TaxonBase<IIdentifiableEntityCacheStrategy<Taxon>> implements IRelated<RelationshipBase>, Cloneable{ |
86 | 89 |
private static final long serialVersionUID = -584946869762749006L; |
87 | 90 |
private static final Logger logger = Logger.getLogger(Taxon.class); |
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/TaxonServiceImpl.java | ||
---|---|---|
1217 | 1217 |
Map<CdmBaseType, String> idFieldMap = new HashMap<CdmBaseType, String>(); |
1218 | 1218 |
idFieldMap.put(CdmBaseType.DESCRIPTION_ELEMENT, "inDescription.taxon.id"); |
1219 | 1219 |
|
1220 |
// --- initialize taxa, thighlight matches ....
|
|
1220 |
// --- initialize taxa, highlight matches .... |
|
1221 | 1221 |
ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneSearch.getQuery()); |
1222 | 1222 |
@SuppressWarnings("rawtypes") |
1223 | 1223 |
List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet( |
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneSearch.java | ||
---|---|---|
42 | 42 |
import org.hibernate.search.reader.ReaderProvider; |
43 | 43 |
import org.hibernate.search.store.DirectoryProvider; |
44 | 44 |
|
45 |
import eu.etaxonomy.cdm.hibernate.search.GroupByTaxonClassBridge; |
|
45 | 46 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
46 | 47 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase; |
47 | 48 |
import eu.etaxonomy.cdm.model.description.TextData; |
... | ... | |
56 | 57 |
*/ |
57 | 58 |
public class LuceneSearch { |
58 | 59 |
|
59 |
private static final String GROUP_BY_FIELD = "id";
|
|
60 |
private static final String GROUP_BY_FIELD = GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD;
|
|
60 | 61 |
|
61 | 62 |
public static final Logger logger = Logger.getLogger(LuceneSearch.class); |
62 | 63 |
|
... | ... | |
112 | 113 |
|
113 | 114 |
protected String[] highlightFields = new String[0]; |
114 | 115 |
|
116 |
private int maxDocsPerGroup = 10; |
|
117 |
|
|
118 |
|
|
119 |
public int getMaxDocsPerGroup() { |
|
120 |
return maxDocsPerGroup; |
|
121 |
} |
|
122 |
|
|
123 |
public void setMaxDocsPerGroup(int maxDocsPerGroup) { |
|
124 |
this.maxDocsPerGroup = maxDocsPerGroup; |
|
125 |
} |
|
115 | 126 |
|
116 | 127 |
/** |
117 | 128 |
* @param session |
... | ... | |
238 | 249 |
} |
239 | 250 |
|
240 | 251 |
Query fullQuery = expandQuery(); |
241 |
|
|
242 | 252 |
logger.info("final query: " + fullQuery.toString()); |
243 | 253 |
|
244 | 254 |
int offset = pageNumber * pageSize; |
245 | 255 |
int limit = (pageNumber + 1) * pageSize - 1 ; |
246 |
|
|
247 | 256 |
logger.debug("start: " + offset + "; limit:" + limit); |
248 | 257 |
|
249 |
// TopDocs topDocs = null; |
|
250 |
|
|
251 |
// sort must be non null default: Sort.RELEVANCE |
|
252 | 258 |
Sort groupSort = null; |
253 | 259 |
Sort withinGroupSort = Sort.RELEVANCE; |
254 | 260 |
if(sortFields != null && sortFields.length > 0){ |
255 | 261 |
Sort sort = new Sort(sortFields); |
256 | 262 |
groupSort = new Sort(sortFields); |
257 |
// topDocs = getSearcher().search(fullQuery, null, limit, sort); |
|
258 | 263 |
} else { |
259 | 264 |
groupSort = Sort.RELEVANCE; // == SortField.FIELD_SCORE !! |
260 |
// topDocs = getSearcher().search(fullQuery, null, limit); |
|
261 | 265 |
} |
266 |
|
|
262 | 267 |
FirstPassGroupingCollector groupingCollector_1 = new FirstPassGroupingCollector(GROUP_BY_FIELD, withinGroupSort, limit); |
263 | 268 |
getSearcher().search(fullQuery, groupingCollector_1); |
264 | 269 |
|
... | ... | |
272 | 277 |
boolean getMaxScores = true; |
273 | 278 |
boolean fillFields = true; |
274 | 279 |
AllGroupsCollector c3 = new AllGroupsCollector(GROUP_BY_FIELD); |
275 |
SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(GROUP_BY_FIELD, topGroups, groupSort, withinGroupSort, limit, getScores, getMaxScores, fillFields);
|
|
280 |
SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(GROUP_BY_FIELD, topGroups, groupSort, withinGroupSort, maxDocsPerGroup , getScores, getMaxScores, fillFields);
|
|
276 | 281 |
getSearcher().search(fullQuery, MultiCollector.wrap(c2, c3)); |
277 | 282 |
|
278 | 283 |
TopGroups groupsResult = c2.getTopGroups(offset); |
cdmlib-services/src/test/java/eu/etaxonomy/cdm/api/service/TaxonServiceSearchTest.java | ||
---|---|---|
275 | 275 |
Pager<SearchResult<TaxonBase>> pager; |
276 | 276 |
|
277 | 277 |
pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, null, null, null, null); |
278 |
Assert.assertEquals("Expecting 1024 entities when searching for Rot*", Integer.valueOf(1024), pager.getCount());
|
|
278 |
Assert.assertEquals("Expecting all 1024 entities grouped into one SearchResult item when searching for Rot*", 1, pager.getCount().intValue());
|
|
279 | 279 |
} |
280 | 280 |
|
281 | 281 |
/** |
... | ... | |
356 | 356 |
pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, highlightFragments, pageSize, null, null, null); |
357 | 357 |
Assert.assertEquals("All matches should be grouped in one page", 1, pager.getPagesAvailable().intValue()); |
358 | 358 |
Map<String, String[]> highlightMap = pager.getRecords().get(0).getFieldHighlightMap(); |
359 |
Assert.assertEquals("expecting " + numOfItems+ " highlighted fragments of field 'name'", numOfItems, highlightMap.get("name").length); |
|
359 |
// maxDocsPerGroup is defined in LuceneSearch and defaults to 10 |
|
360 |
int maxDocsPerGroup = 10; |
|
361 |
Assert.assertEquals("expecting 10 highlighted fragments of field 'name'", maxDocsPerGroup, highlightMap.get("name").length); |
|
360 | 362 |
|
361 | 363 |
} |
362 | 364 |
|
cdmlib-services/src/test/resources/log4j.properties | ||
---|---|---|
35 | 35 |
#log4j.logger.eu.etaxonomy.cdm.model.taxon.TaxonNodeByNameComparator=trace |
36 | 36 |
# |
37 | 37 |
log4j.logger.eu.etaxonomy.cdm.test.function = info |
38 |
log4j.logger.eu.etaxonomy.cdm.test.integration = DEBUG
|
|
38 |
log4j.logger.eu.etaxonomy.cdm.test.integration = info
|
|
39 | 39 |
log4j.logger.eu.etaxonomy.cdm.api.application = warn |
40 | 40 |
log4j.logger.eu.etaxonomy.cdm.api.service = debug |
41 | 41 |
#log4j.logger.eu.etaxonomy.cdm.database.VocabularyStoreImpl = warn |
Also available in: Unified diff