Project

General

Profile

Revision c80938b2

ID c80938b27d72b67394cbf5cfc68d7959e3c18f9d
Parent 914aa9d8
Child 78fae82d

Added by Andreas Kohlbecker over 8 years ago

almost 100% fixing #2950 (Allow grouping of lucene (hibernate search) results) - only max score is not yet available

View differences:

.gitattributes
606 606
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/DateTimeBridge.java -text
607 607
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/DefinedTermBaseClassBridge.java -text
608 608
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/DescriptionBaseClassBridge.java -text
609
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/GroupByTaxonClassBridge.java -text
609 610
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/IdFieldOptions.java -text
610 611
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/MultilanguageTextFieldBridge.java -text
611 612
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/NotNullAwareIdBridge.java -text
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/AbstractClassBridge.java
17 17
    protected final static NotNullAwareIdBridge idFieldBridge = new NotNullAwareIdBridge();
18 18

  
19 19
    public static LuceneOptions idFieldOptions = new IdFieldOptions();
20

  
20 21
    public static LuceneOptions sortFieldOptions = new SortFieldOptions();
21 22

  
22 23

  
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/DescriptionBaseClassBridge.java
50 50
     */
51 51
    public void set(String name, Object entity, Document document, LuceneOptions luceneOptions) {
52 52

  
53

  
54 53
            if (entity instanceof TaxonDescription) {
55
                Taxon taxon = ((TaxonDescription) entity).getTaxon();
54

  
55
                Taxon taxon = ((TaxonDescription)entity).getTaxon();
56 56

  
57 57
                if (taxon != null) {
58 58

  
......
86 86
            }
87 87
    }
88 88

  
89

  
89 90
}
cdmlib-model/src/main/java/eu/etaxonomy/cdm/hibernate/search/GroupByTaxonClassBridge.java
1
package eu.etaxonomy.cdm.hibernate.search;
2

  
3
import org.apache.lucene.document.Document;
4
import org.hibernate.search.annotations.ClassBridge;
5
import org.hibernate.search.bridge.LuceneOptions;
6

  
7
import eu.etaxonomy.cdm.model.description.TaxonDescription;
8
import eu.etaxonomy.cdm.model.taxon.Taxon;
9

  
10
/**
11
 * The <code>GroupByTaxonClassBridge</code> adds the field
12
 * <code>groupby_taxon.id</code> to the lucene document which can be used to
13
 * group search results based on the taxon which is associated with the indexed
14
 * cdm entity. So any cdm class which is involved in querying for taxa must
15
 * use this class bridge, e.g.:
16
 *
17
 * <pre>
18
   @ClassBridge(impl=GroupByTaxonClassBridge.class)
19
  </pre>
20
 *
21
 * or
22
 * <pre>
23
   @ClassBridges({
24
     @ClassBridge(impl=GroupByTaxonClassBridge.class),
25
     @ClassBridge(impl=DescriptionBaseClassBridge.class),
26
     })
27
 *
28
 * </pre>
29
 *
30
 * @author a.kohlbecker
31
 * @date Oct 4, 2012
32
 *
33
 */
34
public class GroupByTaxonClassBridge extends AbstractClassBridge {
35

  
36
    public static final String GROUPBY_TAXON_FIELD = "groupby_taxon.id";
37

  
38
    public GroupByTaxonClassBridge() {
39
        super();
40
    }
41

  
42
    /**
43
     * @param entity
44
     * @return
45
     */
46
    protected Taxon getAssociatedTaxon(Object entity) {
47

  
48
        if (entity instanceof TaxonDescription) {
49
            return ((TaxonDescription) entity).getTaxon();
50
        }
51
        if (entity instanceof Taxon) {
52
            return (Taxon)entity;
53
        }
54

  
55
        throw new RuntimeException("CDM class " + entity.getClass() + " not yet supported");
56
    }
57

  
58
    @Override
59
    public void set(String name, Object value, Document document, LuceneOptions luceneOptions) {
60

  
61
        Taxon taxon = getAssociatedTaxon(value);
62
        if(taxon != null){
63
            idFieldBridge.set(GROUPBY_TAXON_FIELD, taxon.getId(), document, idFieldOptions);
64
        }
65
    }
66

  
67
}
cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/description/DescriptionBase.java
34 34
import org.hibernate.annotations.CascadeType;
35 35
import org.hibernate.envers.Audited;
36 36
import org.hibernate.search.annotations.ClassBridge;
37
import org.hibernate.search.annotations.ClassBridges;
37 38
import org.hibernate.search.annotations.ContainedIn;
38 39
import org.hibernate.search.annotations.Field;
39 40

  
40 41
import eu.etaxonomy.cdm.hibernate.search.DescriptionBaseClassBridge;
42
import eu.etaxonomy.cdm.hibernate.search.GroupByTaxonClassBridge;
41 43
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
42 44
import eu.etaxonomy.cdm.model.name.NameRelationship;
43 45
import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
......
73 75
@Entity
74 76
@Audited
75 77
@Inheritance(strategy=InheritanceType.SINGLE_TABLE)
76
@ClassBridge(impl=DescriptionBaseClassBridge.class)
78
@ClassBridges({
79
    @ClassBridge(impl=DescriptionBaseClassBridge.class),
80
    @ClassBridge(impl=GroupByTaxonClassBridge.class)
81
})
77 82
public abstract class DescriptionBase<S extends IIdentifiableEntityCacheStrategy> extends IdentifiableEntity<S> {
78 83
    private static final long serialVersionUID = 5504218413819040193L;
79 84
    private static final Logger logger = Logger.getLogger(DescriptionBase.class);
cdmlib-model/src/main/java/eu/etaxonomy/cdm/model/taxon/Taxon.java
40 40
import org.hibernate.annotations.Cascade;
41 41
import org.hibernate.annotations.CascadeType;
42 42
import org.hibernate.envers.Audited;
43
import org.hibernate.search.annotations.ClassBridge;
43 44
import org.hibernate.search.annotations.ContainedIn;
44 45
import org.hibernate.search.annotations.Indexed;
45 46
import org.hibernate.search.annotations.IndexedEmbedded;
46 47
import org.springframework.beans.factory.annotation.Configurable;
47 48
import org.springframework.util.ReflectionUtils;
48 49

  
50
import eu.etaxonomy.cdm.hibernate.search.GroupByTaxonClassBridge;
49 51
import eu.etaxonomy.cdm.model.common.IRelated;
50 52
import eu.etaxonomy.cdm.model.common.RelationshipBase;
51 53
import eu.etaxonomy.cdm.model.description.TaxonDescription;
......
82 84
@Indexed(index = "eu.etaxonomy.cdm.model.taxon.TaxonBase")
83 85
@Audited
84 86
@Configurable
87
@ClassBridge(impl=GroupByTaxonClassBridge.class)
85 88
public class Taxon extends TaxonBase<IIdentifiableEntityCacheStrategy<Taxon>> implements IRelated<RelationshipBase>, Cloneable{
86 89
    private static final long serialVersionUID = -584946869762749006L;
87 90
    private static final Logger logger = Logger.getLogger(Taxon.class);
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/TaxonServiceImpl.java
1217 1217
        Map<CdmBaseType, String> idFieldMap = new HashMap<CdmBaseType, String>();
1218 1218
        idFieldMap.put(CdmBaseType.DESCRIPTION_ELEMENT, "inDescription.taxon.id");
1219 1219

  
1220
        // --- initialize taxa, thighlight matches ....
1220
        // --- initialize taxa, highlight matches ....
1221 1221
        ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneSearch.getQuery());
1222 1222
        @SuppressWarnings("rawtypes")
1223 1223
        List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet(
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/search/LuceneSearch.java
42 42
import org.hibernate.search.reader.ReaderProvider;
43 43
import org.hibernate.search.store.DirectoryProvider;
44 44

  
45
import eu.etaxonomy.cdm.hibernate.search.GroupByTaxonClassBridge;
45 46
import eu.etaxonomy.cdm.model.common.CdmBase;
46 47
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
47 48
import eu.etaxonomy.cdm.model.description.TextData;
......
56 57
 */
57 58
public class LuceneSearch {
58 59

  
59
    private static final String GROUP_BY_FIELD = "id";
60
    private static final String GROUP_BY_FIELD = GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD;
60 61

  
61 62
    public static final Logger logger = Logger.getLogger(LuceneSearch.class);
62 63

  
......
112 113

  
113 114
    protected String[] highlightFields = new String[0];
114 115

  
116
    private int maxDocsPerGroup = 10;
117

  
118

  
119
    public int getMaxDocsPerGroup() {
120
        return maxDocsPerGroup;
121
    }
122

  
123
    public void setMaxDocsPerGroup(int maxDocsPerGroup) {
124
        this.maxDocsPerGroup = maxDocsPerGroup;
125
    }
115 126

  
116 127
    /**
117 128
     * @param session
......
238 249
        }
239 250

  
240 251
        Query fullQuery = expandQuery();
241

  
242 252
        logger.info("final query: " + fullQuery.toString());
243 253

  
244 254
        int offset = pageNumber * pageSize;
245 255
        int limit = (pageNumber + 1) * pageSize - 1 ;
246

  
247 256
        logger.debug("start: " + offset + "; limit:" + limit);
248 257

  
249
//        TopDocs topDocs = null;
250

  
251
        // sort must be non null default: Sort.RELEVANCE
252 258
        Sort groupSort = null;
253 259
        Sort withinGroupSort = Sort.RELEVANCE;
254 260
        if(sortFields != null && sortFields.length > 0){
255 261
            Sort sort = new Sort(sortFields);
256 262
            groupSort = new Sort(sortFields);
257
//            topDocs = getSearcher().search(fullQuery, null, limit, sort);
258 263
        } else {
259 264
            groupSort = Sort.RELEVANCE; // == SortField.FIELD_SCORE !!
260
//            topDocs = getSearcher().search(fullQuery, null, limit);
261 265
        }
266

  
262 267
        FirstPassGroupingCollector groupingCollector_1 = new FirstPassGroupingCollector(GROUP_BY_FIELD, withinGroupSort, limit);
263 268
        getSearcher().search(fullQuery, groupingCollector_1);
264 269

  
......
272 277
        boolean getMaxScores = true;
273 278
        boolean fillFields = true;
274 279
        AllGroupsCollector c3 = new AllGroupsCollector(GROUP_BY_FIELD);
275
        SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(GROUP_BY_FIELD, topGroups, groupSort, withinGroupSort, limit, getScores, getMaxScores, fillFields);
280
        SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(GROUP_BY_FIELD, topGroups, groupSort, withinGroupSort, maxDocsPerGroup , getScores, getMaxScores, fillFields);
276 281
        getSearcher().search(fullQuery, MultiCollector.wrap(c2, c3));
277 282

  
278 283
        TopGroups groupsResult = c2.getTopGroups(offset);
cdmlib-services/src/test/java/eu/etaxonomy/cdm/api/service/TaxonServiceSearchTest.java
275 275
        Pager<SearchResult<TaxonBase>> pager;
276 276

  
277 277
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, null, null, null, null);
278
        Assert.assertEquals("Expecting 1024 entities when searching for Rot*", Integer.valueOf(1024), pager.getCount());
278
        Assert.assertEquals("Expecting all 1024 entities grouped into one SearchResult item when searching for Rot*", 1, pager.getCount().intValue());
279 279
    }
280 280

  
281 281
    /**
......
356 356
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, highlightFragments, pageSize, null, null, null);
357 357
        Assert.assertEquals("All matches should be grouped in one page", 1, pager.getPagesAvailable().intValue());
358 358
        Map<String, String[]> highlightMap = pager.getRecords().get(0).getFieldHighlightMap();
359
        Assert.assertEquals("expecting " + numOfItems+ " highlighted fragments of field 'name'", numOfItems, highlightMap.get("name").length);
359
        // maxDocsPerGroup is defined in LuceneSearch and defaults to 10
360
        int maxDocsPerGroup = 10;
361
        Assert.assertEquals("expecting 10 highlighted fragments of field 'name'", maxDocsPerGroup, highlightMap.get("name").length);
360 362

  
361 363
    }
362 364

  
cdmlib-services/src/test/resources/log4j.properties
35 35
#log4j.logger.eu.etaxonomy.cdm.model.taxon.TaxonNodeByNameComparator=trace
36 36
#
37 37
log4j.logger.eu.etaxonomy.cdm.test.function = info
38
log4j.logger.eu.etaxonomy.cdm.test.integration = DEBUG
38
log4j.logger.eu.etaxonomy.cdm.test.integration = info
39 39
log4j.logger.eu.etaxonomy.cdm.api.application = warn
40 40
log4j.logger.eu.etaxonomy.cdm.api.service = debug
41 41
#log4j.logger.eu.etaxonomy.cdm.database.VocabularyStoreImpl = warn

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)