Project

General

Profile

« Previous | Next » 

Revision 6e30169a

Added by Andreas Kohlbecker almost 12 years ago

Reverting erroneous commits r16027 to r16031

View differences:

cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/TaxonServiceImpl.java
21 21
import org.apache.log4j.Logger;
22 22
import org.apache.lucene.index.CorruptIndexException;
23 23
import org.apache.lucene.queryParser.ParseException;
24
import org.apache.lucene.search.BooleanClause.Occur;
25
import org.apache.lucene.search.BooleanQuery;
26 24
import org.apache.lucene.search.Query;
27 25
import org.apache.lucene.search.SortField;
28 26
import org.apache.lucene.search.TopDocs;
27
import org.hibernate.criterion.Criterion;
29 28
import org.springframework.beans.factory.annotation.Autowired;
30 29
import org.springframework.stereotype.Service;
31 30
import org.springframework.transaction.annotation.Propagation;
......
42 41
import eu.etaxonomy.cdm.api.service.pager.impl.DefaultPagerImpl;
43 42
import eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder;
44 43
import eu.etaxonomy.cdm.api.service.search.LuceneSearch;
45
import eu.etaxonomy.cdm.api.service.search.QueryFactory;
46 44
import eu.etaxonomy.cdm.api.service.search.SearchResult;
47 45
import eu.etaxonomy.cdm.api.service.search.SearchResultBuilder;
46
import eu.etaxonomy.cdm.api.service.search.SearchResultHighligther;
48 47
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
49 48
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
50 49
import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge;
51
import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
50
import eu.etaxonomy.cdm.hibernate.search.PaddedIntegerBridge;
52 51
import eu.etaxonomy.cdm.model.common.CdmBase;
52
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
53 53
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
54 54
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
55 55
import eu.etaxonomy.cdm.model.common.Language;
......
63 63
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
64 64
import eu.etaxonomy.cdm.model.description.TaxonDescription;
65 65
import eu.etaxonomy.cdm.model.description.TaxonInteraction;
66
import eu.etaxonomy.cdm.model.description.TextData;
67 66
import eu.etaxonomy.cdm.model.media.Media;
68 67
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
69 68
import eu.etaxonomy.cdm.model.media.MediaUtils;
70 69
import eu.etaxonomy.cdm.model.name.HomotypicalGroup;
70
import eu.etaxonomy.cdm.model.name.NonViralName;
71 71
import eu.etaxonomy.cdm.model.name.Rank;
72 72
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
73 73
import eu.etaxonomy.cdm.model.name.ZoologicalName;
......
1129 1129
        return dao.getUuidAndTitleCacheSynonym();
1130 1130
    }
1131 1131

  
1132
    /* (non-Javadoc)
1133
     * @see eu.etaxonomy.cdm.api.service.ITaxonService#findByFullText(java.lang.Class, java.lang.String, eu.etaxonomy.cdm.model.taxon.Classification, java.util.List, boolean, java.lang.Integer, java.lang.Integer, java.util.List, java.util.List)
1134
     */
1135
    @Override
1136
    public Pager<SearchResult<TaxonBase>> findByFullText(
1137
            Class<? extends TaxonBase> clazz, String queryString,
1138
            Classification classification, List<Language> languages,
1139
            boolean highlightFragments, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) throws CorruptIndexException, IOException, ParseException {
1140

  
1141
        // -- set defaults
1142
        // see LuceneSearch.pushAbstractBaseTypeDown()
1143
        Class<? extends TaxonBase> directorySelectClass = TaxonBase.class;
1144
        if(clazz != null && clazz.equals(directorySelectClass)){
1145
            clazz = null;
1146
        }
1147

  
1148
        LuceneSearch luceneSearch = prepareFindByFullTextSearch(queryString, classification, languages, highlightFragments);
1149

  
1150
        // --- execute search
1151
        TopDocs topDocsResultSet = luceneSearch.executeSearch(clazz, pageSize, pageNumber);
1152

  
1153
        // --- initialize taxa, highlight matches ....
1154
        ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneSearch.getQuery());
1155
        List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet(
1156
                topDocsResultSet, luceneSearch.getHighlightFields(), dao, "taxon.id", propertyPaths);
1157

  
1158
        return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, searchResults.size(), pageSize, searchResults);
1159
    }
1160

  
1161
    /**
1162
     * @param queryString
1163
     * @param classification
1164
     * @param languages
1165
     * @param highlightFragments
1166
     * @param directorySelectClass
1167
     * @return
1168
     */
1169
    public LuceneSearch prepareFindByFullTextSearch(String queryString, Classification classification, List<Language> languages,
1170
            boolean highlightFragments) {
1171
        BooleanQuery finalQuery = new BooleanQuery();
1172
        BooleanQuery textQuery = new BooleanQuery();
1173

  
1174
        LuceneSearch luceneSearch = new LuceneSearch(getSession(), TaxonBase.class);
1175
        QueryFactory queryFactory = new QueryFactory(luceneSearch);
1176

  
1177
        SortField[] sortFields = new  SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", false)};
1178
        luceneSearch.setSortFields(sortFields);
1179

  
1180
        // ---- search criteria
1181
        textQuery.add(queryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD);
1182
        textQuery.add(queryFactory.newDefinedTermBaseQuery("name.rank", queryString, languages), Occur.SHOULD);
1183

  
1184
        finalQuery.add(textQuery, Occur.MUST);
1185

  
1186
        if(classification != null){
1187
            finalQuery.add(queryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
1188
        }
1189
        luceneSearch.setQuery(finalQuery);
1190

  
1191
        if(highlightFragments){
1192
            luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray());
1193
        }
1194
        return luceneSearch;
1195
    }
1196

  
1197

  
1198
    /* (non-Javadoc)
1199
     * @see eu.etaxonomy.cdm.api.service.ITaxonService#findByDescriptionElementFullText(java.lang.Class, java.lang.String, eu.etaxonomy.cdm.model.taxon.Classification, java.util.List, java.util.List, boolean, java.lang.Integer, java.lang.Integer, java.util.List, java.util.List)
1200
     */
1201 1132
    @Override
1202 1133
    public Pager<SearchResult<TaxonBase>> findByDescriptionElementFullText(
1203 1134
            Class<? extends DescriptionElementBase> clazz, String queryString,
1204 1135
            Classification classification, List<Feature> features, List<Language> languages,
1205 1136
            boolean highlightFragments, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) throws CorruptIndexException, IOException, ParseException {
1206 1137

  
1207
        // -- set defaults
1208
        // see LuceneSearch.pushAbstractBaseTypeDown()
1209 1138
        Class<? extends DescriptionElementBase> directorySelectClass = DescriptionElementBase.class;
1210
        if(clazz != null && clazz.equals(directorySelectClass)){
1211
            clazz = null;
1139
        if(clazz != null){
1140
            directorySelectClass = clazz;
1212 1141
        }
1213 1142

  
1214
        LuceneSearch luceneSearch = prepareByDescriptionElementFullTextSearch(queryString, classification, features, languages, highlightFragments);
1215

  
1216
        // --- execute search
1217
        TopDocs topDocsResultSet = luceneSearch.executeSearch(clazz, pageSize, pageNumber);
1218

  
1219
        // --- initialize taxa, highlight matches ....
1220
        ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneSearch.getQuery());
1221
        List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet(
1222
                topDocsResultSet, luceneSearch.getHighlightFields(), dao, "inDescription.taxon.id", propertyPaths);
1223

  
1224
        return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, searchResults.size(), pageSize, searchResults);
1225

  
1226
    }
1227

  
1228
    public Pager<SearchResult<TaxonBase>> findByEveryThingFullText(
1229
            Class<? extends DescriptionElementBase> clazz, String queryString,
1230
            Classification classification, List<Feature> features, List<Language> languages,
1231
            boolean highlightFragments, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) throws CorruptIndexException, IOException, ParseException {
1232

  
1233
        // -- set defaults
1234
        // see LuceneSearch.pushAbstractBaseTypeDown()
1235
        Class<? extends DescriptionElementBase> directorySelectClass = DescriptionElementBase.class;
1236
        if(clazz != null && clazz.equals(directorySelectClass)){
1237
            clazz = null;
1238
        }
1239

  
1240
        LuceneSearch luceneSearchByDescriptionElement = prepareByDescriptionElementFullTextSearch(queryString, classification, features, languages, highlightFragments);
1241
        LuceneSearch luceneSearchByTaxonBase = prepareFindByFullTextSearch(queryString, classification, languages, highlightFragments);
1242

  
1243
        //FIXME use MultiSearcher ....
1244

  
1245
        // --- execute search
1246
        TopDocs topDocsResultSet = luceneSearchByDescriptionElement.executeSearch(clazz, pageSize, pageNumber);
1247

  
1248
        // --- initialize taxa, highlight matches ....
1249
        ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearchByDescriptionElement, luceneSearchByDescriptionElement.getQuery());
1250
        List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet(
1251
                topDocsResultSet, luceneSearchByDescriptionElement.getHighlightFields(), dao, "inDescription.taxon.id", propertyPaths);
1252

  
1253
        return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, searchResults.size(), pageSize, searchResults);
1254

  
1255
    }
1256

  
1257

  
1258
    /**
1259
     * @param queryString
1260
     * @param classification
1261
     * @param features
1262
     * @param languages
1263
     * @param highlightFragments
1264
     * @param directorySelectClass
1265
     * @return
1266
     */
1267
    public LuceneSearch prepareByDescriptionElementFullTextSearch(String queryString, Classification classification, List<Feature> features,
1268
            List<Language> languages, boolean highlightFragments) {
1269
        BooleanQuery finalQuery = new BooleanQuery();
1270
        BooleanQuery textQuery = new BooleanQuery();
1271

  
1272
        LuceneSearch luceneSearch = new LuceneSearch(getSession(), DescriptionElementBase.class);
1273
        QueryFactory queryFactory = new QueryFactory(luceneSearch);
1274

  
1275
        SortField[] sortFields = new  SortField[]{SortField.FIELD_SCORE, new SortField("inDescription.taxon.titleCache__sort", false)};
1276
        luceneSearch.setSortFields(sortFields);
1277

  
1143
        Set<String> freetextFields = new HashSet<String>();
1278 1144
        // ---- search criteria
1279
        textQuery.add(queryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD);
1280

  
1145
        freetextFields.add("titleCache");
1146
        StringBuilder luceneQueryTemplate = new StringBuilder();
1147
        luceneQueryTemplate.append("+(");
1148
        luceneQueryTemplate.append("titleCache:(%1$s) ");
1281 1149
        // common name
1282
        Query nameQuery;
1150
        freetextFields.add("name");
1283 1151
        if(languages == null || languages.size() == 0){
1284
            nameQuery = queryFactory.newTermQuery("name", queryString);
1152
            luceneQueryTemplate.append("name:(%1$s) ");
1285 1153
        } else {
1286
            nameQuery = new BooleanQuery();
1287
            BooleanQuery languageSubQuery = new BooleanQuery();
1154
            luceneQueryTemplate.append("(+name:(%1$s) ");
1288 1155
            for(Language lang : languages){
1289
                languageSubQuery.add(queryFactory.newTermQuery("language.uuid",  lang.getUuid().toString()), Occur.SHOULD);
1156
                luceneQueryTemplate.append(" +language.uuid:" + lang.getUuid().toString());
1290 1157
            }
1291
            ((BooleanQuery) nameQuery).add(queryFactory.newTermQuery("name", queryString), Occur.MUST);
1292
            ((BooleanQuery) nameQuery).add(languageSubQuery, Occur.MUST);
1158
            luceneQueryTemplate.append(")");
1293 1159
        }
1294
        textQuery.add(nameQuery, Occur.SHOULD);
1295

  
1296

  
1297 1160
        // text field from TextData
1298
        textQuery.add(queryFactory.newLocalizedTermQuery("text", queryString, languages), Occur.SHOULD);
1299

  
1300
        // --- TermBase fields - by representation ----
1161
        freetextFields.add("text.ALL");
1162
        appendLocalizedFieldQuery("text", languages, luceneQueryTemplate).append(" ");
1301 1163
        // state field from CategoricalData
1302
        textQuery.add(queryFactory.newLocalizedTermQuery("states.state.representation", queryString, languages), Occur.SHOULD);
1303

  
1164
        freetextFields.add("states.state.representation.ALL");
1165
        appendLocalizedFieldQuery("states.state.representation", languages, luceneQueryTemplate).append(" ");
1304 1166
        // modifying text of states from CategoricalData
1305
        textQuery.add(queryFactory.newLocalizedTermQuery("states.modifyingText", queryString, languages), Occur.SHOULD);
1306

  
1307
        finalQuery.add(textQuery, Occur.MUST);
1308
        // --- classification ----
1167
        freetextFields.add("states.modifyingText.ALL");
1168
        appendLocalizedFieldQuery("states.modifyingText", languages, luceneQueryTemplate).append(" ");
1169
        luceneQueryTemplate.append(") ");
1309 1170

  
1310 1171
        if(classification != null){
1311
            finalQuery.add(queryFactory.newEntityIdQuery("inDescription.taxon.taxonNodes.classification.id", classification), Occur.MUST);
1172
            luceneQueryTemplate.append("+inDescription.taxon.taxonNodes.classification.id:").append(PaddedIntegerBridge.paddInteger(classification.getId())).append(" ");
1312 1173
        }
1313 1174

  
1314
        // --- IdentifiableEntity fields - by uuid
1315 1175
        if(features != null && features.size() > 0 ){
1316
            finalQuery.add(queryFactory.newEntityUuidQuery("feature.uuid", features), Occur.MUST);
1176
            luceneQueryTemplate.append("+feature.uuid:(");
1177
            for(Feature feature : features){
1178
                luceneQueryTemplate.append(feature.getUuid()).append(" ");
1179
            }
1180
            luceneQueryTemplate.append(") ");
1317 1181
        }
1318 1182

  
1319 1183
        // the description must be associated with a taxon
1320
        finalQuery.add(queryFactory.newIdNotNullQuery("inDescription.taxon.id"), Occur.MUST);
1184
        // TODO open range queries [0 TO *] not working in the current version of lucene (https://issues.apache.org/jira/browse/LUCENE-995)
1185
        //       so we are using integer maximum as workaround
1186
        luceneQueryTemplate.append("+inDescription.taxon.id:[ " + PaddedIntegerBridge.paddInteger(0) + " TO " + PaddedIntegerBridge.paddInteger(Integer.MAX_VALUE) + "] ");
1187
        //luceneQueryTemplate.append("-inDescription.taxon.id:" + PaddedIntegerBridge.NULL_STRING);
1188

  
1189
        String luceneQueryStr = String.format(luceneQueryTemplate.toString(), queryString);
1190

  
1191
        // --- sort fields
1192
        SortField[] sortFields = new  SortField[]{SortField.FIELD_SCORE, new SortField("inDescription.taxon.titleCache__sort", false)};
1193

  
1194
        // ---- execute criteria
1195
        LuceneSearch luceneSearch = new LuceneSearch(getSession(), directorySelectClass);
1321 1196

  
1322
        luceneSearch.setQuery(finalQuery);
1197
        Query luceneQuery = luceneSearch.parse(luceneQueryStr);
1198
        TopDocs topDocsResultSet = luceneSearch.executeSearch(luceneQuery, clazz, pageSize, pageNumber, sortFields);
1323 1199

  
1200
        String[] highlightFields = null;
1324 1201
        if(highlightFragments){
1325
            luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray());
1202
            highlightFields = freetextFields.toArray(new String[freetextFields.size()]);
1326 1203
        }
1327
        return luceneSearch;
1204

  
1205
        // initialize taxa, highlight matches ....
1206
        ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneQuery);
1207
        List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet(
1208
                topDocsResultSet, highlightFields, dao, "inDescription.taxon.id", propertyPaths);
1209

  
1210
        return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, searchResults.size(), pageSize, searchResults);
1211

  
1328 1212
    }
1329 1213

  
1330 1214
    /**

Also available in: Unified diff