Revision 6e30169a
Added by Andreas Kohlbecker almost 12 years ago
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/TaxonServiceImpl.java | ||
---|---|---|
21 | 21 |
import org.apache.log4j.Logger; |
22 | 22 |
import org.apache.lucene.index.CorruptIndexException; |
23 | 23 |
import org.apache.lucene.queryParser.ParseException; |
24 |
import org.apache.lucene.search.BooleanClause.Occur; |
|
25 |
import org.apache.lucene.search.BooleanQuery; |
|
26 | 24 |
import org.apache.lucene.search.Query; |
27 | 25 |
import org.apache.lucene.search.SortField; |
28 | 26 |
import org.apache.lucene.search.TopDocs; |
27 |
import org.hibernate.criterion.Criterion; |
|
29 | 28 |
import org.springframework.beans.factory.annotation.Autowired; |
30 | 29 |
import org.springframework.stereotype.Service; |
31 | 30 |
import org.springframework.transaction.annotation.Propagation; |
... | ... | |
42 | 41 |
import eu.etaxonomy.cdm.api.service.pager.impl.DefaultPagerImpl; |
43 | 42 |
import eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder; |
44 | 43 |
import eu.etaxonomy.cdm.api.service.search.LuceneSearch; |
45 |
import eu.etaxonomy.cdm.api.service.search.QueryFactory; |
|
46 | 44 |
import eu.etaxonomy.cdm.api.service.search.SearchResult; |
47 | 45 |
import eu.etaxonomy.cdm.api.service.search.SearchResultBuilder; |
46 |
import eu.etaxonomy.cdm.api.service.search.SearchResultHighligther; |
|
48 | 47 |
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor; |
49 | 48 |
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper; |
50 | 49 |
import eu.etaxonomy.cdm.hibernate.search.DefinedTermBaseClassBridge; |
51 |
import eu.etaxonomy.cdm.hibernate.search.MultilanguageTextFieldBridge;
|
|
50 |
import eu.etaxonomy.cdm.hibernate.search.PaddedIntegerBridge;
|
|
52 | 51 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
52 |
import eu.etaxonomy.cdm.model.common.DefinedTermBase; |
|
53 | 53 |
import eu.etaxonomy.cdm.model.common.IdentifiableEntity; |
54 | 54 |
import eu.etaxonomy.cdm.model.common.IdentifiableSource; |
55 | 55 |
import eu.etaxonomy.cdm.model.common.Language; |
... | ... | |
63 | 63 |
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode; |
64 | 64 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
65 | 65 |
import eu.etaxonomy.cdm.model.description.TaxonInteraction; |
66 |
import eu.etaxonomy.cdm.model.description.TextData; |
|
67 | 66 |
import eu.etaxonomy.cdm.model.media.Media; |
68 | 67 |
import eu.etaxonomy.cdm.model.media.MediaRepresentation; |
69 | 68 |
import eu.etaxonomy.cdm.model.media.MediaUtils; |
70 | 69 |
import eu.etaxonomy.cdm.model.name.HomotypicalGroup; |
70 |
import eu.etaxonomy.cdm.model.name.NonViralName; |
|
71 | 71 |
import eu.etaxonomy.cdm.model.name.Rank; |
72 | 72 |
import eu.etaxonomy.cdm.model.name.TaxonNameBase; |
73 | 73 |
import eu.etaxonomy.cdm.model.name.ZoologicalName; |
... | ... | |
1129 | 1129 |
return dao.getUuidAndTitleCacheSynonym(); |
1130 | 1130 |
} |
1131 | 1131 |
|
1132 |
/* (non-Javadoc) |
|
1133 |
* @see eu.etaxonomy.cdm.api.service.ITaxonService#findByFullText(java.lang.Class, java.lang.String, eu.etaxonomy.cdm.model.taxon.Classification, java.util.List, boolean, java.lang.Integer, java.lang.Integer, java.util.List, java.util.List) |
|
1134 |
*/ |
|
1135 |
@Override |
|
1136 |
public Pager<SearchResult<TaxonBase>> findByFullText( |
|
1137 |
Class<? extends TaxonBase> clazz, String queryString, |
|
1138 |
Classification classification, List<Language> languages, |
|
1139 |
boolean highlightFragments, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) throws CorruptIndexException, IOException, ParseException { |
|
1140 |
|
|
1141 |
// -- set defaults |
|
1142 |
// see LuceneSearch.pushAbstractBaseTypeDown() |
|
1143 |
Class<? extends TaxonBase> directorySelectClass = TaxonBase.class; |
|
1144 |
if(clazz != null && clazz.equals(directorySelectClass)){ |
|
1145 |
clazz = null; |
|
1146 |
} |
|
1147 |
|
|
1148 |
LuceneSearch luceneSearch = prepareFindByFullTextSearch(queryString, classification, languages, highlightFragments); |
|
1149 |
|
|
1150 |
// --- execute search |
|
1151 |
TopDocs topDocsResultSet = luceneSearch.executeSearch(clazz, pageSize, pageNumber); |
|
1152 |
|
|
1153 |
// --- initialize taxa, thighlight matches .... |
|
1154 |
ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneSearch.getQuery()); |
|
1155 |
List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet( |
|
1156 |
topDocsResultSet, luceneSearch.getHighlightFields(), dao, "taxon.id", propertyPaths); |
|
1157 |
|
|
1158 |
return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, searchResults.size(), pageSize, searchResults); |
|
1159 |
} |
|
1160 |
|
|
1161 |
/** |
|
1162 |
* @param queryString |
|
1163 |
* @param classification |
|
1164 |
* @param languages |
|
1165 |
* @param highlightFragments |
|
1166 |
* @param directorySelectClass |
|
1167 |
* @return |
|
1168 |
*/ |
|
1169 |
public LuceneSearch prepareFindByFullTextSearch(String queryString, Classification classification, List<Language> languages, |
|
1170 |
boolean highlightFragments) { |
|
1171 |
BooleanQuery finalQuery = new BooleanQuery(); |
|
1172 |
BooleanQuery textQuery = new BooleanQuery(); |
|
1173 |
|
|
1174 |
LuceneSearch luceneSearch = new LuceneSearch(getSession(), TaxonBase.class); |
|
1175 |
QueryFactory queryFactory = new QueryFactory(luceneSearch); |
|
1176 |
|
|
1177 |
SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", false)}; |
|
1178 |
luceneSearch.setSortFields(sortFields); |
|
1179 |
|
|
1180 |
// ---- search criteria |
|
1181 |
textQuery.add(queryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD); |
|
1182 |
textQuery.add(queryFactory.newDefinedTermBaseQuery("name.rank", queryString, languages), Occur.SHOULD); |
|
1183 |
|
|
1184 |
finalQuery.add(textQuery, Occur.MUST); |
|
1185 |
|
|
1186 |
if(classification != null){ |
|
1187 |
finalQuery.add(queryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST); |
|
1188 |
} |
|
1189 |
luceneSearch.setQuery(finalQuery); |
|
1190 |
|
|
1191 |
if(highlightFragments){ |
|
1192 |
luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray()); |
|
1193 |
} |
|
1194 |
return luceneSearch; |
|
1195 |
} |
|
1196 |
|
|
1197 |
|
|
1198 |
/* (non-Javadoc) |
|
1199 |
* @see eu.etaxonomy.cdm.api.service.ITaxonService#findByDescriptionElementFullText(java.lang.Class, java.lang.String, eu.etaxonomy.cdm.model.taxon.Classification, java.util.List, java.util.List, boolean, java.lang.Integer, java.lang.Integer, java.util.List, java.util.List) |
|
1200 |
*/ |
|
1201 | 1132 |
@Override |
1202 | 1133 |
public Pager<SearchResult<TaxonBase>> findByDescriptionElementFullText( |
1203 | 1134 |
Class<? extends DescriptionElementBase> clazz, String queryString, |
1204 | 1135 |
Classification classification, List<Feature> features, List<Language> languages, |
1205 | 1136 |
boolean highlightFragments, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) throws CorruptIndexException, IOException, ParseException { |
1206 | 1137 |
|
1207 |
// -- set defaults |
|
1208 |
// see LuceneSearch.pushAbstractBaseTypeDown() |
|
1209 | 1138 |
Class<? extends DescriptionElementBase> directorySelectClass = DescriptionElementBase.class; |
1210 |
if(clazz != null && clazz.equals(directorySelectClass)){
|
|
1211 |
clazz = null;
|
|
1139 |
if(clazz != null){ |
|
1140 |
directorySelectClass = clazz;
|
|
1212 | 1141 |
} |
1213 | 1142 |
|
1214 |
LuceneSearch luceneSearch = prepareByDescriptionElementFullTextSearch(queryString, classification, features, languages, highlightFragments); |
|
1215 |
|
|
1216 |
// --- execute search |
|
1217 |
TopDocs topDocsResultSet = luceneSearch.executeSearch(clazz, pageSize, pageNumber); |
|
1218 |
|
|
1219 |
// --- initialize taxa, thighlight matches .... |
|
1220 |
ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneSearch.getQuery()); |
|
1221 |
List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet( |
|
1222 |
topDocsResultSet, luceneSearch.getHighlightFields(), dao, "inDescription.taxon.id", propertyPaths); |
|
1223 |
|
|
1224 |
return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, searchResults.size(), pageSize, searchResults); |
|
1225 |
|
|
1226 |
} |
|
1227 |
|
|
1228 |
public Pager<SearchResult<TaxonBase>> findByEveryThingFullText( |
|
1229 |
Class<? extends DescriptionElementBase> clazz, String queryString, |
|
1230 |
Classification classification, List<Feature> features, List<Language> languages, |
|
1231 |
boolean highlightFragments, Integer pageSize, Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) throws CorruptIndexException, IOException, ParseException { |
|
1232 |
|
|
1233 |
// -- set defaults |
|
1234 |
// see LuceneSearch.pushAbstractBaseTypeDown() |
|
1235 |
Class<? extends DescriptionElementBase> directorySelectClass = DescriptionElementBase.class; |
|
1236 |
if(clazz != null && clazz.equals(directorySelectClass)){ |
|
1237 |
clazz = null; |
|
1238 |
} |
|
1239 |
|
|
1240 |
LuceneSearch luceneSearchByDescriptionElement = prepareByDescriptionElementFullTextSearch(queryString, classification, features, languages, highlightFragments); |
|
1241 |
LuceneSearch luceneSearchByTaxonBase = prepareFindByFullTextSearch(queryString, classification, languages, highlightFragments); |
|
1242 |
|
|
1243 |
//FIXME use MultiSearcher .... |
|
1244 |
|
|
1245 |
// --- execute search |
|
1246 |
TopDocs topDocsResultSet = luceneSearchByDescriptionElement.executeSearch(clazz, pageSize, pageNumber); |
|
1247 |
|
|
1248 |
// --- initialize taxa, thighlight matches .... |
|
1249 |
ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearchByDescriptionElement, luceneSearchByDescriptionElement.getQuery()); |
|
1250 |
List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet( |
|
1251 |
topDocsResultSet, luceneSearchByDescriptionElement.getHighlightFields(), dao, "inDescription.taxon.id", propertyPaths); |
|
1252 |
|
|
1253 |
return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, searchResults.size(), pageSize, searchResults); |
|
1254 |
|
|
1255 |
} |
|
1256 |
|
|
1257 |
|
|
1258 |
/** |
|
1259 |
* @param queryString |
|
1260 |
* @param classification |
|
1261 |
* @param features |
|
1262 |
* @param languages |
|
1263 |
* @param highlightFragments |
|
1264 |
* @param directorySelectClass |
|
1265 |
* @return |
|
1266 |
*/ |
|
1267 |
public LuceneSearch prepareByDescriptionElementFullTextSearch(String queryString, Classification classification, List<Feature> features, |
|
1268 |
List<Language> languages, boolean highlightFragments) { |
|
1269 |
BooleanQuery finalQuery = new BooleanQuery(); |
|
1270 |
BooleanQuery textQuery = new BooleanQuery(); |
|
1271 |
|
|
1272 |
LuceneSearch luceneSearch = new LuceneSearch(getSession(), DescriptionElementBase.class); |
|
1273 |
QueryFactory queryFactory = new QueryFactory(luceneSearch); |
|
1274 |
|
|
1275 |
SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("inDescription.taxon.titleCache__sort", false)}; |
|
1276 |
luceneSearch.setSortFields(sortFields); |
|
1277 |
|
|
1143 |
Set<String> freetextFields = new HashSet<String>(); |
|
1278 | 1144 |
// ---- search criteria |
1279 |
textQuery.add(queryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD); |
|
1280 |
|
|
1145 |
freetextFields.add("titleCache"); |
|
1146 |
StringBuilder luceneQueryTemplate = new StringBuilder(); |
|
1147 |
luceneQueryTemplate.append("+("); |
|
1148 |
luceneQueryTemplate.append("titleCache:(%1$s) "); |
|
1281 | 1149 |
// common name |
1282 |
Query nameQuery;
|
|
1150 |
freetextFields.add("name");
|
|
1283 | 1151 |
if(languages == null || languages.size() == 0){ |
1284 |
nameQuery = queryFactory.newTermQuery("name", queryString);
|
|
1152 |
luceneQueryTemplate.append("name:(%1$s) ");
|
|
1285 | 1153 |
} else { |
1286 |
nameQuery = new BooleanQuery(); |
|
1287 |
BooleanQuery languageSubQuery = new BooleanQuery(); |
|
1154 |
luceneQueryTemplate.append("(+name:(%1$s) "); |
|
1288 | 1155 |
for(Language lang : languages){ |
1289 |
languageSubQuery.add(queryFactory.newTermQuery("language.uuid", lang.getUuid().toString()), Occur.SHOULD);
|
|
1156 |
luceneQueryTemplate.append(" +language.uuid:" + lang.getUuid().toString());
|
|
1290 | 1157 |
} |
1291 |
((BooleanQuery) nameQuery).add(queryFactory.newTermQuery("name", queryString), Occur.MUST); |
|
1292 |
((BooleanQuery) nameQuery).add(languageSubQuery, Occur.MUST); |
|
1158 |
luceneQueryTemplate.append(")"); |
|
1293 | 1159 |
} |
1294 |
textQuery.add(nameQuery, Occur.SHOULD); |
|
1295 |
|
|
1296 |
|
|
1297 | 1160 |
// text field from TextData |
1298 |
textQuery.add(queryFactory.newLocalizedTermQuery("text", queryString, languages), Occur.SHOULD); |
|
1299 |
|
|
1300 |
// --- TermBase fields - by representation ---- |
|
1161 |
freetextFields.add("text.ALL"); |
|
1162 |
appendLocalizedFieldQuery("text", languages, luceneQueryTemplate).append(" "); |
|
1301 | 1163 |
// state field from CategoricalData |
1302 |
textQuery.add(queryFactory.newLocalizedTermQuery("states.state.representation", queryString, languages), Occur.SHOULD);
|
|
1303 |
|
|
1164 |
freetextFields.add("states.state.representation.ALL");
|
|
1165 |
appendLocalizedFieldQuery("states.state.representation", languages, luceneQueryTemplate).append(" "); |
|
1304 | 1166 |
// state field from CategoricalData |
1305 |
textQuery.add(queryFactory.newLocalizedTermQuery("states.modifyingText", queryString, languages), Occur.SHOULD); |
|
1306 |
|
|
1307 |
finalQuery.add(textQuery, Occur.MUST); |
|
1308 |
// --- classification ---- |
|
1167 |
freetextFields.add("states.modifyingText.ALL"); |
|
1168 |
appendLocalizedFieldQuery("states.modifyingText", languages, luceneQueryTemplate).append(" "); |
|
1169 |
luceneQueryTemplate.append(") "); |
|
1309 | 1170 |
|
1310 | 1171 |
if(classification != null){ |
1311 |
finalQuery.add(queryFactory.newEntityIdQuery("inDescription.taxon.taxonNodes.classification.id", classification), Occur.MUST);
|
|
1172 |
luceneQueryTemplate.append("+inDescription.taxon.taxonNodes.classification.id:").append(PaddedIntegerBridge.paddInteger(classification.getId())).append(" ");
|
|
1312 | 1173 |
} |
1313 | 1174 |
|
1314 |
// --- IdentifieableEntity fields - by uuid |
|
1315 | 1175 |
if(features != null && features.size() > 0 ){ |
1316 |
finalQuery.add(queryFactory.newEntityUuidQuery("feature.uuid", features), Occur.MUST); |
|
1176 |
luceneQueryTemplate.append("+feature.uuid:("); |
|
1177 |
for(Feature feature : features){ |
|
1178 |
luceneQueryTemplate.append(feature.getUuid()).append(" "); |
|
1179 |
} |
|
1180 |
luceneQueryTemplate.append(") "); |
|
1317 | 1181 |
} |
1318 | 1182 |
|
1319 | 1183 |
// the description must be associated with a taxon |
1320 |
finalQuery.add(queryFactory.newIdNotNullQuery("inDescription.taxon.id"), Occur.MUST); |
|
1184 |
// TODO open range queries [0 TO *] not working in the current version of lucene (https://issues.apache.org/jira/browse/LUCENE-995) |
|
1185 |
// so we are using integer maximum as workaround |
|
1186 |
luceneQueryTemplate.append("+inDescription.taxon.id:[ " + PaddedIntegerBridge.paddInteger(0) + " TO " + PaddedIntegerBridge.paddInteger(Integer.MAX_VALUE) + "] "); |
|
1187 |
//luceneQueryTemplate.append("-inDescription.taxon.id:" + PaddedIntegerBridge.NULL_STRING); |
|
1188 |
|
|
1189 |
String luceneQueryStr = String.format(luceneQueryTemplate.toString(), queryString); |
|
1190 |
|
|
1191 |
// --- sort fields |
|
1192 |
SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("inDescription.taxon.titleCache__sort", false)}; |
|
1193 |
|
|
1194 |
// ---- execute criteria |
|
1195 |
LuceneSearch luceneSearch = new LuceneSearch(getSession(), directorySelectClass); |
|
1321 | 1196 |
|
1322 |
luceneSearch.setQuery(finalQuery); |
|
1197 |
Query luceneQuery = luceneSearch.parse(luceneQueryStr); |
|
1198 |
TopDocs topDocsResultSet = luceneSearch.executeSearch(luceneQuery, clazz, pageSize, pageNumber, sortFields); |
|
1323 | 1199 |
|
1200 |
String[] highlightFields = null; |
|
1324 | 1201 |
if(highlightFragments){ |
1325 |
luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray());
|
|
1202 |
highlightFields = freetextFields.toArray(new String[freetextFields.size()]);
|
|
1326 | 1203 |
} |
1327 |
return luceneSearch; |
|
1204 |
|
|
1205 |
// initialize taxa, thighlight matches .... |
|
1206 |
ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneQuery); |
|
1207 |
List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet( |
|
1208 |
topDocsResultSet, highlightFields, dao, "inDescription.taxon.id", propertyPaths); |
|
1209 |
|
|
1210 |
return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, searchResults.size(), pageSize, searchResults); |
|
1211 |
|
|
1328 | 1212 |
} |
1329 | 1213 |
|
1330 | 1214 |
/** |
Also available in: Unified diff
Reverting erroneously committed revisions r16027 to r16031