Revision d716c44b
Added by Andreas Müller almost 11 years ago
cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/TaxonServiceImpl.java | ||
---|---|---|
22 | 22 |
|
23 | 23 |
import org.apache.log4j.Logger; |
24 | 24 |
import org.apache.lucene.index.CorruptIndexException; |
25 |
import org.apache.lucene.index.IndexReader; |
|
26 | 25 |
import org.apache.lucene.queryParser.ParseException; |
27 | 26 |
import org.apache.lucene.search.BooleanClause.Occur; |
27 |
import org.apache.lucene.search.BooleanFilter; |
|
28 | 28 |
import org.apache.lucene.search.BooleanQuery; |
29 |
import org.apache.lucene.search.IndexSearcher;
|
|
29 |
import org.apache.lucene.search.DocIdSet;
|
|
30 | 30 |
import org.apache.lucene.search.Query; |
31 |
import org.apache.lucene.search.QueryWrapperFilter; |
|
31 | 32 |
import org.apache.lucene.search.SortField; |
32 |
import org.apache.lucene.search.join.JoinUtil; |
|
33 | 33 |
import org.springframework.beans.factory.annotation.Autowired; |
34 | 34 |
import org.springframework.stereotype.Service; |
35 | 35 |
import org.springframework.transaction.annotation.Transactional; |
... | ... | |
43 | 43 |
import eu.etaxonomy.cdm.api.service.exception.ReferencedObjectUndeletableException; |
44 | 44 |
import eu.etaxonomy.cdm.api.service.pager.Pager; |
45 | 45 |
import eu.etaxonomy.cdm.api.service.pager.impl.DefaultPagerImpl; |
46 |
import eu.etaxonomy.cdm.api.service.search.DocIdBitSetPrinter; |
|
47 |
import eu.etaxonomy.cdm.api.service.search.ILuceneIndexToolProvider; |
|
46 | 48 |
import eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder; |
47 | 49 |
import eu.etaxonomy.cdm.api.service.search.LuceneMultiSearch; |
48 | 50 |
import eu.etaxonomy.cdm.api.service.search.LuceneMultiSearchException; |
... | ... | |
70 | 72 |
import eu.etaxonomy.cdm.model.description.CommonTaxonName; |
71 | 73 |
import eu.etaxonomy.cdm.model.description.DescriptionBase; |
72 | 74 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase; |
75 |
import eu.etaxonomy.cdm.model.description.Distribution; |
|
73 | 76 |
import eu.etaxonomy.cdm.model.description.Feature; |
74 | 77 |
import eu.etaxonomy.cdm.model.description.IIdentificationKey; |
75 | 78 |
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode; |
79 |
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase; |
|
76 | 80 |
import eu.etaxonomy.cdm.model.description.SpecimenDescription; |
77 | 81 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
78 | 82 |
import eu.etaxonomy.cdm.model.description.TaxonInteraction; |
... | ... | |
152 | 156 |
@Autowired |
153 | 157 |
private AbstractBeanInitializer beanInitializer; |
154 | 158 |
|
155 |
private static IndexSearcher taxonRelationshipSearcher; |
|
159 |
@Autowired |
|
160 |
private ILuceneIndexToolProvider luceneIndexToolProvider; |
|
161 |
|
|
156 | 162 |
|
157 | 163 |
/** |
158 | 164 |
* Constructor |
... | ... | |
420 | 426 |
* @see eu.etaxonomy.cdm.api.service.ITaxonService#findTaxaByName(java.lang.Class, java.lang.String, java.lang.String, java.lang.String, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, java.lang.Integer, java.lang.Integer) |
421 | 427 |
*/ |
422 | 428 |
@Override |
423 |
public Pager<TaxonBase> findTaxaByName(Class<? extends TaxonBase> clazz, String uninomial, String infragenericEpithet, String specificEpithet, String infraspecificEpithet, Rank rank, Integer pageSize,Integer pageNumber) { |
|
429 |
public Pager<TaxonBase> findTaxaByName(Class<? extends TaxonBase> clazz, |
|
430 |
String uninomial, String infragenericEpithet, String specificEpithet, |
|
431 |
String infraspecificEpithet, Rank rank, Integer pageSize,Integer pageNumber) { |
|
424 | 432 |
Integer numberOfResults = dao.countTaxaByName(clazz, uninomial, infragenericEpithet, specificEpithet, infraspecificEpithet, rank); |
425 | 433 |
|
426 | 434 |
List<TaxonBase> results = new ArrayList<TaxonBase>(); |
... | ... | |
431 | 439 |
return new DefaultPagerImpl<TaxonBase>(pageNumber, numberOfResults, pageSize, results); |
432 | 440 |
} |
433 | 441 |
|
442 |
|
|
434 | 443 |
/* (non-Javadoc) |
435 | 444 |
* @see eu.etaxonomy.cdm.api.service.ITaxonService#listTaxaByName(java.lang.Class, java.lang.String, java.lang.String, java.lang.String, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, java.lang.Integer, java.lang.Integer) |
436 | 445 |
*/ |
... | ... | |
1383 | 1392 |
return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, totalHits, pageSize, searchResults); |
1384 | 1393 |
} |
1385 | 1394 |
|
1395 |
@Override |
|
1396 |
public Pager<SearchResult<TaxonBase>> findByDistribution(List<NamedArea> areaFilter, List<PresenceAbsenceTermBase<?>> statusFilter, |
|
1397 |
Classification classification, |
|
1398 |
Integer pageSize, Integer pageNumber, |
|
1399 |
List<OrderHint> orderHints, List<String> propertyPaths) throws IOException, ParseException { |
|
1400 |
|
|
1401 |
LuceneSearch luceneSearch = prepareByDistributionSearch(areaFilter, statusFilter, classification); |
|
1402 |
|
|
1403 |
// --- execute search |
|
1404 |
TopGroupsWithMaxScore topDocsResultSet = luceneSearch.executeSearch(pageSize, pageNumber); |
|
1405 |
|
|
1406 |
Map<CdmBaseType, String> idFieldMap = new HashMap<CdmBaseType, String>(); |
|
1407 |
idFieldMap.put(CdmBaseType.TAXON, "id"); |
|
1408 |
|
|
1409 |
// --- initialize taxa, highlight matches .... |
|
1410 |
ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneSearch.getQuery()); |
|
1411 |
List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet( |
|
1412 |
topDocsResultSet, luceneSearch.getHighlightFields(), dao, idFieldMap, propertyPaths); |
|
1413 |
|
|
1414 |
int totalHits = topDocsResultSet != null ? topDocsResultSet.topGroups.totalGroupCount : 0; |
|
1415 |
return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, totalHits, pageSize, searchResults); |
|
1416 |
} |
|
1417 |
|
|
1386 | 1418 |
/** |
1387 | 1419 |
* @param clazz |
1388 | 1420 |
* @param queryString |
... | ... | |
1397 | 1429 |
BooleanQuery finalQuery = new BooleanQuery(); |
1398 | 1430 |
BooleanQuery textQuery = new BooleanQuery(); |
1399 | 1431 |
|
1400 |
LuceneSearch luceneSearch = new LuceneSearch(getSession(), GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, TaxonBase.class);
|
|
1401 |
QueryFactory queryFactory = new QueryFactory(luceneSearch);
|
|
1432 |
LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, TaxonBase.class);
|
|
1433 |
QueryFactory taxonBaseQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(TaxonBase.class);
|
|
1402 | 1434 |
|
1403 | 1435 |
SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING, false)}; |
1404 | 1436 |
luceneSearch.setSortFields(sortFields); |
1405 | 1437 |
|
1406 | 1438 |
// ---- search criteria |
1407 |
luceneSearch.setClazz(clazz);
|
|
1439 |
luceneSearch.setCdmTypRestriction(clazz);
|
|
1408 | 1440 |
|
1409 |
textQuery.add(queryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD);
|
|
1410 |
textQuery.add(queryFactory.newDefinedTermQuery("name.rank", queryString, languages), Occur.SHOULD);
|
|
1441 |
textQuery.add(taxonBaseQueryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD);
|
|
1442 |
textQuery.add(taxonBaseQueryFactory.newDefinedTermQuery("name.rank", queryString, languages), Occur.SHOULD);
|
|
1411 | 1443 |
|
1412 | 1444 |
finalQuery.add(textQuery, Occur.MUST); |
1413 | 1445 |
|
1414 | 1446 |
if(classification != null){ |
1415 |
finalQuery.add(queryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
|
|
1447 |
finalQuery.add(taxonBaseQueryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
|
|
1416 | 1448 |
} |
1417 | 1449 |
luceneSearch.setQuery(finalQuery); |
1418 | 1450 |
|
1419 | 1451 |
if(highlightFragments){ |
1420 |
luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray());
|
|
1452 |
luceneSearch.setHighlightFields(taxonBaseQueryFactory.getTextFieldNamesAsArray());
|
|
1421 | 1453 |
} |
1422 | 1454 |
return luceneSearch; |
1423 | 1455 |
} |
... | ... | |
1439 | 1471 |
* @param languages |
1440 | 1472 |
* @param highlightFragments |
1441 | 1473 |
* @return |
1474 |
* @throws IOException |
|
1442 | 1475 |
*/ |
1443 | 1476 |
protected LuceneSearch prepareFindByTaxonRelationFullTextSearch(TaxonRelationshipEdge edge, String queryString, Classification classification, List<Language> languages, |
1444 |
boolean highlightFragments) { |
|
1477 |
boolean highlightFragments) throws IOException {
|
|
1445 | 1478 |
|
1446 |
String idField;
|
|
1479 |
String fromField;
|
|
1447 | 1480 |
String queryTermField; |
1448 | 1481 |
String toField = "id"; // TaxonBase.uuid |
1449 | 1482 |
|
... | ... | |
1451 | 1484 |
throw new RuntimeException("Bidirectional joining not supported!"); |
1452 | 1485 |
} |
1453 | 1486 |
if(edge.isEvers()){ |
1454 |
idField = "relatedFrom.id";
|
|
1487 |
fromField = "relatedFrom.id";
|
|
1455 | 1488 |
queryTermField = "relatedFrom.titleCache"; |
1456 | 1489 |
} else if(edge.isInvers()) { |
1457 |
idField = "relatedTo.id";
|
|
1490 |
fromField = "relatedTo.id";
|
|
1458 | 1491 |
queryTermField = "relatedTo.titleCache"; |
1459 | 1492 |
} else { |
1460 | 1493 |
throw new RuntimeException("Invalid direction: " + edge.getDirections()); |
1461 | 1494 |
} |
1462 | 1495 |
|
1463 | 1496 |
BooleanQuery finalQuery = new BooleanQuery(); |
1497 |
|
|
1498 |
LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, TaxonBase.class); |
|
1499 |
QueryFactory taxonBaseQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(TaxonBase.class); |
|
1500 |
|
|
1464 | 1501 |
BooleanQuery joinFromQuery = new BooleanQuery(); |
1465 |
Query joinQuery = null; |
|
1466 |
|
|
1467 |
LuceneSearch luceneSearch = new LuceneSearch(getSession(), TaxonBase.class); |
|
1468 |
QueryFactory queryFactory = new QueryFactory(luceneSearch); |
|
1469 |
|
|
1470 |
joinFromQuery.add(queryFactory.newTermQuery(queryTermField, queryString), Occur.MUST); |
|
1471 |
joinFromQuery.add(queryFactory.newEntityIdQuery("type.id", edge.getTaxonRelationshipType()), Occur.MUST); |
|
1472 |
try { |
|
1473 |
// TODO move into QueryFactory if possible |
|
1474 |
if(taxonRelationshipSearcher == null){ |
|
1475 |
IndexReader taxonRelationshipReader = luceneSearch.getIndexReaderFor(TaxonRelationship.class); |
|
1476 |
taxonRelationshipSearcher = new IndexSearcher(taxonRelationshipReader); |
|
1477 |
taxonRelationshipSearcher.setDefaultFieldSortScoring(true, true); |
|
1478 |
} |
|
1479 |
joinQuery = JoinUtil.createJoinQuery(idField, toField, joinFromQuery, taxonRelationshipSearcher); |
|
1480 |
// end of possible move |
|
1481 |
} catch (IOException e) { |
|
1482 |
logger.error(e); |
|
1483 |
} |
|
1502 |
joinFromQuery.add(taxonBaseQueryFactory.newTermQuery(queryTermField, queryString), Occur.MUST); |
|
1503 |
joinFromQuery.add(taxonBaseQueryFactory.newEntityIdQuery("type.id", edge.getTaxonRelationshipType()), Occur.MUST); |
|
1504 |
Query joinQuery = taxonBaseQueryFactory.newJoinQuery(fromField, toField, joinFromQuery, TaxonRelationship.class); |
|
1484 | 1505 |
|
1485 | 1506 |
SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING, false)}; |
1486 | 1507 |
luceneSearch.setSortFields(sortFields); |
... | ... | |
1488 | 1509 |
finalQuery.add(joinQuery, Occur.MUST); |
1489 | 1510 |
|
1490 | 1511 |
if(classification != null){ |
1491 |
finalQuery.add(queryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
|
|
1512 |
finalQuery.add(taxonBaseQueryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
|
|
1492 | 1513 |
} |
1493 | 1514 |
luceneSearch.setQuery(finalQuery); |
1494 | 1515 |
|
1495 | 1516 |
if(highlightFragments){ |
1496 |
luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray());
|
|
1517 |
luceneSearch.setHighlightFields(taxonBaseQueryFactory.getTextFieldNamesAsArray());
|
|
1497 | 1518 |
} |
1498 | 1519 |
return luceneSearch; |
1499 | 1520 |
} |
... | ... | |
1507 | 1528 |
@Override |
1508 | 1529 |
public Pager<SearchResult<TaxonBase>> findTaxaAndNamesByFullText( |
1509 | 1530 |
EnumSet<TaxaAndNamesSearchMode> searchModes, String queryString, Classification classification, |
1510 |
Set<NamedArea> namedAreas, List<Language> languages, boolean highlightFragments, Integer pageSize, |
|
1531 |
Set<NamedArea> namedAreas, Set<PresenceAbsenceTermBase<?>> distributionStatus, List<Language> languages, |
|
1532 |
boolean highlightFragments, Integer pageSize, |
|
1511 | 1533 |
Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths) |
1512 | 1534 |
throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException { |
1513 | 1535 |
|
1536 |
if(highlightFragments){ |
|
1537 |
logger.warn("findTaxaAndNamesByFullText() : fragment highlighting is " + |
|
1538 |
"currently not fully supported by this method and thus " + |
|
1539 |
"may not work with common names and misapplied names."); |
|
1540 |
} |
|
1541 |
|
|
1542 |
// convert sets to lists |
|
1543 |
List<NamedArea> namedAreaList = null; |
|
1544 |
List<PresenceAbsenceTermBase<?>>distributionStatusList = null; |
|
1545 |
if(namedAreas != null){ |
|
1546 |
namedAreaList = new ArrayList<NamedArea>(namedAreas.size()); |
|
1547 |
namedAreaList.addAll(namedAreas); |
|
1548 |
} |
|
1549 |
if(distributionStatus != null){ |
|
1550 |
distributionStatusList = new ArrayList<PresenceAbsenceTermBase<?>>(distributionStatus.size()); |
|
1551 |
distributionStatusList.addAll(distributionStatus); |
|
1552 |
} |
|
1553 |
|
|
1514 | 1554 |
// set default if parameter is null |
1515 | 1555 |
if(searchModes == null){ |
1516 | 1556 |
searchModes = EnumSet.of(TaxaAndNamesSearchMode.doTaxa); |
1517 | 1557 |
} |
1518 | 1558 |
|
1559 |
boolean addDistributionFilter = namedAreas != null && namedAreas.size() > 0; |
|
1560 |
|
|
1519 | 1561 |
List<LuceneSearch> luceneSearches = new ArrayList<LuceneSearch>(); |
1520 | 1562 |
Map<CdmBaseType, String> idFieldMap = new HashMap<CdmBaseType, String>(); |
1521 | 1563 |
|
1522 |
|
|
1564 |
/* |
|
1565 |
======== filtering by distribution , HOWTO ======== |
|
1566 |
|
|
1567 |
- http://www.javaranch.com/journal/2009/02/filtering-a-lucene-search.html |
|
1568 |
- http://stackoverflow.com/questions/17709256/lucene-solr-using-complex-filters -> QueryWrapperFilter |
|
1569 |
add Filter to search as http://lucene.apache.org/core/3_6_0/api/all/org/apache/lucene/search/Filter.html |
|
1570 |
which will be put into a FilteredQuery in the end? |
|
1571 |
|
|
1572 |
|
|
1573 |
3. how does it work in spatial? |
|
1574 |
see |
|
1575 |
- http://www.nsshutdown.com/projects/lucene/whitepaper/locallucene_v2.html |
|
1576 |
- http://www.infoq.com/articles/LuceneSpatialSupport |
|
1577 |
- http://www.mhaller.de/archives/156-Spatial-search-with-Lucene.html |
|
1578 |
------------------------------------------------------------------------ |
|
1579 |
|
|
1580 |
filter strategies: |
|
1581 |
A) use a separate distribution filter per index sub-query/search: |
|
1582 |
- byTaxonSynonym (query TaxonBase): |
|
1583 |
use a join area filter (Distribution -> TaxonBase) |
|
1584 |
- byCommonName (query DescriptionElementBase): use an area filter on |
|
1585 |
DescriptionElementBase !!! PROBLEM !!! |
|
1586 |
This cannot work since the distributions are different entities than the |
|
1587 |
common names and thus these are different lucene documents. |
|
1588 |
- byMisappliedNames (join query TaxonRelationship -> TaxonBase): |
|
1589 |
use a join area filter (Distribution -> TaxonBase) |
|
1590 |
|
|
1591 |
B) use a common distribution filter for all index sub-query/searches: |
|
1592 |
- use a common join area filter (Distribution -> TaxonBase) |
|
1593 |
- also implement the byCommonName as join query (CommonName -> TaxonBase) |
|
1594 |
PROBLEM in this case: we are losing the fragment highlighting for the |
|
1595 |
common names, since the returned documents are always TaxonBases |
|
1596 |
*/ |
|
1597 |
|
|
1598 |
/* The QueryFactory for creating filter queries on Distributions should be a separate instance. |
|
1599 |
* The query factory used for the common names query cannot be reused |
|
1600 |
* for this case, since we want to only record the text fields which are |
|
1601 |
* actually used in the primary query |
|
1602 |
*/ |
|
1603 |
QueryFactory distributionFilterQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(Distribution.class); |
|
1604 |
|
|
1605 |
BooleanFilter multiIndexByAreaFilter = new BooleanFilter(); |
|
1606 |
|
|
1607 |
|
|
1608 |
// search for taxa or synonyms |
|
1523 | 1609 |
if(searchModes.contains(TaxaAndNamesSearchMode.doTaxa) || searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)) { |
1524 | 1610 |
Class taxonBaseSubclass = TaxonBase.class; |
1525 | 1611 |
if(searchModes.contains(TaxaAndNamesSearchMode.doTaxa) && !searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)){ |
... | ... | |
1529 | 1615 |
} |
1530 | 1616 |
luceneSearches.add(prepareFindByFullTextSearch(taxonBaseSubclass, queryString, classification, languages, highlightFragments)); |
1531 | 1617 |
idFieldMap.put(CdmBaseType.TAXON, "id"); |
1618 |
/* A) does not work!!!! |
|
1619 |
if(addDistributionFilter){ |
|
1620 |
// in this case we need a filter which uses a join query |
|
1621 |
// to get the TaxonBase documents for the DescriptionElementBase documents |
|
1622 |
// which are matching the areas in question |
|
1623 |
Query taxonAreaJoinQuery = createByDistributionJoinQuery( |
|
1624 |
namedAreaList, |
|
1625 |
distributionStatusList, |
|
1626 |
distributionFilterQueryFactory |
|
1627 |
); |
|
1628 |
multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD); |
|
1629 |
} |
|
1630 |
*/ |
|
1631 |
if(addDistributionFilter && searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)){ |
|
1632 |
// add additional area filter for synonyms |
|
1633 |
String fromField = "inDescription.taxon.id"; // in DescriptionElementBase index |
|
1634 |
String toField = "accTaxon.id"; // id in TaxonBase index |
|
1635 |
|
|
1636 |
BooleanQuery byDistributionQuery = createByDistributionQuery(namedAreaList, distributionStatusList, distributionFilterQueryFactory); |
|
1637 |
|
|
1638 |
Query taxonAreaJoinQuery = distributionFilterQueryFactory.newJoinQuery(fromField, toField, byDistributionQuery, Distribution.class); |
|
1639 |
multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD); |
|
1640 |
|
|
1641 |
} |
|
1532 | 1642 |
} |
1643 |
|
|
1644 |
// search by CommonTaxonName |
|
1533 | 1645 |
if(searchModes.contains(TaxaAndNamesSearchMode.doTaxaByCommonNames)) { |
1534 |
luceneSearches.add(prepareByDescriptionElementFullTextSearch(CommonTaxonName.class, queryString, classification, null, languages, highlightFragments)); |
|
1646 |
// B) |
|
1647 |
QueryFactory descriptionElementQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(DescriptionElementBase.class); |
|
1648 |
Query byCommonNameJoinQuery = descriptionElementQueryFactory.newJoinQuery( |
|
1649 |
"inDescription.taxon.id", |
|
1650 |
"id", |
|
1651 |
createByDescriptionElementFullTextQuery(queryString, classification, null, languages, descriptionElementQueryFactory), |
|
1652 |
CommonTaxonName.class); |
|
1653 |
logger.debug("byCommonNameJoinQuery: " + byCommonNameJoinQuery.toString()); |
|
1654 |
LuceneSearch byCommonNameSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, Taxon.class); |
|
1655 |
byCommonNameSearch.setCdmTypRestriction(Taxon.class); |
|
1656 |
byCommonNameSearch.setQuery(byCommonNameJoinQuery); |
|
1657 |
idFieldMap.put(CdmBaseType.TAXON, "id"); |
|
1658 |
|
|
1659 |
luceneSearches.add(byCommonNameSearch); |
|
1660 |
|
|
1661 |
/* A) does not work!!!! |
|
1662 |
luceneSearches.add( |
|
1663 |
prepareByDescriptionElementFullTextSearch(CommonTaxonName.class, |
|
1664 |
queryString, classification, null, languages, highlightFragments) |
|
1665 |
); |
|
1535 | 1666 |
idFieldMap.put(CdmBaseType.DESCRIPTION_ELEMENT, "inDescription.taxon.id"); |
1536 |
} |
|
1667 |
if(addDistributionFilter){ |
|
1668 |
// in this case we are able to use DescriptionElementBase documents |
|
1669 |
// which are matching the areas in question directly |
|
1670 |
BooleanQuery byDistributionQuery = createByDistributionQuery( |
|
1671 |
namedAreaList, |
|
1672 |
distributionStatusList, |
|
1673 |
distributionFilterQueryFactory |
|
1674 |
); |
|
1675 |
multiIndexByAreaFilter.add(new QueryWrapperFilter(byDistributionQuery), Occur.SHOULD); |
|
1676 |
} */ |
|
1677 |
} |
|
1678 |
|
|
1679 |
// search by misapplied names |
|
1537 | 1680 |
if(searchModes.contains(TaxaAndNamesSearchMode.doMisappliedNames)) { |
1538 | 1681 |
// NOTE: |
1539 | 1682 |
// prepareFindByTaxonRelationFullTextSearch() is making use of JoinUtil.createJoinQuery() |
1540 | 1683 |
// which allows doing query time joins |
1684 |
// finds the misapplied name (Taxon B) which is a misapplication for |
|
1685 |
// a related Taxon A. |
|
1686 |
// |
|
1541 | 1687 |
luceneSearches.add(prepareFindByTaxonRelationFullTextSearch( |
1542 | 1688 |
new TaxonRelationshipEdge(TaxonRelationshipType.MISAPPLIED_NAME_FOR(), Direction.relatedTo), |
1543 | 1689 |
queryString, classification, languages, highlightFragments)); |
1544 | 1690 |
idFieldMap.put(CdmBaseType.TAXON, "id"); |
1691 |
|
|
1692 |
if(addDistributionFilter){ |
|
1693 |
String fromField = "inDescription.taxon.id"; // in DescriptionElementBase index |
|
1694 |
|
|
1695 |
/* |
|
1696 |
* Here I was facing a weird and nasty bug which kept me debugging for hours until I found this solution. |
|
1697 |
* Maybe this is a bug in Java itself. |
|
1698 |
* |
|
1699 |
* When the string toField is constructed by using the expression TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString() |
|
1700 |
* directly: |
|
1701 |
* |
|
1702 |
* String toField = "relation." + TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString() +".to.id"; |
|
1703 |
* |
|
1704 |
* The byDistributionQuery fails, however when the uuid is first stored in another string variable the query |
|
1705 |
* will execute as expected: |
|
1706 |
* |
|
1707 |
* String misappliedNameForUuid = TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString(); |
|
1708 |
* String toField = "relation." + misappliedNameForUuid +".to.id"; |
|
1709 |
* |
|
1710 |
* Comparing both strings by the String.equals method returns true, so both String are identical. |
|
1711 |
* |
|
1712 |
* The bug occurs when running eu.etaxonomy.cdm.api.service.TaxonServiceSearchTest in eclipse and in maven and seems to be |
|
1713 |
* dependent from a specific jvm (openjdk6 6b27-1.12.6-1ubuntu0.13.04.2, openjdk7 7u25-2.3.10-1ubuntu0.13.04.2, oracle jdk1.7.0_25 tested) |
|
1714 |
* The bug is persistent after a reboot of the development computer. |
|
1715 |
*/ |
|
1716 |
// String misappliedNameForUuid = TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString(); |
|
1717 |
// String toField = "relation." + misappliedNameForUuid +".to.id"; |
|
1718 |
String toField = "relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id"; |
|
1719 |
// System.out.println("relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id".equals("relation." + misappliedNameForUuid +".to.id") ? " > identical" : " > different"); |
|
1720 |
// System.out.println("relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id".equals("relation." + TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString() +".to.id") ? " > identical" : " > different"); |
|
1721 |
|
|
1722 |
BooleanQuery byDistributionQuery = createByDistributionQuery(namedAreaList, distributionStatusList, distributionFilterQueryFactory); |
|
1723 |
Query taxonAreaJoinQuery = distributionFilterQueryFactory.newJoinQuery(fromField, toField, byDistributionQuery, Distribution.class); |
|
1724 |
QueryWrapperFilter filter = new QueryWrapperFilter(taxonAreaJoinQuery); |
|
1725 |
|
|
1726 |
// debug code for bug described above |
|
1727 |
DocIdSet filterMatchSet = filter.getDocIdSet(luceneIndexToolProvider.getIndexReaderFor(Taxon.class)); |
|
1728 |
System.err.println(DocIdBitSetPrinter.docsAsString(filterMatchSet, 100)); |
|
1729 |
|
|
1730 |
multiIndexByAreaFilter.add(filter, Occur.SHOULD); |
|
1731 |
} |
|
1545 | 1732 |
} |
1546 | 1733 |
|
1547 |
// TODO implement area filter |
|
1734 |
LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneIndexToolProvider, |
|
1735 |
luceneSearches.toArray(new LuceneSearch[luceneSearches.size()])); |
|
1548 | 1736 |
|
1549 |
LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneSearches.toArray(new LuceneSearch[luceneSearches.size()])); |
|
1550 | 1737 |
|
1738 |
if(addDistributionFilter){ |
|
1739 |
|
|
1740 |
// B) |
|
1741 |
// in this case we need a filter which uses a join query |
|
1742 |
// to get the TaxonBase documents for the DescriptionElementBase documents |
|
1743 |
// which are matching the areas in question |
|
1744 |
// |
|
1745 |
// for doTaxa, doTaxaByCommonNames |
|
1746 |
Query taxonAreaJoinQuery = createByDistributionJoinQuery( |
|
1747 |
namedAreaList, |
|
1748 |
distributionStatusList, |
|
1749 |
distributionFilterQueryFactory |
|
1750 |
); |
|
1751 |
multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD); |
|
1752 |
} |
|
1753 |
|
|
1754 |
if (addDistributionFilter){ |
|
1755 |
multiSearch.setFilter(multiIndexByAreaFilter); |
|
1756 |
} |
|
1551 | 1757 |
// --- execute search |
1552 | 1758 |
TopGroupsWithMaxScore topDocsResultSet = multiSearch.executeSearch(pageSize, pageNumber); |
1553 | 1759 |
|
... | ... | |
1562 | 1768 |
return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, totalHits, pageSize, searchResults); |
1563 | 1769 |
} |
1564 | 1770 |
|
1771 |
/** |
|
1772 |
* @param namedAreaList at least one area must be in the list |
|
1773 |
* @param distributionStatusList optional |
|
1774 |
* @return |
|
1775 |
* @throws IOException |
|
1776 |
*/ |
|
1777 |
protected Query createByDistributionJoinQuery( |
|
1778 |
List<NamedArea> namedAreaList, |
|
1779 |
List<PresenceAbsenceTermBase<?>> distributionStatusList, |
|
1780 |
QueryFactory queryFactory |
|
1781 |
) throws IOException { |
|
1782 |
|
|
1783 |
String fromField = "inDescription.taxon.id"; // in DescriptionElementBase index |
|
1784 |
String toField = "id"; // id in TaxonBase index |
|
1785 |
|
|
1786 |
BooleanQuery byDistributionQuery = createByDistributionQuery(namedAreaList, distributionStatusList, queryFactory); |
|
1787 |
|
|
1788 |
Query taxonAreaJoinQuery = queryFactory.newJoinQuery(fromField, toField, byDistributionQuery, Distribution.class); |
|
1789 |
|
|
1790 |
return taxonAreaJoinQuery; |
|
1791 |
} |
|
1792 |
|
|
1793 |
/** |
|
1794 |
* @param namedAreaList |
|
1795 |
* @param distributionStatusList |
|
1796 |
* @param queryFactory |
|
1797 |
* @return |
|
1798 |
*/ |
|
1799 |
private BooleanQuery createByDistributionQuery(List<NamedArea> namedAreaList, |
|
1800 |
List<PresenceAbsenceTermBase<?>> distributionStatusList, QueryFactory queryFactory) { |
|
1801 |
BooleanQuery areaQuery = new BooleanQuery(); |
|
1802 |
// area field from Distribution |
|
1803 |
areaQuery.add(queryFactory.newEntityIdsQuery("area.id", namedAreaList), Occur.MUST); |
|
1804 |
|
|
1805 |
// status field from Distribution |
|
1806 |
if(distributionStatusList != null && distributionStatusList.size() > 0){ |
|
1807 |
areaQuery.add(queryFactory.newEntityIdsQuery("status.id", distributionStatusList), Occur.MUST); |
|
1808 |
} |
|
1809 |
|
|
1810 |
logger.debug("createByDistributionQuery() query: " + areaQuery.toString()); |
|
1811 |
return areaQuery; |
|
1812 |
} |
|
1813 |
|
|
1814 |
/** |
|
1815 |
* This method has been primarily created for testing the area join query but might |
|
1816 |
* also be useful in other situations |
|
1817 |
* |
|
1818 |
* @param namedAreaList |
|
1819 |
* @param distributionStatusList |
|
1820 |
* @param classification |
|
1821 |
* @param highlightFragments |
|
1822 |
* @return |
|
1823 |
* @throws IOException |
|
1824 |
*/ |
|
1825 |
protected LuceneSearch prepareByDistributionSearch( |
|
1826 |
List<NamedArea> namedAreaList, List<PresenceAbsenceTermBase<?>> distributionStatusList, |
|
1827 |
Classification classification) throws IOException { |
|
1828 |
|
|
1829 |
BooleanQuery finalQuery = new BooleanQuery(); |
|
1830 |
|
|
1831 |
LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, Taxon.class); |
|
1832 |
|
|
1833 |
// FIXME is this query factory using the wrong type? |
|
1834 |
QueryFactory taxonQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(Taxon.class); |
|
1835 |
|
|
1836 |
SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING, false)}; |
|
1837 |
luceneSearch.setSortFields(sortFields); |
|
1838 |
|
|
1839 |
|
|
1840 |
Query byAreaQuery = createByDistributionJoinQuery(namedAreaList, distributionStatusList, taxonQueryFactory); |
|
1841 |
|
|
1842 |
finalQuery.add(byAreaQuery, Occur.MUST); |
|
1843 |
|
|
1844 |
if(classification != null){ |
|
1845 |
finalQuery.add(taxonQueryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST); |
|
1846 |
} |
|
1847 |
|
|
1848 |
logger.info("prepareByAreaSearch() query: " + finalQuery.toString()); |
|
1849 |
luceneSearch.setQuery(finalQuery); |
|
1850 |
|
|
1851 |
return luceneSearch; |
|
1852 |
} |
|
1853 |
|
|
1854 |
|
|
1855 |
|
|
1565 | 1856 |
/* (non-Javadoc) |
1566 | 1857 |
* @see eu.etaxonomy.cdm.api.service.ITaxonService#findByDescriptionElementFullText(java.lang.Class, java.lang.String, eu.etaxonomy.cdm.model.taxon.Classification, java.util.List, java.util.List, boolean, java.lang.Integer, java.lang.Integer, java.util.List, java.util.List) |
1567 | 1858 |
*/ |
... | ... | |
1600 | 1891 |
LuceneSearch luceneSearchByDescriptionElement = prepareByDescriptionElementFullTextSearch(null, queryString, classification, null, languages, highlightFragments); |
1601 | 1892 |
LuceneSearch luceneSearchByTaxonBase = prepareFindByFullTextSearch(null, queryString, classification, languages, highlightFragments); |
1602 | 1893 |
|
1603 |
LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneSearchByDescriptionElement, luceneSearchByTaxonBase); |
|
1894 |
LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneIndexToolProvider, luceneSearchByDescriptionElement, luceneSearchByTaxonBase);
|
|
1604 | 1895 |
|
1605 | 1896 |
// --- execute search |
1606 | 1897 |
TopGroupsWithMaxScore topDocsResultSet = multiSearch.executeSearch(pageSize, pageNumber); |
... | ... | |
1631 | 1922 |
* @param directorySelectClass |
1632 | 1923 |
* @return |
1633 | 1924 |
*/ |
1634 |
protected LuceneSearch prepareByDescriptionElementFullTextSearch(Class<? extends CdmBase> clazz, String queryString, Classification classification, List<Feature> features, |
|
1925 |
protected LuceneSearch prepareByDescriptionElementFullTextSearch(Class<? extends CdmBase> clazz, |
|
1926 |
String queryString, Classification classification, List<Feature> features, |
|
1635 | 1927 |
List<Language> languages, boolean highlightFragments) { |
1636 |
BooleanQuery finalQuery = new BooleanQuery(); |
|
1637 |
BooleanQuery textQuery = new BooleanQuery(); |
|
1638 | 1928 |
|
1639 |
LuceneSearch luceneSearch = new LuceneSearch(getSession(), GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, DescriptionElementBase.class);
|
|
1640 |
QueryFactory queryFactory = new QueryFactory(luceneSearch);
|
|
1929 |
LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, DescriptionElementBase.class);
|
|
1930 |
QueryFactory descriptionElementQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(DescriptionElementBase.class);
|
|
1641 | 1931 |
|
1642 | 1932 |
SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("inDescription.taxon.titleCache__sort", SortField.STRING, false)}; |
1933 |
|
|
1934 |
BooleanQuery finalQuery = createByDescriptionElementFullTextQuery(queryString, classification, features, |
|
1935 |
languages, descriptionElementQueryFactory); |
|
1936 |
|
|
1643 | 1937 |
luceneSearch.setSortFields(sortFields); |
1938 |
luceneSearch.setCdmTypRestriction(clazz); |
|
1939 |
luceneSearch.setQuery(finalQuery); |
|
1940 |
if(highlightFragments){ |
|
1941 |
luceneSearch.setHighlightFields(descriptionElementQueryFactory.getTextFieldNamesAsArray()); |
|
1942 |
} |
|
1644 | 1943 |
|
1645 |
// ---- search criteria |
|
1646 |
luceneSearch.setClazz(clazz); |
|
1647 |
textQuery.add(queryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD); |
|
1944 |
return luceneSearch; |
|
1945 |
} |
|
1946 |
|
|
1947 |
/** |
|
1948 |
* @param queryString |
|
1949 |
* @param classification |
|
1950 |
* @param features |
|
1951 |
* @param languages |
|
1952 |
* @param descriptionElementQueryFactory |
|
1953 |
* @return |
|
1954 |
*/ |
|
1955 |
private BooleanQuery createByDescriptionElementFullTextQuery(String queryString, Classification classification, |
|
1956 |
List<Feature> features, List<Language> languages, QueryFactory descriptionElementQueryFactory) { |
|
1957 |
BooleanQuery finalQuery = new BooleanQuery(); |
|
1958 |
BooleanQuery textQuery = new BooleanQuery(); |
|
1959 |
textQuery.add(descriptionElementQueryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD); |
|
1648 | 1960 |
|
1649 | 1961 |
// common name |
1650 | 1962 |
Query nameQuery; |
1651 | 1963 |
if(languages == null || languages.size() == 0){ |
1652 |
nameQuery = queryFactory.newTermQuery("name", queryString);
|
|
1964 |
nameQuery = descriptionElementQueryFactory.newTermQuery("name", queryString);
|
|
1653 | 1965 |
} else { |
1654 | 1966 |
nameQuery = new BooleanQuery(); |
1655 | 1967 |
BooleanQuery languageSubQuery = new BooleanQuery(); |
1656 | 1968 |
for(Language lang : languages){ |
1657 |
languageSubQuery.add(queryFactory.newTermQuery("language.uuid", lang.getUuid().toString(), false), Occur.SHOULD);
|
|
1969 |
languageSubQuery.add(descriptionElementQueryFactory.newTermQuery("language.uuid", lang.getUuid().toString(), false), Occur.SHOULD);
|
|
1658 | 1970 |
} |
1659 |
((BooleanQuery) nameQuery).add(queryFactory.newTermQuery("name", queryString), Occur.MUST);
|
|
1971 |
((BooleanQuery) nameQuery).add(descriptionElementQueryFactory.newTermQuery("name", queryString), Occur.MUST);
|
|
1660 | 1972 |
((BooleanQuery) nameQuery).add(languageSubQuery, Occur.MUST); |
1661 | 1973 |
} |
1662 | 1974 |
textQuery.add(nameQuery, Occur.SHOULD); |
1663 | 1975 |
|
1664 | 1976 |
|
1665 | 1977 |
// text field from TextData |
1666 |
textQuery.add(queryFactory.newMultilanguageTextQuery("text", queryString, languages), Occur.SHOULD);
|
|
1978 |
textQuery.add(descriptionElementQueryFactory.newMultilanguageTextQuery("text", queryString, languages), Occur.SHOULD);
|
|
1667 | 1979 |
|
1668 | 1980 |
// --- TermBase fields - by representation ---- |
1669 | 1981 |
// state field from CategoricalData |
1670 |
textQuery.add(queryFactory.newDefinedTermQuery("stateData.state", queryString, languages), Occur.SHOULD);
|
|
1982 |
textQuery.add(descriptionElementQueryFactory.newDefinedTermQuery("stateData.state", queryString, languages), Occur.SHOULD);
|
|
1671 | 1983 |
|
1672 | 1984 |
// modifyingText field from CategoricalData |
1673 |
textQuery.add(queryFactory.newDefinedTermQuery("stateData.modifyingText", queryString, languages), Occur.SHOULD);
|
|
1985 |
textQuery.add(descriptionElementQueryFactory.newDefinedTermQuery("stateData.modifyingText", queryString, languages), Occur.SHOULD);
|
|
1674 | 1986 |
|
1675 | 1987 |
// area field from Distribution |
1676 |
textQuery.add(queryFactory.newDefinedTermQuery("area", queryString, languages), Occur.SHOULD);
|
|
1988 |
textQuery.add(descriptionElementQueryFactory.newDefinedTermQuery("area", queryString, languages), Occur.SHOULD);
|
|
1677 | 1989 |
|
1678 | 1990 |
// status field from Distribution |
1679 |
textQuery.add(queryFactory.newDefinedTermQuery("status", queryString, languages), Occur.SHOULD);
|
|
1991 |
textQuery.add(descriptionElementQueryFactory.newDefinedTermQuery("status", queryString, languages), Occur.SHOULD);
|
|
1680 | 1992 |
|
1681 | 1993 |
finalQuery.add(textQuery, Occur.MUST); |
1682 | 1994 |
// --- classification ---- |
1683 | 1995 |
|
1684 | 1996 |
if(classification != null){ |
1685 |
finalQuery.add(queryFactory.newEntityIdQuery("inDescription.taxon.taxonNodes.classification.id", classification), Occur.MUST);
|
|
1997 |
finalQuery.add(descriptionElementQueryFactory.newEntityIdQuery("inDescription.taxon.taxonNodes.classification.id", classification), Occur.MUST);
|
|
1686 | 1998 |
} |
1687 | 1999 |
|
1688 | 2000 |
// --- IdentifiableEntity fields - by uuid |
1689 | 2001 |
if(features != null && features.size() > 0 ){ |
1690 |
finalQuery.add(queryFactory.newEntityUuidQuery("feature.uuid", features), Occur.MUST);
|
|
2002 |
finalQuery.add(descriptionElementQueryFactory.newEntityUuidsQuery("feature.uuid", features), Occur.MUST);
|
|
1691 | 2003 |
} |
1692 | 2004 |
|
1693 | 2005 |
// the description must be associated with a taxon |
1694 |
finalQuery.add(queryFactory.newIsNotNullQuery("inDescription.taxon.id"), Occur.MUST);
|
|
2006 |
finalQuery.add(descriptionElementQueryFactory.newIsNotNullQuery("inDescription.taxon.id"), Occur.MUST);
|
|
1695 | 2007 |
|
1696 | 2008 |
logger.info("prepareByDescriptionElementFullTextSearch() query: " + finalQuery.toString()); |
1697 |
luceneSearch.setQuery(finalQuery); |
|
1698 |
|
|
1699 |
if(highlightFragments){ |
|
1700 |
luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray()); |
|
1701 |
} |
|
1702 |
return luceneSearch; |
|
2009 |
return finalQuery; |
|
1703 | 2010 |
} |
1704 | 2011 |
|
1705 | 2012 |
/** |
... | ... | |
2209 | 2516 |
Reference<?> sourceReference = syn.getSec(); |
2210 | 2517 |
|
2211 | 2518 |
if (sourceReference == null){ |
2212 |
logger.warn("The synonym has no sec reference because it is a misapplied name! Take the sec reference of taxon"); |
|
2213 |
//TODO:Remove |
|
2214 |
System.out.println("The synonym has no sec reference because it is a misapplied name! Take the sec reference of taxon" + taxon.getSec()); |
|
2519 |
logger.warn("The synonym has no sec reference because it is a misapplied name! Take the sec reference of taxon" + taxon.getSec()); |
|
2215 | 2520 |
sourceReference = taxon.getSec(); |
2216 | 2521 |
} |
2217 | 2522 |
|
Also available in: Unified diff
merge trunk into cdmlib v3.3 branch