Project

General

Profile

« Previous | Next » 

Revision d716c44b

Added by Andreas Müller almost 11 years ago

merge trunk into cdmlib v3.3 branch

View differences:

cdmlib-services/src/main/java/eu/etaxonomy/cdm/api/service/TaxonServiceImpl.java
22 22

  
23 23
import org.apache.log4j.Logger;
24 24
import org.apache.lucene.index.CorruptIndexException;
25
import org.apache.lucene.index.IndexReader;
26 25
import org.apache.lucene.queryParser.ParseException;
27 26
import org.apache.lucene.search.BooleanClause.Occur;
27
import org.apache.lucene.search.BooleanFilter;
28 28
import org.apache.lucene.search.BooleanQuery;
29
import org.apache.lucene.search.IndexSearcher;
29
import org.apache.lucene.search.DocIdSet;
30 30
import org.apache.lucene.search.Query;
31
import org.apache.lucene.search.QueryWrapperFilter;
31 32
import org.apache.lucene.search.SortField;
32
import org.apache.lucene.search.join.JoinUtil;
33 33
import org.springframework.beans.factory.annotation.Autowired;
34 34
import org.springframework.stereotype.Service;
35 35
import org.springframework.transaction.annotation.Transactional;
......
43 43
import eu.etaxonomy.cdm.api.service.exception.ReferencedObjectUndeletableException;
44 44
import eu.etaxonomy.cdm.api.service.pager.Pager;
45 45
import eu.etaxonomy.cdm.api.service.pager.impl.DefaultPagerImpl;
46
import eu.etaxonomy.cdm.api.service.search.DocIdBitSetPrinter;
47
import eu.etaxonomy.cdm.api.service.search.ILuceneIndexToolProvider;
46 48
import eu.etaxonomy.cdm.api.service.search.ISearchResultBuilder;
47 49
import eu.etaxonomy.cdm.api.service.search.LuceneMultiSearch;
48 50
import eu.etaxonomy.cdm.api.service.search.LuceneMultiSearchException;
......
70 72
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
71 73
import eu.etaxonomy.cdm.model.description.DescriptionBase;
72 74
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
75
import eu.etaxonomy.cdm.model.description.Distribution;
73 76
import eu.etaxonomy.cdm.model.description.Feature;
74 77
import eu.etaxonomy.cdm.model.description.IIdentificationKey;
75 78
import eu.etaxonomy.cdm.model.description.PolytomousKeyNode;
79
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
76 80
import eu.etaxonomy.cdm.model.description.SpecimenDescription;
77 81
import eu.etaxonomy.cdm.model.description.TaxonDescription;
78 82
import eu.etaxonomy.cdm.model.description.TaxonInteraction;
......
152 156
    @Autowired
153 157
    private AbstractBeanInitializer beanInitializer;
154 158

  
155
    private static IndexSearcher taxonRelationshipSearcher;
159
    @Autowired
160
    private ILuceneIndexToolProvider luceneIndexToolProvider;
161

  
156 162

  
157 163
    /**
158 164
     * Constructor
......
420 426
     * @see eu.etaxonomy.cdm.api.service.ITaxonService#findTaxaByName(java.lang.Class, java.lang.String, java.lang.String, java.lang.String, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, java.lang.Integer, java.lang.Integer)
421 427
     */
422 428
    @Override
423
    public Pager<TaxonBase> findTaxaByName(Class<? extends TaxonBase> clazz, String uninomial,	String infragenericEpithet, String specificEpithet,	String infraspecificEpithet, Rank rank, Integer pageSize,Integer pageNumber) {
429
    public Pager<TaxonBase> findTaxaByName(Class<? extends TaxonBase> clazz,
430
            String uninomial,	String infragenericEpithet, String specificEpithet,
431
            String infraspecificEpithet, Rank rank, Integer pageSize,Integer pageNumber) {
424 432
        Integer numberOfResults = dao.countTaxaByName(clazz, uninomial, infragenericEpithet, specificEpithet, infraspecificEpithet, rank);
425 433

  
426 434
        List<TaxonBase> results = new ArrayList<TaxonBase>();
......
431 439
        return new DefaultPagerImpl<TaxonBase>(pageNumber, numberOfResults, pageSize, results);
432 440
    }
433 441

  
442

  
434 443
    /* (non-Javadoc)
435 444
     * @see eu.etaxonomy.cdm.api.service.ITaxonService#listTaxaByName(java.lang.Class, java.lang.String, java.lang.String, java.lang.String, java.lang.String, eu.etaxonomy.cdm.model.name.Rank, java.lang.Integer, java.lang.Integer)
436 445
     */
......
1383 1392
        return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, totalHits, pageSize, searchResults);
1384 1393
    }
1385 1394

  
1395
    @Override
1396
    public Pager<SearchResult<TaxonBase>> findByDistribution(List<NamedArea> areaFilter, List<PresenceAbsenceTermBase<?>> statusFilter,
1397
            Classification classification,
1398
            Integer pageSize, Integer pageNumber,
1399
            List<OrderHint> orderHints, List<String> propertyPaths) throws IOException, ParseException {
1400

  
1401
        LuceneSearch luceneSearch = prepareByDistributionSearch(areaFilter, statusFilter, classification);
1402

  
1403
        // --- execute search
1404
        TopGroupsWithMaxScore topDocsResultSet = luceneSearch.executeSearch(pageSize, pageNumber);
1405

  
1406
        Map<CdmBaseType, String> idFieldMap = new HashMap<CdmBaseType, String>();
1407
        idFieldMap.put(CdmBaseType.TAXON, "id");
1408

  
1409
        // ---  initialize taxa, highlight matches ....
1410
        ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(luceneSearch, luceneSearch.getQuery());
1411
        List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet(
1412
                topDocsResultSet, luceneSearch.getHighlightFields(), dao, idFieldMap, propertyPaths);
1413

  
1414
        int totalHits = topDocsResultSet != null ? topDocsResultSet.topGroups.totalGroupCount : 0;
1415
        return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, totalHits, pageSize, searchResults);
1416
    }
1417

  
1386 1418
    /**
1387 1419
     * @param clazz
1388 1420
     * @param queryString
......
1397 1429
        BooleanQuery finalQuery = new BooleanQuery();
1398 1430
        BooleanQuery textQuery = new BooleanQuery();
1399 1431

  
1400
        LuceneSearch luceneSearch = new LuceneSearch(getSession(), GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, TaxonBase.class);
1401
        QueryFactory queryFactory = new QueryFactory(luceneSearch);
1432
        LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, TaxonBase.class);
1433
        QueryFactory taxonBaseQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(TaxonBase.class);
1402 1434

  
1403 1435
        SortField[] sortFields = new  SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING,  false)};
1404 1436
        luceneSearch.setSortFields(sortFields);
1405 1437

  
1406 1438
        // ---- search criteria
1407
        luceneSearch.setClazz(clazz);
1439
        luceneSearch.setCdmTypRestriction(clazz);
1408 1440

  
1409
        textQuery.add(queryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD);
1410
        textQuery.add(queryFactory.newDefinedTermQuery("name.rank", queryString, languages), Occur.SHOULD);
1441
        textQuery.add(taxonBaseQueryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD);
1442
        textQuery.add(taxonBaseQueryFactory.newDefinedTermQuery("name.rank", queryString, languages), Occur.SHOULD);
1411 1443

  
1412 1444
        finalQuery.add(textQuery, Occur.MUST);
1413 1445

  
1414 1446
        if(classification != null){
1415
            finalQuery.add(queryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
1447
            finalQuery.add(taxonBaseQueryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
1416 1448
        }
1417 1449
        luceneSearch.setQuery(finalQuery);
1418 1450

  
1419 1451
        if(highlightFragments){
1420
            luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray());
1452
            luceneSearch.setHighlightFields(taxonBaseQueryFactory.getTextFieldNamesAsArray());
1421 1453
        }
1422 1454
        return luceneSearch;
1423 1455
    }
......
1439 1471
     * @param languages
1440 1472
     * @param highlightFragments
1441 1473
     * @return
1474
     * @throws IOException
1442 1475
     */
1443 1476
    protected LuceneSearch prepareFindByTaxonRelationFullTextSearch(TaxonRelationshipEdge edge, String queryString, Classification classification, List<Language> languages,
1444
            boolean highlightFragments) {
1477
            boolean highlightFragments) throws IOException {
1445 1478

  
1446
        String idField;
1479
        String fromField;
1447 1480
        String queryTermField;
1448 1481
        String toField = "id"; // TaxonBase.uuid
1449 1482

  
......
1451 1484
            throw new RuntimeException("Bidirectional joining not supported!");
1452 1485
        }
1453 1486
        if(edge.isEvers()){
1454
            idField = "relatedFrom.id";
1487
            fromField = "relatedFrom.id";
1455 1488
            queryTermField = "relatedFrom.titleCache";
1456 1489
        } else if(edge.isInvers()) {
1457
            idField = "relatedTo.id";
1490
            fromField = "relatedTo.id";
1458 1491
            queryTermField = "relatedTo.titleCache";
1459 1492
        } else {
1460 1493
            throw new RuntimeException("Invalid direction: " + edge.getDirections());
1461 1494
        }
1462 1495

  
1463 1496
        BooleanQuery finalQuery = new BooleanQuery();
1497

  
1498
        LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, TaxonBase.class);
1499
        QueryFactory taxonBaseQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(TaxonBase.class);
1500

  
1464 1501
        BooleanQuery joinFromQuery = new BooleanQuery();
1465
        Query joinQuery = null;
1466

  
1467
        LuceneSearch luceneSearch = new LuceneSearch(getSession(), TaxonBase.class);
1468
        QueryFactory queryFactory = new QueryFactory(luceneSearch);
1469

  
1470
        joinFromQuery.add(queryFactory.newTermQuery(queryTermField, queryString), Occur.MUST);
1471
        joinFromQuery.add(queryFactory.newEntityIdQuery("type.id", edge.getTaxonRelationshipType()), Occur.MUST);
1472
        try {
1473
            // TODO move into QueryFactory if possible
1474
            if(taxonRelationshipSearcher == null){
1475
                IndexReader taxonRelationshipReader = luceneSearch.getIndexReaderFor(TaxonRelationship.class);
1476
                taxonRelationshipSearcher = new IndexSearcher(taxonRelationshipReader);
1477
                taxonRelationshipSearcher.setDefaultFieldSortScoring(true, true);
1478
            }
1479
            joinQuery = JoinUtil.createJoinQuery(idField, toField, joinFromQuery, taxonRelationshipSearcher);
1480
            // end of possible move
1481
        } catch (IOException e) {
1482
            logger.error(e);
1483
        }
1502
        joinFromQuery.add(taxonBaseQueryFactory.newTermQuery(queryTermField, queryString), Occur.MUST);
1503
        joinFromQuery.add(taxonBaseQueryFactory.newEntityIdQuery("type.id", edge.getTaxonRelationshipType()), Occur.MUST);
1504
        Query joinQuery = taxonBaseQueryFactory.newJoinQuery(fromField, toField, joinFromQuery, TaxonRelationship.class);
1484 1505

  
1485 1506
        SortField[] sortFields = new  SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING,  false)};
1486 1507
        luceneSearch.setSortFields(sortFields);
......
1488 1509
        finalQuery.add(joinQuery, Occur.MUST);
1489 1510

  
1490 1511
        if(classification != null){
1491
            finalQuery.add(queryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
1512
            finalQuery.add(taxonBaseQueryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
1492 1513
        }
1493 1514
        luceneSearch.setQuery(finalQuery);
1494 1515

  
1495 1516
        if(highlightFragments){
1496
            luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray());
1517
            luceneSearch.setHighlightFields(taxonBaseQueryFactory.getTextFieldNamesAsArray());
1497 1518
        }
1498 1519
        return luceneSearch;
1499 1520
    }
......
1507 1528
    @Override
1508 1529
    public Pager<SearchResult<TaxonBase>> findTaxaAndNamesByFullText(
1509 1530
            EnumSet<TaxaAndNamesSearchMode> searchModes, String queryString, Classification classification,
1510
            Set<NamedArea> namedAreas, List<Language> languages, boolean highlightFragments, Integer pageSize,
1531
            Set<NamedArea> namedAreas, Set<PresenceAbsenceTermBase<?>> distributionStatus, List<Language> languages,
1532
            boolean highlightFragments, Integer pageSize,
1511 1533
            Integer pageNumber, List<OrderHint> orderHints, List<String> propertyPaths)
1512 1534
            throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1513 1535

  
1536
        if(highlightFragments){
1537
            logger.warn("findTaxaAndNamesByFullText() : fragment highlighting is " +
1538
                    "currently not fully supported by this method and thus " +
1539
                    "may not work with common names and misapplied names.");
1540
        }
1541

  
1542
        // convert sets to lists
1543
        List<NamedArea> namedAreaList = null;
1544
        List<PresenceAbsenceTermBase<?>>distributionStatusList = null;
1545
        if(namedAreas != null){
1546
            namedAreaList = new ArrayList<NamedArea>(namedAreas.size());
1547
            namedAreaList.addAll(namedAreas);
1548
        }
1549
        if(distributionStatus != null){
1550
            distributionStatusList = new ArrayList<PresenceAbsenceTermBase<?>>(distributionStatus.size());
1551
            distributionStatusList.addAll(distributionStatus);
1552
        }
1553

  
1514 1554
        // set default if parameter is null
1515 1555
        if(searchModes == null){
1516 1556
            searchModes = EnumSet.of(TaxaAndNamesSearchMode.doTaxa);
1517 1557
        }
1518 1558

  
1559
        boolean addDistributionFilter = namedAreas != null && namedAreas.size() > 0;
1560

  
1519 1561
        List<LuceneSearch> luceneSearches = new ArrayList<LuceneSearch>();
1520 1562
        Map<CdmBaseType, String> idFieldMap = new HashMap<CdmBaseType, String>();
1521 1563

  
1522

  
1564
        /*
1565
          ======== filtering by distribution , HOWTO ========
1566

  
1567
           - http://www.javaranch.com/journal/2009/02/filtering-a-lucene-search.html
1568
           - http://stackoverflow.com/questions/17709256/lucene-solr-using-complex-filters -> QueryWrapperFilter
1569
          add Filter to search as http://lucene.apache.org/core/3_6_0/api/all/org/apache/lucene/search/Filter.html
1570
          which will be put into a FilteredQuery in the end?
1571

  
1572

  
1573
          3. how does it work in spatial?
1574
          see
1575
           - http://www.nsshutdown.com/projects/lucene/whitepaper/locallucene_v2.html
1576
           - http://www.infoq.com/articles/LuceneSpatialSupport
1577
           - http://www.mhaller.de/archives/156-Spatial-search-with-Lucene.html
1578
          ------------------------------------------------------------------------
1579

  
1580
          filter strategies:
1581
          A) use a separate distribution filter per index sub-query/search:
1582
           - byTaxonSynonym (query TaxonBase):
1583
               use a join area filter (Distribution -> TaxonBase)
1584
           - byCommonName (query DescriptionElementBase): use an area filter on
1585
               DescriptionElementBase !!! PROBLEM !!!
1586
               This cannot work since the distributions are different entities than the
1587
               common names and thus these are different lucene documents.
1588
           - byMisappliedNames (join query TaxonRelationship -> TaxonBase):
1589
               use a join area filter (Distribution -> TaxonBase)
1590

  
1591
          B) use a common distribution filter for all index sub-query/searches:
1592
           - use a common join area filter (Distribution -> TaxonBase)
1593
           - also implement the byCommonName as join query (CommonName -> TaxonBase)
1594
           PROBLEM in this case: we are losing the fragment highlighting for the
1595
           common names, since the returned documents are always TaxonBases
1596
        */
1597

  
1598
        /* The QueryFactory for creating filter queries on Distributions should be a separate instance.
1599
         * The query factory used for the common names query cannot be reused
1600
         * for this case, since we want to only record the text fields which are
1601
         * actually used in the primary query
1602
         */
1603
        QueryFactory distributionFilterQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(Distribution.class);
1604

  
1605
        BooleanFilter multiIndexByAreaFilter = new BooleanFilter();
1606

  
1607

  
1608
        // search for taxa or synonyms
1523 1609
        if(searchModes.contains(TaxaAndNamesSearchMode.doTaxa) || searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)) {
1524 1610
            Class taxonBaseSubclass = TaxonBase.class;
1525 1611
            if(searchModes.contains(TaxaAndNamesSearchMode.doTaxa) && !searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)){
......
1529 1615
            }
1530 1616
            luceneSearches.add(prepareFindByFullTextSearch(taxonBaseSubclass, queryString, classification, languages, highlightFragments));
1531 1617
            idFieldMap.put(CdmBaseType.TAXON, "id");
1618
            /* A) does not work!!!!
1619
            if(addDistributionFilter){
1620
                // in this case we need a filter which uses a join query
1621
                // to get the TaxonBase documents for the DescriptionElementBase documents
1622
                // which are matching the areas in question
1623
                Query taxonAreaJoinQuery = createByDistributionJoinQuery(
1624
                        namedAreaList,
1625
                        distributionStatusList,
1626
                        distributionFilterQueryFactory
1627
                        );
1628
                multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD);
1629
            }
1630
            */
1631
            if(addDistributionFilter && searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)){
1632
                // add additional area filter for synonyms
1633
                String fromField = "inDescription.taxon.id"; // in DescriptionElementBase index
1634
                String toField = "accTaxon.id"; // id in TaxonBase index
1635

  
1636
                BooleanQuery byDistributionQuery = createByDistributionQuery(namedAreaList, distributionStatusList, distributionFilterQueryFactory);
1637

  
1638
                Query taxonAreaJoinQuery = distributionFilterQueryFactory.newJoinQuery(fromField, toField, byDistributionQuery, Distribution.class);
1639
                multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD);
1640

  
1641
            }
1532 1642
        }
1643

  
1644
        // search by CommonTaxonName
1533 1645
        if(searchModes.contains(TaxaAndNamesSearchMode.doTaxaByCommonNames)) {
1534
            luceneSearches.add(prepareByDescriptionElementFullTextSearch(CommonTaxonName.class, queryString, classification, null, languages, highlightFragments));
1646
            // B)
1647
            QueryFactory descriptionElementQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(DescriptionElementBase.class);
1648
            Query byCommonNameJoinQuery = descriptionElementQueryFactory.newJoinQuery(
1649
                    "inDescription.taxon.id",
1650
                    "id",
1651
                    createByDescriptionElementFullTextQuery(queryString, classification, null, languages, descriptionElementQueryFactory),
1652
                    CommonTaxonName.class);
1653
            logger.debug("byCommonNameJoinQuery: " + byCommonNameJoinQuery.toString());
1654
            LuceneSearch byCommonNameSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, Taxon.class);
1655
            byCommonNameSearch.setCdmTypRestriction(Taxon.class);
1656
            byCommonNameSearch.setQuery(byCommonNameJoinQuery);
1657
            idFieldMap.put(CdmBaseType.TAXON, "id");
1658

  
1659
            luceneSearches.add(byCommonNameSearch);
1660

  
1661
            /* A) does not work!!!!
1662
            luceneSearches.add(
1663
                    prepareByDescriptionElementFullTextSearch(CommonTaxonName.class,
1664
                            queryString, classification, null, languages, highlightFragments)
1665
                        );
1535 1666
            idFieldMap.put(CdmBaseType.DESCRIPTION_ELEMENT, "inDescription.taxon.id");
1536
        }
1667
            if(addDistributionFilter){
1668
                // in this case we are able to use DescriptionElementBase documents
1669
                // which are matching the areas in question directly
1670
                BooleanQuery byDistributionQuery = createByDistributionQuery(
1671
                        namedAreaList,
1672
                        distributionStatusList,
1673
                        distributionFilterQueryFactory
1674
                        );
1675
                multiIndexByAreaFilter.add(new QueryWrapperFilter(byDistributionQuery), Occur.SHOULD);
1676
            } */
1677
        }
1678

  
1679
        // search by misapplied names
1537 1680
        if(searchModes.contains(TaxaAndNamesSearchMode.doMisappliedNames)) {
1538 1681
            // NOTE:
1539 1682
            // prepareFindByTaxonRelationFullTextSearch() is making use of JoinUtil.createJoinQuery()
1540 1683
            // which allows doing query time joins
1684
            // finds the misapplied name (Taxon B) which is a misapplication for
1685
            // a related Taxon A.
1686
            //
1541 1687
            luceneSearches.add(prepareFindByTaxonRelationFullTextSearch(
1542 1688
                    new TaxonRelationshipEdge(TaxonRelationshipType.MISAPPLIED_NAME_FOR(), Direction.relatedTo),
1543 1689
                    queryString, classification, languages, highlightFragments));
1544 1690
            idFieldMap.put(CdmBaseType.TAXON, "id");
1691

  
1692
            if(addDistributionFilter){
1693
                String fromField = "inDescription.taxon.id"; // in DescriptionElementBase index
1694

  
1695
                /*
1696
                 * Here I was facing a weird and nasty bug which kept me debugging for hours until I found this solution.
1697
                 * Maybe this is a bug in Java itself.
1698
                 *
1699
                 * When the string toField is constructed by using the expression TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString()
1700
                 * directly:
1701
                 *
1702
                 *    String toField = "relation." + TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString() +".to.id";
1703
                 *
1704
                 * The byDistributionQuery fails, however when the uuid is first stored in another string variable the query
1705
                 * will execute as expected:
1706
                 *
1707
                 *    String misappliedNameForUuid = TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString();
1708
                 *    String toField = "relation." + misappliedNameForUuid +".to.id";
1709
                 *
1710
                 * Comparing both strings by the String.equals method returns true, so both strings are identical.
1711
                 *
1712
                 * The bug occurs when running eu.etaxonomy.cdm.api.service.TaxonServiceSearchTest in Eclipse and in Maven and seems to be
1713
                 * dependent on a specific JVM (openjdk6  6b27-1.12.6-1ubuntu0.13.04.2, openjdk7 7u25-2.3.10-1ubuntu0.13.04.2,  oracle jdk1.7.0_25 tested)
1714
                 * The bug is persistent after a reboot of the development computer.
1715
                 */
1716
//                String misappliedNameForUuid = TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString();
1717
//                String toField = "relation." + misappliedNameForUuid +".to.id";
1718
                String toField = "relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id";
1719
//                System.out.println("relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id".equals("relation." + misappliedNameForUuid +".to.id") ? " > identical" : " > different");
1720
//                System.out.println("relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id".equals("relation." + TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString() +".to.id") ? " > identical" : " > different");
1721

  
1722
                BooleanQuery byDistributionQuery = createByDistributionQuery(namedAreaList, distributionStatusList, distributionFilterQueryFactory);
1723
                Query taxonAreaJoinQuery = distributionFilterQueryFactory.newJoinQuery(fromField, toField, byDistributionQuery, Distribution.class);
1724
                QueryWrapperFilter filter = new QueryWrapperFilter(taxonAreaJoinQuery);
1725

  
1726
//                debug code for bug described above
1727
                DocIdSet filterMatchSet = filter.getDocIdSet(luceneIndexToolProvider.getIndexReaderFor(Taxon.class));
1728
                System.err.println(DocIdBitSetPrinter.docsAsString(filterMatchSet, 100));
1729

  
1730
                multiIndexByAreaFilter.add(filter, Occur.SHOULD);
1731
            }
1545 1732
        }
1546 1733

  
1547
        // TODO implement area filter
1734
        LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneIndexToolProvider,
1735
                luceneSearches.toArray(new LuceneSearch[luceneSearches.size()]));
1548 1736

  
1549
        LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneSearches.toArray(new LuceneSearch[luceneSearches.size()]));
1550 1737

  
1738
        if(addDistributionFilter){
1739

  
1740
            // B)
1741
            // in this case we need a filter which uses a join query
1742
            // to get the TaxonBase documents for the DescriptionElementBase documents
1743
            // which are matching the areas in question
1744
            //
1745
            // for doTaxa, doTaxaByCommonNames
1746
            Query taxonAreaJoinQuery = createByDistributionJoinQuery(
1747
                    namedAreaList,
1748
                    distributionStatusList,
1749
                    distributionFilterQueryFactory
1750
                    );
1751
            multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD);
1752
        }
1753

  
1754
        if (addDistributionFilter){
1755
            multiSearch.setFilter(multiIndexByAreaFilter);
1756
        }
1551 1757
        // --- execute search
1552 1758
        TopGroupsWithMaxScore topDocsResultSet = multiSearch.executeSearch(pageSize, pageNumber);
1553 1759

  
......
1562 1768
        return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, totalHits, pageSize, searchResults);
1563 1769
    }
1564 1770

  
1771
    /**
1772
     * @param namedAreaList at least one area must be in the list
1773
     * @param distributionStatusList optional
1774
     * @return
1775
     * @throws IOException
1776
     */
1777
    protected Query createByDistributionJoinQuery(
1778
            List<NamedArea> namedAreaList,
1779
            List<PresenceAbsenceTermBase<?>> distributionStatusList,
1780
            QueryFactory queryFactory
1781
            ) throws IOException {
1782

  
1783
        String fromField = "inDescription.taxon.id"; // in DescriptionElementBase index
1784
        String toField = "id"; // id in TaxonBase index
1785

  
1786
        BooleanQuery byDistributionQuery = createByDistributionQuery(namedAreaList, distributionStatusList, queryFactory);
1787

  
1788
        Query taxonAreaJoinQuery = queryFactory.newJoinQuery(fromField, toField, byDistributionQuery, Distribution.class);
1789

  
1790
        return taxonAreaJoinQuery;
1791
    }
1792

  
1793
    /**
1794
     * @param namedAreaList
1795
     * @param distributionStatusList
1796
     * @param queryFactory
1797
     * @return
1798
     */
1799
    private BooleanQuery createByDistributionQuery(List<NamedArea> namedAreaList,
1800
            List<PresenceAbsenceTermBase<?>> distributionStatusList, QueryFactory queryFactory) {
1801
        BooleanQuery areaQuery = new BooleanQuery();
1802
        // area field from Distribution
1803
        areaQuery.add(queryFactory.newEntityIdsQuery("area.id", namedAreaList), Occur.MUST);
1804

  
1805
        // status field from Distribution
1806
        if(distributionStatusList != null && distributionStatusList.size() > 0){
1807
            areaQuery.add(queryFactory.newEntityIdsQuery("status.id", distributionStatusList), Occur.MUST);
1808
        }
1809

  
1810
        logger.debug("createByDistributionQuery() query: " + areaQuery.toString());
1811
        return areaQuery;
1812
    }
1813

  
1814
    /**
1815
     * This method has been primarily created for testing the area join query but might
1816
     * also be useful in other situations
1817
     *
1818
     * @param namedAreaList
1819
     * @param distributionStatusList
1820
     * @param classification
1821
     * @param highlightFragments
1822
     * @return
1823
     * @throws IOException
1824
     */
1825
    protected LuceneSearch prepareByDistributionSearch(
1826
            List<NamedArea> namedAreaList, List<PresenceAbsenceTermBase<?>> distributionStatusList,
1827
            Classification classification) throws IOException {
1828

  
1829
        BooleanQuery finalQuery = new BooleanQuery();
1830

  
1831
        LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, Taxon.class);
1832

  
1833
        // FIXME is this query factory using the wrong type?
1834
        QueryFactory taxonQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(Taxon.class);
1835

  
1836
        SortField[] sortFields = new  SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING, false)};
1837
        luceneSearch.setSortFields(sortFields);
1838

  
1839

  
1840
        Query byAreaQuery = createByDistributionJoinQuery(namedAreaList, distributionStatusList, taxonQueryFactory);
1841

  
1842
        finalQuery.add(byAreaQuery, Occur.MUST);
1843

  
1844
        if(classification != null){
1845
            finalQuery.add(taxonQueryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);
1846
        }
1847

  
1848
        logger.info("prepareByAreaSearch() query: " + finalQuery.toString());
1849
        luceneSearch.setQuery(finalQuery);
1850

  
1851
        return luceneSearch;
1852
    }
1853

  
1854

  
1855

  
1565 1856
    /* (non-Javadoc)
1566 1857
     * @see eu.etaxonomy.cdm.api.service.ITaxonService#findByDescriptionElementFullText(java.lang.Class, java.lang.String, eu.etaxonomy.cdm.model.taxon.Classification, java.util.List, java.util.List, boolean, java.lang.Integer, java.lang.Integer, java.util.List, java.util.List)
1567 1858
     */
......
1600 1891
        LuceneSearch luceneSearchByDescriptionElement = prepareByDescriptionElementFullTextSearch(null, queryString, classification, null, languages, highlightFragments);
1601 1892
        LuceneSearch luceneSearchByTaxonBase = prepareFindByFullTextSearch(null, queryString, classification, languages, highlightFragments);
1602 1893

  
1603
        LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneSearchByDescriptionElement, luceneSearchByTaxonBase);
1894
        LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneIndexToolProvider, luceneSearchByDescriptionElement, luceneSearchByTaxonBase);
1604 1895

  
1605 1896
        // --- execute search
1606 1897
        TopGroupsWithMaxScore topDocsResultSet = multiSearch.executeSearch(pageSize, pageNumber);
......
1631 1922
     * @param directorySelectClass
1632 1923
     * @return
1633 1924
     */
1634
    protected LuceneSearch prepareByDescriptionElementFullTextSearch(Class<? extends CdmBase> clazz, String queryString, Classification classification, List<Feature> features,
1925
    protected LuceneSearch prepareByDescriptionElementFullTextSearch(Class<? extends CdmBase> clazz,
1926
            String queryString, Classification classification, List<Feature> features,
1635 1927
            List<Language> languages, boolean highlightFragments) {
1636
        BooleanQuery finalQuery = new BooleanQuery();
1637
        BooleanQuery textQuery = new BooleanQuery();
1638 1928

  
1639
        LuceneSearch luceneSearch = new LuceneSearch(getSession(), GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, DescriptionElementBase.class);
1640
        QueryFactory queryFactory = new QueryFactory(luceneSearch);
1929
        LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, DescriptionElementBase.class);
1930
        QueryFactory descriptionElementQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(DescriptionElementBase.class);
1641 1931

  
1642 1932
        SortField[] sortFields = new  SortField[]{SortField.FIELD_SCORE, new SortField("inDescription.taxon.titleCache__sort", SortField.STRING, false)};
1933

  
1934
        BooleanQuery finalQuery = createByDescriptionElementFullTextQuery(queryString, classification, features,
1935
                languages, descriptionElementQueryFactory);
1936

  
1643 1937
        luceneSearch.setSortFields(sortFields);
1938
        luceneSearch.setCdmTypRestriction(clazz);
1939
        luceneSearch.setQuery(finalQuery);
1940
        if(highlightFragments){
1941
            luceneSearch.setHighlightFields(descriptionElementQueryFactory.getTextFieldNamesAsArray());
1942
        }
1644 1943

  
1645
        // ---- search criteria
1646
        luceneSearch.setClazz(clazz);
1647
        textQuery.add(queryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD);
1944
        return luceneSearch;
1945
    }
1946

  
1947
    /**
1948
     * @param queryString the text to match against the titleCache, name, text, state, area and status fields
1949
     * @param classification if not null, the result is restricted to taxa having a taxon node in this classification
1950
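     * @param features if not null and not empty, the result is restricted to description elements having one of these features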
1951
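     * @param languages if not null and not empty, the common name match is restricted to these languages; also passed on to the multilanguage text and defined term queries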
1952
     * @param descriptionElementQueryFactory the factory used to create all sub queries
1953
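     * @return the boolean query combining the text criteria with the classification, feature and taxon restrictions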
1954
     */
1955
    private BooleanQuery createByDescriptionElementFullTextQuery(String queryString, Classification classification,
1956
            List<Feature> features, List<Language> languages, QueryFactory descriptionElementQueryFactory) {
1957
        BooleanQuery finalQuery = new BooleanQuery();
1958
        BooleanQuery textQuery = new BooleanQuery();
1959
        textQuery.add(descriptionElementQueryFactory.newTermQuery("titleCache", queryString), Occur.SHOULD);
1648 1960

  
1649 1961
        // common name
1650 1962
        Query nameQuery;
1651 1963
        if(languages == null || languages.size() == 0){
1652
            nameQuery = queryFactory.newTermQuery("name", queryString);
1964
            nameQuery = descriptionElementQueryFactory.newTermQuery("name", queryString);
1653 1965
        } else {
1654 1966
            nameQuery = new BooleanQuery();
1655 1967
            BooleanQuery languageSubQuery = new BooleanQuery();
1656 1968
            for(Language lang : languages){
1657
                languageSubQuery.add(queryFactory.newTermQuery("language.uuid",  lang.getUuid().toString(), false), Occur.SHOULD);
1969
                languageSubQuery.add(descriptionElementQueryFactory.newTermQuery("language.uuid",  lang.getUuid().toString(), false), Occur.SHOULD);
1658 1970
            }
1659
            ((BooleanQuery) nameQuery).add(queryFactory.newTermQuery("name", queryString), Occur.MUST);
1971
            ((BooleanQuery) nameQuery).add(descriptionElementQueryFactory.newTermQuery("name", queryString), Occur.MUST);
1660 1972
            ((BooleanQuery) nameQuery).add(languageSubQuery, Occur.MUST);
1661 1973
        }
1662 1974
        textQuery.add(nameQuery, Occur.SHOULD);
1663 1975

  
1664 1976

  
1665 1977
        // text field from TextData
1666
        textQuery.add(queryFactory.newMultilanguageTextQuery("text", queryString, languages), Occur.SHOULD);
1978
        textQuery.add(descriptionElementQueryFactory.newMultilanguageTextQuery("text", queryString, languages), Occur.SHOULD);
1667 1979

  
1668 1980
        // --- TermBase fields - by representation ----
1669 1981
        // state field from CategoricalData
1670
        textQuery.add(queryFactory.newDefinedTermQuery("stateData.state", queryString, languages), Occur.SHOULD);
1982
        textQuery.add(descriptionElementQueryFactory.newDefinedTermQuery("stateData.state", queryString, languages), Occur.SHOULD);
1671 1983

  
1672 1984
        // modifying text field from CategoricalData
1673
        textQuery.add(queryFactory.newDefinedTermQuery("stateData.modifyingText", queryString, languages), Occur.SHOULD);
1985
        textQuery.add(descriptionElementQueryFactory.newDefinedTermQuery("stateData.modifyingText", queryString, languages), Occur.SHOULD);
1674 1986

  
1675 1987
        // area field from Distribution
1676
        textQuery.add(queryFactory.newDefinedTermQuery("area", queryString, languages), Occur.SHOULD);
1988
        textQuery.add(descriptionElementQueryFactory.newDefinedTermQuery("area", queryString, languages), Occur.SHOULD);
1677 1989

  
1678 1990
        // status field from Distribution
1679
        textQuery.add(queryFactory.newDefinedTermQuery("status", queryString, languages), Occur.SHOULD);
1991
        textQuery.add(descriptionElementQueryFactory.newDefinedTermQuery("status", queryString, languages), Occur.SHOULD);
1680 1992

  
1681 1993
        finalQuery.add(textQuery, Occur.MUST);
1682 1994
        // --- classification ----
1683 1995

  
1684 1996
        if(classification != null){
1685
            finalQuery.add(queryFactory.newEntityIdQuery("inDescription.taxon.taxonNodes.classification.id", classification), Occur.MUST);
1997
            finalQuery.add(descriptionElementQueryFactory.newEntityIdQuery("inDescription.taxon.taxonNodes.classification.id", classification), Occur.MUST);
1686 1998
        }
1687 1999

  
1688 2000
        // --- IdentifiableEntity fields - by uuid
1689 2001
        if(features != null && features.size() > 0 ){
1690
            finalQuery.add(queryFactory.newEntityUuidQuery("feature.uuid", features), Occur.MUST);
2002
            finalQuery.add(descriptionElementQueryFactory.newEntityUuidsQuery("feature.uuid", features), Occur.MUST);
1691 2003
        }
1692 2004

  
1693 2005
        // the description must be associated with a taxon
1694
        finalQuery.add(queryFactory.newIsNotNullQuery("inDescription.taxon.id"), Occur.MUST);
2006
        finalQuery.add(descriptionElementQueryFactory.newIsNotNullQuery("inDescription.taxon.id"), Occur.MUST);
1695 2007

  
1696 2008
        logger.info("prepareByDescriptionElementFullTextSearch() query: " + finalQuery.toString());
1697
        luceneSearch.setQuery(finalQuery);
1698

  
1699
        if(highlightFragments){
1700
            luceneSearch.setHighlightFields(queryFactory.getTextFieldNamesAsArray());
1701
        }
1702
        return luceneSearch;
2009
        return finalQuery;
1703 2010
    }
1704 2011

  
1705 2012
    /**
......
2209 2516
        Reference<?> sourceReference = syn.getSec();
2210 2517

  
2211 2518
        if (sourceReference == null){
2212
            logger.warn("The synonym has no sec reference because it is a misapplied name! Take the sec reference of taxon");
2213
            //TODO:Remove
2214
            System.out.println("The synonym has no sec reference because it is a misapplied name! Take the sec reference of taxon" + taxon.getSec());
2519
            logger.warn("The synonym has no sec reference because it is a misapplied name! Take the sec reference of taxon" + taxon.getSec());
2215 2520
            sourceReference = taxon.getSec();
2216 2521
        }
2217 2522

  

Also available in: Unified diff