+\r
+ boolean addDistributionFilter = namedAreas != null && namedAreas.size() > 0;\r
+\r
+ List<LuceneSearch> luceneSearches = new ArrayList<LuceneSearch>();\r
+ Map<CdmBaseType, String> idFieldMap = new HashMap<CdmBaseType, String>();\r
+\r
+ /*\r
+ ======== filtering by distribution , HOWTO ========\r
+\r
+ - http://www.javaranch.com/journal/2009/02/filtering-a-lucene-search.html\r
+ - http://stackoverflow.com/questions/17709256/lucene-solr-using-complex-filters -> QueryWrapperFilter\r
+ add Filter to search as http://lucene.apache.org/core/3_6_0/api/all/org/apache/lucene/search/Filter.html\r
+ which will be put into a FilteredQuery in the end ?\r
+\r
+\r
+ 3. how does it work in spatial?\r
+ see\r
+ - http://www.nsshutdown.com/projects/lucene/whitepaper/locallucene_v2.html\r
+ - http://www.infoq.com/articles/LuceneSpatialSupport\r
+ - http://www.mhaller.de/archives/156-Spatial-search-with-Lucene.html\r
+ ------------------------------------------------------------------------\r
+\r
+ filter strategies:\r
+ A) use a separate distribution filter per index sub-query/search:\r
+ - byTaxonSynonym (query TaxonBase):\r
+ use a join area filter (Distribution -> TaxonBase)\r
+ - byCommonName (query DescriptionElementBase): use an area filter on\r
+ DescriptionElementBase !!! PROBLEM !!!\r
+ This cannot work since the distributions are different entities than the\r
+ common names and thus these are different lucene documents.\r
+ - byMisappliedNames (join query TaxonRelationship -> TaxonBase):\r
+ use a join area filter (Distribution -> TaxonBase)\r
+\r
+ B) use a common distribution filter for all index sub-query/searches:\r
+ - use a common join area filter (Distribution -> TaxonBase)\r
+ - also implement the byCommonName as join query (CommonName -> TaxonBase)\r
+ PROBLEM in this case: we are losing the fragment highlighting for the\r
+ common names, since the returned documents are always TaxonBases\r
+ */\r
+\r
+ /* The QueryFactory for creating filter queries on Distributions should\r
+ * be a separate instance.\r
+ * The query factory used for the common names query cannot be reused\r
+ * for this case, since we want to only record the text fields which are\r
+ * actually used in the primary query\r
+ */\r
+ QueryFactory distributionFilterQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(Distribution.class);\r
+\r
+ BooleanFilter multiIndexByAreaFilter = new BooleanFilter();\r
+\r
+\r
+ // search for taxa or synonyms\r
+ if(searchModes.contains(TaxaAndNamesSearchMode.doTaxa) || searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)) {\r
+ Class taxonBaseSubclass = TaxonBase.class;\r
+ if(searchModes.contains(TaxaAndNamesSearchMode.doTaxa) && !searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)){\r
+ taxonBaseSubclass = Taxon.class;\r
+ } else if (!searchModes.contains(TaxaAndNamesSearchMode.doTaxa) && searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)) {\r
+ taxonBaseSubclass = Synonym.class;\r
+ }\r
+ luceneSearches.add(prepareFindByFullTextSearch(taxonBaseSubclass, queryString, classification, languages, highlightFragments));\r
+ idFieldMap.put(CdmBaseType.TAXON, "id");\r
+ /* A) does not work!!!!\r
+ if(addDistributionFilter){\r
+ // in this case we need a filter which uses a join query\r
+ // to get the TaxonBase documents for the DescriptionElementBase documents\r
+ // which are matching the areas in question\r
+ Query taxonAreaJoinQuery = createByDistributionJoinQuery(\r
+ namedAreaList,\r
+ distributionStatusList,\r
+ distributionFilterQueryFactory\r
+ );\r
+ multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD);\r
+ }\r
+ */\r
+ if(addDistributionFilter && searchModes.contains(TaxaAndNamesSearchMode.doSynonyms)){\r
+ // add additional area filter for synonyms\r
+ String fromField = "inDescription.taxon.id"; // in DescriptionElementBase index\r
+ String toField = "accTaxon.id"; // id in TaxonBase index\r
+\r
+ BooleanQuery byDistributionQuery = createByDistributionQuery(namedAreaList, distributionStatusList, distributionFilterQueryFactory);\r
+\r
+ Query taxonAreaJoinQuery = distributionFilterQueryFactory.newJoinQuery(fromField, toField, byDistributionQuery, Distribution.class);\r
+ multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD);\r
+\r
+ }\r
+ }\r
+\r
+ // search by CommonTaxonName\r
+ if(searchModes.contains(TaxaAndNamesSearchMode.doTaxaByCommonNames)) {\r
+ // B)\r
+ QueryFactory descriptionElementQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(DescriptionElementBase.class);\r
+ Query byCommonNameJoinQuery = descriptionElementQueryFactory.newJoinQuery(\r
+ "inDescription.taxon.id",\r
+ "id",\r
+ QueryFactory.addTypeRestriction(\r
+ createByDescriptionElementFullTextQuery(queryString, classification, null, languages, descriptionElementQueryFactory)\r
+ , CommonTaxonName.class\r
+ ),\r
+ CommonTaxonName.class);\r
+ logger.debug("byCommonNameJoinQuery: " + byCommonNameJoinQuery.toString());\r
+ LuceneSearch byCommonNameSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, Taxon.class);\r
+ byCommonNameSearch.setCdmTypRestriction(Taxon.class);\r
+ byCommonNameSearch.setQuery(byCommonNameJoinQuery);\r
+ idFieldMap.put(CdmBaseType.TAXON, "id");\r
+\r
+ luceneSearches.add(byCommonNameSearch);\r
+\r
+ /* A) does not work!!!!\r
+ luceneSearches.add(\r
+ prepareByDescriptionElementFullTextSearch(CommonTaxonName.class,\r
+ queryString, classification, null, languages, highlightFragments)\r
+ );\r
+ idFieldMap.put(CdmBaseType.DESCRIPTION_ELEMENT, "inDescription.taxon.id");\r
+ if(addDistributionFilter){\r
+ // in this case we are able to use DescriptionElementBase documents\r
+ // which are matching the areas in question directly\r
+ BooleanQuery byDistributionQuery = createByDistributionQuery(\r
+ namedAreaList,\r
+ distributionStatusList,\r
+ distributionFilterQueryFactory\r
+ );\r
+ multiIndexByAreaFilter.add(new QueryWrapperFilter(byDistributionQuery), Occur.SHOULD);\r
+ } */\r
+ }\r
+\r
+ // search by misapplied names\r
+ if(searchModes.contains(TaxaAndNamesSearchMode.doMisappliedNames)) {\r
+ // NOTE:\r
+ // prepareFindByTaxonRelationFullTextSearch() is making use of JoinUtil.createJoinQuery()\r
+ // which allows doing query time joins\r
+ // finds the misapplied name (Taxon B) which is a misapplication for\r
+ // a related Taxon A.\r
+ //\r
+ luceneSearches.add(prepareFindByTaxonRelationFullTextSearch(\r
+ new TaxonRelationshipEdge(TaxonRelationshipType.MISAPPLIED_NAME_FOR(), Direction.relatedTo),\r
+ queryString, classification, languages, highlightFragments));\r
+ idFieldMap.put(CdmBaseType.TAXON, "id");\r
+\r
+ if(addDistributionFilter){\r
+ String fromField = "inDescription.taxon.id"; // in DescriptionElementBase index\r
+\r
+ /*\r
+ * Here I was facing a weird and nasty bug which kept me debugging for hours until I found this solution.\r
+ * Maybe this is a bug in Java itself.\r
+ *\r
+ * When the string toField is constructed by using the expression TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString()\r
+ * directly:\r
+ *\r
+ * String toField = "relation." + TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString() +".to.id";\r
+ *\r
+ * The byDistributionQuery fails, however when the uuid is first stored in another string variable the query\r
+ * will execute as expected:\r
+ *\r
+ * String misappliedNameForUuid = TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString();\r
+ * String toField = "relation." + misappliedNameForUuid +".to.id";\r
+ *\r
+ * Comparing both strings by the String.equals method returns true, so both String are identical.\r
+ *\r
+ * The bug occurs when running eu.etaxonomy.cdm.api.service.TaxonServiceSearchTest in eclipse and in maven and seems to be\r
+ * dependent from a specific jvm (openjdk6 6b27-1.12.6-1ubuntu0.13.04.2, openjdk7 7u25-2.3.10-1ubuntu0.13.04.2, oracle jdk1.7.0_25 tested)\r
+ * The bug is persistent after a reboot of the development computer.\r
+ */\r
+// String misappliedNameForUuid = TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString();\r
+// String toField = "relation." + misappliedNameForUuid +".to.id";\r
+ String toField = "relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id";\r
+// System.out.println("relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id".equals("relation." + misappliedNameForUuid +".to.id") ? " > identical" : " > different");\r
+// System.out.println("relation.1ed87175-59dd-437e-959e-0d71583d8417.to.id".equals("relation." + TaxonRelationshipType.MISAPPLIED_NAME_FOR().getUuid().toString() +".to.id") ? " > identical" : " > different");\r
+\r
+ BooleanQuery byDistributionQuery = createByDistributionQuery(namedAreaList, distributionStatusList, distributionFilterQueryFactory);\r
+ Query taxonAreaJoinQuery = distributionFilterQueryFactory.newJoinQuery(fromField, toField, byDistributionQuery, Distribution.class);\r
+ QueryWrapperFilter filter = new QueryWrapperFilter(taxonAreaJoinQuery);\r
+\r
+// debug code for bug described above\r
+ DocIdSet filterMatchSet = filter.getDocIdSet(luceneIndexToolProvider.getIndexReaderFor(Taxon.class));\r
+// System.err.println(DocIdBitSetPrinter.docsAsString(filterMatchSet, 100));\r
+\r
+ multiIndexByAreaFilter.add(filter, Occur.SHOULD);\r
+ }\r
+ }\r
+\r
+ LuceneMultiSearch multiSearch = new LuceneMultiSearch(luceneIndexToolProvider,\r
+ luceneSearches.toArray(new LuceneSearch[luceneSearches.size()]));\r
+\r
+\r
+ if(addDistributionFilter){\r
+\r
+ // B)\r
+ // in this case we need a filter which uses a join query\r
+ // to get the TaxonBase documents for the DescriptionElementBase documents\r
+ // which are matching the areas in question\r
+ //\r
+ // for toTaxa, doByCommonName\r
+ Query taxonAreaJoinQuery = createByDistributionJoinQuery(\r
+ namedAreaList,\r
+ distributionStatusList,\r
+ distributionFilterQueryFactory\r
+ );\r
+ multiIndexByAreaFilter.add(new QueryWrapperFilter(taxonAreaJoinQuery), Occur.SHOULD);\r
+ }\r
+\r
+ if (addDistributionFilter){\r
+ multiSearch.setFilter(multiIndexByAreaFilter);\r
+ }\r
+ // --- execute search\r
+ TopGroupsWithMaxScore topDocsResultSet = multiSearch.executeSearch(pageSize, pageNumber);\r
+\r
+ // --- initialize taxa, highlight matches ....\r
+ ISearchResultBuilder searchResultBuilder = new SearchResultBuilder(multiSearch, multiSearch.getQuery());\r
+\r
+\r
+ List<SearchResult<TaxonBase>> searchResults = searchResultBuilder.createResultSet(\r
+ topDocsResultSet, multiSearch.getHighlightFields(), dao, idFieldMap, propertyPaths);\r
+\r
+ int totalHits = topDocsResultSet != null ? topDocsResultSet.topGroups.totalGroupCount : 0;\r
+ return new DefaultPagerImpl<SearchResult<TaxonBase>>(pageNumber, totalHits, pageSize, searchResults);\r
+ }\r
+\r
+ /**\r
+  * Creates a join query from the Distribution documents which match the\r
+  * given areas (and optionally the given status terms) to the documents of\r
+  * the TaxonBase index. The join runs from the field\r
+  * "inDescription.taxon.id" of the DescriptionElementBase index to the\r
+  * field "id" of the TaxonBase index.\r
+  *\r
+  * @param namedAreaList at least one area must be in the list\r
+  * @param distributionStatusList optional, may be null or empty\r
+  * @param queryFactory the factory used to create both the distribution\r
+  *        restriction and the join query\r
+  * @return the join query created by the queryFactory\r
+  * @throws IOException presumably raised while the join query is being\r
+  *         created - TODO confirm against QueryFactory.newJoinQuery()\r
+  */\r
+ protected Query createByDistributionJoinQuery(\r
+         List<NamedArea> namedAreaList,\r
+         List<PresenceAbsenceTermBase<?>> distributionStatusList,\r
+         QueryFactory queryFactory\r
+         ) throws IOException {\r
+\r
+     // restrict the Distribution documents to the requested areas and status terms\r
+     BooleanQuery distributionRestriction = createByDistributionQuery(namedAreaList, distributionStatusList, queryFactory);\r
+\r
+     // from field: in DescriptionElementBase index, to field: id in TaxonBase index\r
+     return queryFactory.newJoinQuery("inDescription.taxon.id", "id", distributionRestriction, Distribution.class);\r
+ }\r
+\r
+ /**\r
+  * Creates a boolean query on the fields of Distribution: the "area.id"\r
+  * field must match the given areas, and if status terms are supplied the\r
+  * "status.id" field must match them as well. The resulting query is\r
+  * written to the debug log.\r
+  *\r
+  * @param namedAreaList the areas to match on the "area.id" field\r
+  * @param distributionStatusList the status terms to match on the\r
+  *        "status.id" field; ignored when null or empty\r
+  * @param queryFactory the factory used to create the entity id queries\r
+  * @return the combined query, all clauses being added with Occur.MUST\r
+  */\r
+ private BooleanQuery createByDistributionQuery(List<NamedArea> namedAreaList,\r
+         List<PresenceAbsenceTermBase<?>> distributionStatusList, QueryFactory queryFactory) {\r
+\r
+     boolean hasStatusRestriction = distributionStatusList != null && !distributionStatusList.isEmpty();\r
+\r
+     BooleanQuery distributionQuery = new BooleanQuery();\r
+\r
+     // area field from Distribution\r
+     distributionQuery.add(queryFactory.newEntityIdsQuery("area.id", namedAreaList), Occur.MUST);\r
+\r
+     // status field from Distribution\r
+     if(hasStatusRestriction){\r
+         distributionQuery.add(queryFactory.newEntityIdsQuery("status.id", distributionStatusList), Occur.MUST);\r
+     }\r
+\r
+     logger.debug("createByDistributionQuery() query: " + distributionQuery);\r
+     return distributionQuery;\r
+ }\r
+\r
+ /**\r
+ * This method has been primarily created for testing the area join query but might\r
+ * also be useful in other situations\r
+ *\r
+ * @param namedAreaList\r
+ * @param distributionStatusList\r
+ * @param classification\r
+ * @param highlightFragments\r
+ * @return\r
+ * @throws IOException\r
+ */\r
+ protected LuceneSearch prepareByDistributionSearch(\r
+ List<NamedArea> namedAreaList, List<PresenceAbsenceTermBase<?>> distributionStatusList,\r
+ Classification classification) throws IOException {\r
+\r
+ BooleanQuery finalQuery = new BooleanQuery();\r
+\r
+ LuceneSearch luceneSearch = new LuceneSearch(luceneIndexToolProvider, GroupByTaxonClassBridge.GROUPBY_TAXON_FIELD, Taxon.class);\r
+\r
+ // FIXME is this query factory using the wrong type?\r
+ QueryFactory taxonQueryFactory = luceneIndexToolProvider.newQueryFactoryFor(Taxon.class);\r
+\r
+ SortField[] sortFields = new SortField[]{SortField.FIELD_SCORE, new SortField("titleCache__sort", SortField.STRING, false)};\r
+ luceneSearch.setSortFields(sortFields);\r
+\r
+\r
+ Query byAreaQuery = createByDistributionJoinQuery(namedAreaList, distributionStatusList, taxonQueryFactory);\r
+\r
+ finalQuery.add(byAreaQuery, Occur.MUST);\r
+\r
+ if(classification != null){\r
+ finalQuery.add(taxonQueryFactory.newEntityIdQuery("taxonNodes.classification.id", classification), Occur.MUST);\r
+ }\r
+\r
+ logger.info("prepareByAreaSearch() query: " + finalQuery.toString());\r
+ luceneSearch.setQuery(finalQuery);\r
+\r