fix #6354: non-phrase search with wildcard using the standard QueryParser
[cdmlib.git] / cdmlib-services / src / test / java / eu / etaxonomy / cdm / api / service / TaxonServiceSearchTest.java
1 /**
2 * Copyright (C) 2009 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.api.service;
11
12 import static org.junit.Assert.assertEquals;
13 import static org.junit.Assert.assertNotNull;
14
15 import java.io.FileNotFoundException;
16 import java.io.IOException;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.EnumSet;
20 import java.util.HashSet;
21 import java.util.List;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.UUID;
25
26 import org.apache.commons.lang.RandomStringUtils;
27 import org.apache.log4j.Level;
28 import org.apache.log4j.Logger;
29 import org.apache.lucene.document.Document;
30 import org.apache.lucene.index.CorruptIndexException;
31 import org.apache.lucene.queryparser.classic.ParseException;
32 import org.junit.Assert;
33 import org.junit.Before;
34 import org.junit.Ignore;
35 import org.junit.Test;
36 import org.unitils.dbunit.annotation.DataSet;
37 import org.unitils.spring.annotation.SpringBeanByType;
38
39 import eu.etaxonomy.cdm.api.service.config.FindTaxaAndNamesConfiguratorImpl;
40 import eu.etaxonomy.cdm.api.service.config.IFindTaxaAndNamesConfigurator;
41 import eu.etaxonomy.cdm.api.service.pager.Pager;
42 import eu.etaxonomy.cdm.api.service.search.ICdmMassIndexer;
43 import eu.etaxonomy.cdm.api.service.search.LuceneMultiSearchException;
44 import eu.etaxonomy.cdm.api.service.search.SearchResult;
45 import eu.etaxonomy.cdm.common.UTF8;
46 import eu.etaxonomy.cdm.common.monitor.DefaultProgressMonitor;
47 import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
48 import eu.etaxonomy.cdm.model.common.CdmBase;
49 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
50 import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
51 import eu.etaxonomy.cdm.model.common.Language;
52 import eu.etaxonomy.cdm.model.description.CategoricalData;
53 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
54 import eu.etaxonomy.cdm.model.description.DescriptionBase;
55 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
56 import eu.etaxonomy.cdm.model.description.Distribution;
57 import eu.etaxonomy.cdm.model.description.Feature;
58 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
59 import eu.etaxonomy.cdm.model.description.State;
60 import eu.etaxonomy.cdm.model.description.StateData;
61 import eu.etaxonomy.cdm.model.description.TaxonDescription;
62 import eu.etaxonomy.cdm.model.description.TextData;
63 import eu.etaxonomy.cdm.model.location.Country;
64 import eu.etaxonomy.cdm.model.location.NamedArea;
65 import eu.etaxonomy.cdm.model.name.BotanicalName;
66 import eu.etaxonomy.cdm.model.name.NonViralName;
67 import eu.etaxonomy.cdm.model.name.Rank;
68 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
69 import eu.etaxonomy.cdm.model.reference.Reference;
70 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
71 import eu.etaxonomy.cdm.model.taxon.Classification;
72 import eu.etaxonomy.cdm.model.taxon.Synonym;
73 import eu.etaxonomy.cdm.model.taxon.SynonymType;
74 import eu.etaxonomy.cdm.model.taxon.Taxon;
75 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
76 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
77 import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
78 import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
79 import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
80 import eu.etaxonomy.cdm.persistence.query.MatchMode;
81 import eu.etaxonomy.cdm.persistence.query.OrderHint;
82 import eu.etaxonomy.cdm.test.integration.CdmTransactionalIntegrationTest;
83 import eu.etaxonomy.cdm.test.unitils.CleanSweepInsertLoadStrategy;
84
85 /**
86 * @author a.babadshanjan, a.kohlbecker
87 * @created 04.02.2009
88 */
89 public class TaxonServiceSearchTest extends CdmTransactionalIntegrationTest {
90
91 private static final String ABIES_BALSAMEA_UUID = "f65d47bd-4f49-4ab1-bc4a-bc4551eaa1a8";
92
93 private static final String ABIES_ALBA_UUID = "7dbd5810-a3e5-44b6-b563-25152b8867f4";
94
95 private static final String CLASSIFICATION_UUID = "2a5ceebb-4830-4524-b330-78461bf8cb6b";
96
97 private static final String CLASSIFICATION_ALT_UUID = "d7c741e3-ae9e-4a7d-a566-9e3a7a0b51ce";
98
99 private static final String D_ABIES_BALSAMEA_UUID = "900108d8-e6ce-495e-b32e-7aad3099135e";
100
101 private static final String D_ABIES_ALBA_UUID = "ec8bba03-d993-4c85-8472-18b14942464b";
102
103 private static final String D_ABIES_KAWAKAMII_SEC_KOMAROV_UUID = "e9d8c2fd-6409-46d5-9c2e-14a2bbb1b2b1";
104 private static final int NUM_OF_NEW_RADOM_ENTITIES = 1000;
105
106 private static Logger logger = Logger.getLogger(TaxonServiceSearchTest.class);
107
108
109
110 @SpringBeanByType
111 private ITaxonService taxonService;
112 @SpringBeanByType
113 private ITermService termService;
114 @SpringBeanByType
115 private IClassificationService classificationService;
116 @SpringBeanByType
117 private IReferenceService referenceService;
118 @SpringBeanByType
119 private IDescriptionService descriptionService;
120 @SpringBeanByType
121 private INameService nameService;
122 @SpringBeanByType
123 private ICdmMassIndexer indexer;
124
125 @SpringBeanByType
126 private ITaxonNodeService nodeService;
127
128 private static final int BENCHMARK_ROUNDS = 300;
129
130 private Set<Class<? extends CdmBase>> typesToIndex = null;
131
132 private NamedArea germany;
133 private NamedArea france ;
134 private NamedArea russia ;
135 private NamedArea canada ;
136
137 /**
138 * @throws java.lang.Exception
139 */
140 @Before
141 public void setUp() throws Exception {
142 typesToIndex = new HashSet<Class<? extends CdmBase>>();
143 typesToIndex.add(DescriptionElementBase.class);
144 typesToIndex.add(TaxonBase.class);
145 typesToIndex.add(TaxonRelationship.class);
146
147 germany = Country.GERMANY();
148 france = Country.FRANCEFRENCHREPUBLIC();
149 russia = Country.RUSSIANFEDERATION();
150 canada = Country.CANADA();
151
152
153 }
154
155 @Test
156 public void testDbUnitUsageTest() throws Exception {
157 assertNotNull("taxonService should exist", taxonService);
158 assertNotNull("nameService should exist", nameService);
159 }
160
161 /**
162 * Test method for
163 * {@link eu.etaxonomy.cdm.api.service.TaxonServiceImpl#findTaxaAndNames(eu.etaxonomy.cdm.api.service.config.IFindTaxaAndNamesConfigurator)}
164 * .
165 */
166 @Test
167 @DataSet
168 public final void testFindTaxaAndNames() {
169
170 // pass 1
171 IFindTaxaAndNamesConfigurator<?> configurator = new FindTaxaAndNamesConfiguratorImpl();
172 configurator.setTitleSearchString("Abies*");
173 configurator.setMatchMode(MatchMode.BEGINNING);
174 configurator.setDoTaxa(true);
175 configurator.setDoSynonyms(true);
176 configurator.setDoNamesWithoutTaxa(true);
177 configurator.setDoTaxaByCommonNames(true);
178
179 Pager<IdentifiableEntity> pager = taxonService.findTaxaAndNames(configurator);
180 List<IdentifiableEntity> list = pager.getRecords();
181
182 if (logger.isDebugEnabled()) {
183 for (int i = 0; i < list.size(); i++) {
184 String nameCache = "";
185 if (list.get(i) instanceof NonViralName) {
186 nameCache = ((NonViralName<?>) list.get(i)).getNameCache();
187 } else if (list.get(i) instanceof TaxonBase) {
188 TaxonNameBase<?,?> taxonNameBase = ((TaxonBase) list.get(i)).getName();
189 nameCache = HibernateProxyHelper.deproxy(taxonNameBase, NonViralName.class).getNameCache();
190 } else {
191 }
192 logger.debug(list.get(i).getClass() + "(" + i + ")" + ": Name Cache = " + nameCache + ", Title Cache = "
193 + list.get(i).getTitleCache());
194 }
195 }
196
197 logger.debug("number of taxa: " + list.size());
198 assertEquals(10, list.size());
199 configurator.setTitleSearchString("Balsam-Tanne");
200 pager = taxonService.findTaxaAndNames(configurator);
201 list = pager.getRecords();
202 assertEquals(1, list.size());
203 // pass 2
204 configurator.setDoTaxaByCommonNames(false);
205 configurator.setDoMisappliedNames(true);
206 configurator.setClassification(classificationService.load(UUID.fromString(CLASSIFICATION_UUID)));
207 pager = taxonService.findTaxaAndNames(configurator);
208 list = pager.getRecords();
209 assertEquals(0, list.size());
210
211 }
212
213 /**
214 * Test method for
215 * {@link eu.etaxonomy.cdm.api.service.TaxonServiceImpl#findTaxaAndNames(eu.etaxonomy.cdm.api.service.config.IFindTaxaAndNamesConfigurator)}
216 * .
217 */
218 @Test
219 @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class)
220 public final void testFindTaxaAndNamesWithHybridFormula() {
221
222 // pass 1
223 IFindTaxaAndNamesConfigurator<?> configurator = new FindTaxaAndNamesConfiguratorImpl();
224 configurator.setTitleSearchString("Achillea*");
225 configurator.setMatchMode(MatchMode.BEGINNING);
226 configurator.setDoTaxa(true);
227 configurator.setDoSynonyms(true);
228 configurator.setDoNamesWithoutTaxa(true);
229 configurator.setDoTaxaByCommonNames(true);
230
231 Pager<IdentifiableEntity> pager = taxonService.findTaxaAndNames(configurator);
232 // Assert.assertEquals("Expecting one taxon",1,pager.getRecords().size());
233 List<IdentifiableEntity> list = pager.getRecords();
234 }
235
236 /**
237 * Test method for
238 * {@link eu.etaxonomy.cdm.api.service.TaxonServiceImpl#searchTaxaByName(java.lang.String, eu.etaxonomy.cdm.model.reference.Reference)}
239 * .
240 */
241 @Test
242 @DataSet
243 public final void testSearchTaxaByName() {
244 IFindTaxaAndNamesConfigurator<?> configurator = new FindTaxaAndNamesConfiguratorImpl();
245 configurator.setTitleSearchString("Abies bor*");
246 configurator.setMatchMode(MatchMode.BEGINNING);
247 configurator.setDoTaxa(true);
248 configurator.setDoSynonyms(false);
249 configurator.setDoNamesWithoutTaxa(true);
250 configurator.setDoTaxaByCommonNames(false);
251
252 List<UuidAndTitleCache<IdentifiableEntity>> list = taxonService.findTaxaAndNamesForEditor(configurator);
253
254 Assert.assertEquals("Expecting one entity", 1, list.size());
255
256 configurator.setTitleSearchString("silver fir");
257 configurator.setMatchMode(MatchMode.BEGINNING);
258 configurator.setDoTaxa(false);
259 configurator.setDoSynonyms(false);
260 configurator.setDoNamesWithoutTaxa(true);
261 configurator.setDoTaxaByCommonNames(true);
262
263 list = taxonService.findTaxaAndNamesForEditor(configurator);
264
265 Assert.assertEquals("Expecting one entity", 1, list.size());
266
267 }
268
269 @SuppressWarnings("rawtypes")
270 @Test
271 @DataSet
272 public final void testPurgeAndReindex() throws CorruptIndexException, IOException, ParseException {
273
274 refreshLuceneIndex();
275
276 Pager<SearchResult<TaxonBase>> pager;
277
278 pager = taxonService.findByFullText(null, "Abies", null, null, true, null, null, null, null); // --> 8
279 Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
280
281 indexer.purge(null);
282 commitAndStartNewTransaction(null);
283
284 pager = taxonService.findByFullText(null, "Abies", null, null, true, null, null, null, null); // --> 0
285 Assert.assertEquals("Expecting no entities since the index has been purged", 0, pager.getCount().intValue());
286
287 indexer.reindex(indexer.indexedClasses(), null);
288 commitAndStartNewTransaction(null);
289
290 pager = taxonService.findByFullText(null, "Abies", null, null, true, null, null, null, null); // --> 8
291 Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
292 }
293
294
295 @SuppressWarnings("rawtypes")
296 @Test
297 @DataSet
298 public final void testFindByDescriptionElementFullText_CommonName() throws CorruptIndexException, IOException,
299 ParseException {
300
301 refreshLuceneIndex();
302
303 Pager<SearchResult<TaxonBase>> pager;
304
305 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"tanne", null, null, null,
306 false, null, null, null, null);
307 Assert.assertEquals("Expecting one entity when searching for CommonTaxonName", 1,
308 pager.getCount().intValue());
309
310 // the description containing the Nulltanne has no taxon attached,
311 // taxon.id = null
312 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Nulltanne", null, null, null,
313 false, null, null, null, null);
314 Assert.assertEquals("Expecting no entity when searching for 'Nulltanne' ", 0, pager.getCount().intValue());
315
316 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"tanne", null, null,
317 Arrays.asList(new Language[] { Language.GERMAN() }), false, null, null, null, null);
318 Assert.assertEquals("Expecting one entity when searching in German", 1, pager.getCount().intValue());
319
320 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"tanne", null, null,
321 Arrays.asList(new Language[] { Language.RUSSIAN() }), false, null, null, null, null);
322 Assert.assertEquals("Expecting no entity when searching in Russian", 0, pager.getCount().intValue());
323
324 }
325
326 @SuppressWarnings("rawtypes")
327 @Test
328 @DataSet
329 public final void testFindByDescriptionElementFullText_Distribution() throws CorruptIndexException, IOException, ParseException {
330
331 refreshLuceneIndex();
332
333 Pager<SearchResult<TaxonBase>> pager;
334 // by Area
335 pager = taxonService.findByDescriptionElementFullText(null, "Canada", null, null, null, false, null, null, null, null);
336 Assert.assertEquals("Expecting one entity when searching for arae 'Canada'", 1, pager.getCount().intValue());
337 // by Status
338 pager = taxonService.findByDescriptionElementFullText(null, "present", null, null, null, false, null, null, null, null);
339 Assert.assertEquals("Expecting one entity when searching for status 'present'", 1, pager.getCount().intValue());
340 }
341
342 @SuppressWarnings("rawtypes")
343 @Test
344 @DataSet
345 public final void testFindByDescriptionElementFullText_wildcard() throws CorruptIndexException, IOException, ParseException {
346
347 refreshLuceneIndex();
348
349 Pager<SearchResult<TaxonBase>> pager;
350
351 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"*", null, null, null, false, null, null, null, null);
352 Assert.assertEquals("Expecting one entity when searching for CommonTaxonName", 1, pager.getCount().intValue());
353 }
354
355 /**
356 * Regression test for #3113 (hibernate search: wildcard query can cause BooleanQuery$TooManyClauses: maxClauseCount is set to 1024)
357 *
358 * @throws CorruptIndexException
359 * @throws IOException
360 * @throws ParseException
361 */
362 @SuppressWarnings("rawtypes")
363 @Test
364 @DataSet
365 public final void testFindByDescriptionElementFullText_TooManyClauses() throws CorruptIndexException, IOException, ParseException {
366
367 // generate 1024 terms to reproduce the bug
368 TaxonDescription description = (TaxonDescription) descriptionService.find(UUID.fromString(D_ABIES_ALBA_UUID));
369 Set<String> uniqueRandomStrs = new HashSet<String>(1024);
370 while(uniqueRandomStrs.size() < 1024){
371 uniqueRandomStrs.add(RandomStringUtils.random(10, true, false));
372 }
373 for(String rndStr: uniqueRandomStrs){
374 description.addElement(CommonTaxonName.NewInstance("Rot" + rndStr, Language.DEFAULT()));
375 }
376 descriptionService.saveOrUpdate(description);
377 commitAndStartNewTransaction(null);
378
379 refreshLuceneIndex();
380
381 Pager<SearchResult<TaxonBase>> pager;
382
383 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, null, null, null, null);
384 Assert.assertEquals("Expecting all 1024 entities grouped into one SearchResult item when searching for Rot*", 1, pager.getCount().intValue());
385 }
386
387 /**
388 * Regression test for #3116 (fulltext search: always only one page of results)
389 *
390 * @throws CorruptIndexException
391 * @throws IOException
392 * @throws ParseException
393 */
394 @SuppressWarnings("rawtypes")
395 @Test
396 @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class)
397 @Ignore
398 public final void testFullText_Paging() throws CorruptIndexException, IOException, ParseException {
399
400 Reference sec = ReferenceFactory.newDatabase();
401 referenceService.save(sec);
402
403 Set<String> uniqueRandomStrs = new HashSet<String>(1024);
404 int numOfItems = 100;
405 while(uniqueRandomStrs.size() < numOfItems){
406 uniqueRandomStrs.add(RandomStringUtils.random(5, true, false));
407 }
408
409 for(String rndStr: uniqueRandomStrs){
410
411 Taxon taxon = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SERIES()), sec);
412 taxon.setTitleCache("Tax" + rndStr, true);
413 taxonService.save(taxon);
414
415 TaxonDescription description = TaxonDescription.NewInstance(taxon);
416 description.addElement(CommonTaxonName.NewInstance("Rot" + rndStr, Language.DEFAULT()));
417 descriptionService.saveOrUpdate(description);
418 }
419
420 commitAndStartNewTransaction(new String[]{"TAXONBASE"});
421 refreshLuceneIndex();
422
423 int pageSize = 10;
424
425 Pager<SearchResult<TaxonBase>> pager;
426
427 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, pageSize, null, null, null);
428 Assert.assertEquals("unexpeted number of pages", Integer.valueOf(numOfItems / pageSize), pager.getPagesAvailable());
429 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, pageSize, 9, null, null);
430 Assert.assertNotNull("last page must have records", pager.getRecords());
431 Assert.assertNotNull("last item on last page must exist", pager.getRecords().get(0));
432 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, pageSize, 10, null, null);
433 Assert.assertNotNull("last page + 1 must not have any records", pager.getRecords());
434 }
435
436 /**
437 * test for max score and sort by score of hit groups
438 * with all matches per taxon in a single TextData element
439 * see {@link #testFullText_ScoreAndOrder_2()} for the complement
440 * test with matches in multiple TextData per taxon
441 *
442 * @throws CorruptIndexException
443 * @throws IOException
444 * @throws ParseException
445 */
446 @SuppressWarnings("rawtypes")
447 @Test
448 @DataSet
449 @Ignore // test fails, maybe the assumptions made here are not compatible with the lucene scoring mechanism see http://lucene.apache.org/core/3_6_1/scoring.html
450 public final void testFullText_ScoreAndOrder_1() throws CorruptIndexException, IOException, ParseException {
451
452 int numOfTaxa = 3;
453
454 UUID[] taxonUuids = new UUID[numOfTaxa];
455 StringBuilder text = new StringBuilder();
456
457 for(int i = 0; i < numOfTaxa; i++){
458
459 Taxon taxon = Taxon.NewInstance(BotanicalName.NewInstance(null), null);
460 taxon.setTitleCache("Taxon_" + i, true);
461 taxonUuids[i] = taxon.getUuid();
462 taxonService.save(taxon);
463
464 text.append(" ").append("Rot");
465 TaxonDescription description = TaxonDescription.NewInstance(taxon);
466 description.addElement(TextData.NewInstance(text.toString(), Language.DEFAULT(), null));
467 descriptionService.saveOrUpdate(description);
468 }
469
470 commitAndStartNewTransaction(null);
471 refreshLuceneIndex();
472
473 Pager<SearchResult<TaxonBase>> pager;
474
475 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Rot", null, null, null, false, null, null, null, null);
476 for(int i = 0; i < numOfTaxa; i++){
477 Assert.assertEquals("taxa should be orderd by relevance (= score)", taxonUuids[numOfTaxa - i - 1], pager.getRecords().get(i).getEntity().getUuid());
478 }
479 Assert.assertEquals("max score should be equal to the score of the first element", pager.getRecords().get(0).getMaxScore(), pager.getRecords().get(0).getScore(), 0);
480 }
481
482 /**
483 * test for max score and sort by score of hit groups
484 * with all matches per taxon in a multiple TextData elements
485 * see {@link #testFullText_ScoreAndOrder_1()} for the complement
486 * test with matches in a single TextData per taxon
487 *
488 * @throws CorruptIndexException
489 * @throws IOException
490 * @throws ParseException
491 */
492 @SuppressWarnings("rawtypes")
493 @Test
494 @DataSet
495 @Ignore // test fails, maybe the assumptions made here are not compatible with the lucene scoring mechanism see http://lucene.apache.org/core/3_6_1/scoring.html
496 public final void testFullText_ScoreAndOrder_2() throws CorruptIndexException, IOException, ParseException {
497
498 int numOfTaxa = 3;
499
500 UUID[] taxonUuids = new UUID[numOfTaxa];
501
502 for(int i = 0; i < numOfTaxa; i++){
503
504 Taxon taxon = Taxon.NewInstance(BotanicalName.NewInstance(null), null);
505 taxon.setTitleCache("Taxon_" + i, true);
506 taxonUuids[i] = taxon.getUuid();
507 taxonService.save(taxon);
508
509 TaxonDescription description = TaxonDescription.NewInstance(taxon);
510 for(int k = 0; k < i; k++){
511 description.addElement(TextData.NewInstance("Rot", Language.DEFAULT(), null));
512 }
513 descriptionService.saveOrUpdate(description);
514 }
515
516 commitAndStartNewTransaction(null);
517 refreshLuceneIndex();
518
519 Pager<SearchResult<TaxonBase>> pager;
520
521 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Rot", null, null, null, false, null, null, null, null);
522 for(int i = 0; i < numOfTaxa; i++){
523 Assert.assertEquals("taxa should be orderd by relevance (= score)", taxonUuids[numOfTaxa - i - 1], pager.getRecords().get(i).getEntity().getUuid());
524 }
525 Assert.assertEquals("max score should be equal to the score of the first element", pager.getRecords().get(0).getMaxScore(), pager.getRecords().get(0).getScore(), 0);
526 }
527
528
529 /**
530 * @throws CorruptIndexException
531 * @throws IOException
532 * @throws ParseException
533 * @throws LuceneMultiSearchException
534 */
535 @Test
536 @DataSet
537 public final void testFullText_Grouping() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
538
539 TaxonDescription description = (TaxonDescription) descriptionService.find(UUID.fromString(D_ABIES_ALBA_UUID));
540 Set<String> uniqueRandomStrs = new HashSet<String>(1024);
541 int numOfItems = 100;
542 while(uniqueRandomStrs.size() < numOfItems){
543 uniqueRandomStrs.add(RandomStringUtils.random(5, true, false));
544 }
545 for(String rndStr: uniqueRandomStrs){
546 description.addElement(CommonTaxonName.NewInstance("Rot" + rndStr, Language.DEFAULT()));
547 }
548 descriptionService.saveOrUpdate(description);
549
550 commitAndStartNewTransaction(new String[]{"DESCRIPTIONELEMENTBASE"});
551
552 refreshLuceneIndex();
553
554 int pageSize = 10;
555
556 Pager<SearchResult<TaxonBase>> pager;
557 boolean highlightFragments = true;
558
559 // test with findByDescriptionElementFullText
560 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, highlightFragments, pageSize, null, null, null);
561 logSearchResults(pager, Level.DEBUG, null);
562 Assert.assertEquals("All matches should be grouped into a single SearchResult element", 1, pager.getRecords().size());
563 Assert.assertEquals("The count property of the pager must be set correctly", 1, pager.getCount().intValue());
564 Map<String, String[]> highlightMap = pager.getRecords().get(0).getFieldHighlightMap();
565 // maxDocsPerGroup is defined in LuceneSearch and defaults to 10
566 int maxDocsPerGroup = 10;
567 Assert.assertEquals("expecting 10 highlighted fragments of field 'name'", maxDocsPerGroup, highlightMap.get("name").length);
568
569 // test with findByEverythingFullText
570 pager = taxonService.findByEverythingFullText( "Rot*", null, null, highlightFragments, pageSize, null, null, null);
571 logSearchResults(pager, Level.DEBUG, null);
572 Assert.assertEquals("All matches should be grouped into a single SearchResult element", 1, pager.getRecords().size());
573 Assert.assertEquals("The count property of the pager must be set correctly", 1, pager.getCount().intValue());
574 highlightMap = pager.getRecords().get(0).getFieldHighlightMap();
575 // maxDocsPerGroup is defined in LuceneSearch and defaults to 10
576 maxDocsPerGroup = 10;
577 Assert.assertEquals("expecting 10 highlighted fragments of field 'name'", maxDocsPerGroup, highlightMap.get("name").length);
578
579 }
580
581 @SuppressWarnings("rawtypes")
582 @Test
583 @DataSet
584 @Ignore
585 public final void testFindByDescriptionElementFullText_TextData() throws CorruptIndexException, IOException, ParseException {
586
587 refreshLuceneIndex();
588
589 Pager<SearchResult<TaxonBase>> pager;
590 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Abies", null, null, null, false, null, null, null, null);
591 logSearchResults(pager, Level.DEBUG, null);
592 Assert.assertEquals("Expecting one entity when searching for any TextData", 1, pager.getCount().intValue());
593 Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
594 Assert.assertTrue("Expecting two docs, one for RUSSIAN and one for GERMAN", pager.getRecords().get(0).getDocs().size() == 2);
595 Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getDocs().iterator().next().get("inDescription.taxon.titleCache"));
596
597
598 pager = taxonService.findByDescriptionElementFullText(null, "Abies", null, null, null, false, null, null, null, null);
599 Assert.assertEquals("Expecting one entity when searching for any type", 1, pager.getCount().intValue());
600
601 pager = taxonService.findByDescriptionElementFullText(null, "Abies", null, Arrays.asList(new Feature[]{Feature.UNKNOWN()}), null, false, null, null, null, null);
602 Assert.assertEquals("Expecting one entity when searching for any type and for Feature DESCRIPTION", 1, pager.getCount().intValue());
603
604 pager = taxonService.findByDescriptionElementFullText(null, "Abies", null, Arrays.asList(new Feature[]{Feature.CHROMOSOME_NUMBER()}), null, false, null, null, null, null);
605 Assert.assertEquals("Expecting no entity when searching for any type and for Feature CHROMOSOME_NUMBER", 0, pager.getCount().intValue());
606
607 pager = taxonService.findByDescriptionElementFullText(null, "Abies", null, Arrays.asList(new Feature[]{Feature.CHROMOSOME_NUMBER(), Feature.UNKNOWN()}), null, false, null, null, null, null);
608 Assert.assertEquals("Expecting no entity when searching for any type and for Feature DESCRIPTION or CHROMOSOME_NUMBER", 1, pager.getCount().intValue());
609
610 pager = taxonService.findByDescriptionElementFullText(Distribution.class, "Abies", null, null, null, false, null, null, null, null);
611 Assert.assertEquals("Expecting no entity when searching for Distribution", 0, pager.getCount().intValue());
612
613 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Бальзам", null, null, Arrays.asList(new Language[]{}), false, null, null, null, null);
614 Assert.assertEquals("Expecting one entity", 1, pager.getCount().intValue());
615 Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
616
617 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Бальзам", null, null, Arrays.asList(new Language[]{Language.RUSSIAN()}), false, null, null, null, null);
618 Assert.assertEquals("Expecting one entity", 1, pager.getCount().intValue());
619 Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
620
621 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Бальзам", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
622 Assert.assertEquals("Expecting no entity", 0, pager.getCount().intValue());
623
624 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN(), Language.RUSSIAN()}), false, null, null, null, null);
625 Assert.assertEquals("Expecting one entity", 1, pager.getCount().intValue());
626 Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
627 }
628
629 @SuppressWarnings("rawtypes")
630 @Test
631 @DataSet
632 public final void testFindByDescriptionElementFullText_MultipleWords() throws CorruptIndexException, IOException, ParseException {
633
634 refreshLuceneIndex();
635
636 // Pflanzenart aus der Gattung der Tannen
637 long start = System.currentTimeMillis();
638
639 Pager<SearchResult<TaxonBase>> pager;
640 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Pflanzenart Tannen", null, null, null, false, null, null, null, null);
641 Assert.assertEquals("OR search : Expecting one entity", 1, pager.getCount().intValue());
642
643 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Pflanzenart Wespen", null, null, null, false, null, null, null, null);
644 Assert.assertEquals("OR search : Expecting one entity", 1, pager.getCount().intValue());
645
646 pager = taxonService.findByDescriptionElementFullText(TextData.class, "+Pflanzenart +Tannen", null, null, null, false, null, null, null, null);
647 Assert.assertEquals("AND search : Expecting one entity", 1, pager.getCount().intValue());
648
649 pager = taxonService.findByDescriptionElementFullText(TextData.class, "+Pflanzenart +Wespen", null, null, null, false, null, null, null, null);
650 Assert.assertEquals("AND search : Expecting no entity", 0, pager.getCount().intValue());
651
652 pager = taxonService.findByDescriptionElementFullText(TextData.class, "\"Pflanzenart aus der Gattung der Tannen\"", null, null, null, false, null, null, null, null);
653 Assert.assertEquals("Phrase search : Expecting one entity", 1, pager.getCount().intValue());
654
655 pager = taxonService.findByDescriptionElementFullText(TextData.class, "\"Pflanzenart aus der Gattung der Wespen\"", null, null, null, false, null, null, null, null);
656 Assert.assertEquals("Phrase search : Expecting one entity", 0, pager.getCount().intValue());
657
658 logger.info("testFindByDescriptionElementFullText_MultipleWords() duration: " + (System.currentTimeMillis() - start) + "ms");
659
660 }
661
662
663 @SuppressWarnings("rawtypes")
664 @Test
665 @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class)
666 public final void testFindByDescriptionElementFullText_modify_DescriptionElement() throws CorruptIndexException, IOException, ParseException {
667
668 refreshLuceneIndex();
669
670 Pager<SearchResult<TaxonBase>> pager;
671 //
672 // modify the DescriptionElement
673 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN(), Language.RUSSIAN()}), false, null, null, null, null);
674 Assert.assertTrue("Search did not return any results", pager.getRecords().size() > 0);
675 Assert.assertTrue("Expecting only one doc", pager.getRecords().get(0).getDocs().size() == 1);
676 Document indexDocument = pager.getRecords().get(0).getDocs().iterator().next();
677 String[] descriptionElementUuidStr = indexDocument.getValues("uuid");
678 String[] inDescriptionUuidStr = indexDocument.getValues("inDescription.uuid");
679 // is only one uuid!
680 DescriptionElementBase textData = descriptionService.getDescriptionElementByUuid(UUID.fromString(descriptionElementUuidStr[0]));
681
682 ((TextData)textData).removeText(Language.GERMAN());
683 ((TextData)textData).putText(Language.SPANISH_CASTILIAN(), "abeto bals"+UTF8.SMALL_A_ACUTE+"mico");
684
685 descriptionService.saveDescriptionElement(textData);
686 commitAndStartNewTransaction(null);
687 // printDataSet(System.out, new String[] {
688 // "DESCRIPTIONELEMENTBASE", "LANGUAGESTRING", "DESCRIPTIONELEMENTBASE_LANGUAGESTRING" }
689 // );
690
691 //
692 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN(), Language.RUSSIAN()}), false, null, null, null, null);
693 Assert.assertEquals("The german 'Balsam-Tanne' TextData should no longer be indexed", 0, pager.getCount().intValue());
694 pager = taxonService.findByDescriptionElementFullText(TextData.class, "abeto", null, null, Arrays.asList(new Language[]{Language.SPANISH_CASTILIAN()}), false, null, null, null, null);
695 Assert.assertEquals("expecting to find the SPANISH_CASTILIAN 'abeto bals"+UTF8.SMALL_A_ACUTE+"mico'", 1, pager.getCount().intValue());
696 pager = taxonService.findByDescriptionElementFullText(TextData.class, "bals"+UTF8.SMALL_A_ACUTE+"mico", null, null, null, false, null, null, null, null);
697 Assert.assertEquals("expecting to find the SPANISH_CASTILIAN 'abeto bals"+UTF8.SMALL_A_ACUTE+"mico'", 1, pager.getCount().intValue());
698
699 //
700 // modify the DescriptionElement via the Description object
701 DescriptionBase<?> description = descriptionService.find(UUID.fromString(inDescriptionUuidStr[0]));
702 Set<DescriptionElementBase> elements = description.getElements();
703 for( DescriptionElementBase elm : elements){
704 if(elm.getUuid().toString().equals(descriptionElementUuidStr[0])){
705 ((TextData)elm).removeText(Language.SPANISH_CASTILIAN());
706 ((TextData)elm).putText(Language.POLISH(), "Jod"+UTF8.POLISH_L+"a balsamiczna");
707 }
708 }
709 descriptionService.saveOrUpdate(description);
710 commitAndStartNewTransaction(null);
711 pager = taxonService.findByDescriptionElementFullText(TextData.class, "abeto", null, null, Arrays.asList(new Language[]{Language.SPANISH_CASTILIAN()}), false, null, null, null, null);
712 Assert.assertEquals("The spanish 'abeto bals"+UTF8.SMALL_A_ACUTE+"mico' TextData should no longer be indexed", 0, pager.getCount().intValue());
713 pager = taxonService.findByDescriptionElementFullText(TextData.class, "balsamiczna", null, null, Arrays.asList(new Language[]{Language.POLISH()}), false, null, null, null, null);
714 Assert.assertEquals("expecting to find the POLISH 'Jod"+UTF8.POLISH_L+"a balsamiczna'", 1, pager.getCount().intValue());
715 }
716
717 @SuppressWarnings("rawtypes")
718 @Test
719 @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class)
720 public final void testFindByDescriptionElementFullText_modify_Taxon() throws CorruptIndexException, IOException, ParseException {
721
722 refreshLuceneIndex();
723
724 Pager<SearchResult<TaxonBase>> pager;
725 Taxon t_abies_balsamea = (Taxon)taxonService.find(UUID.fromString(ABIES_BALSAMEA_UUID));
726 TaxonDescription d_abies_balsamea = (TaxonDescription)descriptionService.find(UUID.fromString(D_ABIES_BALSAMEA_UUID));
727
728 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
729 Assert.assertEquals("expecting to find the GERMAN 'Balsam-Tanne'", 1, pager.getCount().intValue());
730
731 // exchange the Taxon with another one via the Taxon object
732 // 1.) remove existing description:
733 t_abies_balsamea.removeDescription(d_abies_balsamea);
734
735 taxonService.saveOrUpdate(t_abies_balsamea);
736 commitAndStartNewTransaction(null);
737
738 t_abies_balsamea = (Taxon)taxonService.find(t_abies_balsamea.getUuid());
739
740 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
741 Assert.assertEquals("'Balsam-Tanne' should no longer be found", 0, pager.getCount().intValue());
742
743 // 2.) create new description and add to taxon:
744 TaxonDescription d_abies_balsamea_new = TaxonDescription.NewInstance();
745 d_abies_balsamea_new
746 .addElement(TextData
747 .NewInstance(
748 "Die Balsamtanne ist mit bis zu 30 m Höhe ein mittelgro"+UTF8.SHARP_S+"er Baum und kann bis zu 200 Jahre alt werden",
749 Language.GERMAN(), null));
750 t_abies_balsamea.addDescription(d_abies_balsamea_new);
751 // set authorshipCache to null to avoid validation exception,
752 // this is maybe not needed in future, see ticket #3344
753 BotanicalName abies_balsamea = HibernateProxyHelper.deproxy(t_abies_balsamea.getName(), BotanicalName.class);
754 abies_balsamea.setAuthorshipCache(null);
755 printDataSet(System.err, new String[] {"LANGUAGESTRING_AUD"});
756 taxonService.saveOrUpdate(t_abies_balsamea);
757 commitAndStartNewTransaction(null);
758
759 // printDataSet(System.out, new String[] {
760 // "DESCRIPTIONBASE"
761 // });
762
763 pager = taxonService.findByDescriptionElementFullText(TextData.class, "mittelgro"+UTF8.SHARP_S+"er Baum", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
764 Assert.assertEquals("the taxon should be found via the new Description", 1, pager.getCount().intValue());
765 }
766
767 @SuppressWarnings("rawtypes")
768 @Test
769 @DataSet
770 public final void testFindByDescriptionElementFullText_modify_Classification() throws CorruptIndexException, IOException, ParseException {
771
772 refreshLuceneIndex();
773
774 Pager<SearchResult<TaxonBase>> pager;
775
776 // put taxon into other classification, new taxon node
777 Classification classification = classificationService.find(UUID.fromString(CLASSIFICATION_UUID));
778 Classification alternateClassification = classificationService.find(UUID.fromString(CLASSIFICATION_ALT_UUID));
779
780 // TODO: why is the test failing when the childNode is already retrieved here, and not after the following four lines?
781 //TaxonNode childNode = classification.getChildNodes().iterator().next();
782
783 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
784 Assert.assertEquals("expecting to find the GERMAN 'Balsam-Tanne' even if filtering by classification", 1, pager.getCount().intValue());
785 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", alternateClassification, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
786 Assert.assertEquals("GERMAN 'Balsam-Tanne' should NOT be found in other classification", 0, pager.getCount().intValue());
787
788 // check for the right taxon node
789 TaxonNode childNode = classification.getChildNodes().iterator().next();
790 Assert.assertEquals("expecting Abies balsamea sec.", childNode.getTaxon().getUuid().toString(), ABIES_BALSAMEA_UUID);
791 Assert.assertEquals("expecting default classification", childNode.getClassification().getUuid().toString(), CLASSIFICATION_UUID);
792
793 // moving the taxon around, the rootnode is only a proxy
794 alternateClassification.setRootNode(HibernateProxyHelper.deproxy(alternateClassification.getRootNode(), TaxonNode.class));
795 alternateClassification.addChildNode(childNode, null, null);
796
797 classificationService.saveOrUpdate(alternateClassification);
798 commitAndStartNewTransaction(null);
799
800 // printDataSet(System.out, new String[] {
801 // "TAXONBASE", "TAXONNODE", "CLASSIFICATION"
802 // });
803
804 // reload classification
805 classification = classificationService.find(UUID.fromString(CLASSIFICATION_UUID));
806 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", alternateClassification, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
807 Assert.assertEquals("GERMAN 'Balsam-Tanne' should now be found in other classification", 1, pager.getCount().intValue());
808
809 classification.getChildNodes().clear();
810 classificationService.saveOrUpdate(classification);
811 commitAndStartNewTransaction(null);
812
813 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", classification, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
814 Assert.assertEquals("Now the GERMAN 'Balsam-Tanne' should NOT be found in original classification", 0, pager.getCount().intValue());
815
816 }
817
818 @SuppressWarnings("rawtypes")
819 @Test
820 @DataSet
821 public final void testFindByDescriptionElementFullText_CategoricalData() throws CorruptIndexException, IOException, ParseException {
822
823 // add CategoricalData
824 DescriptionBase d_abies_balsamea = descriptionService.find(UUID.fromString(D_ABIES_BALSAMEA_UUID));
825 // Categorical data
826 CategoricalData cdata = CategoricalData.NewInstance();
827 cdata.setFeature(Feature.DESCRIPTION());
828 State state = State.NewInstance("green", "green", "gn");
829
830 StateData statedata = StateData.NewInstance(state);
831 statedata.putModifyingText(Language.ENGLISH(), "always, even during winter");
832 cdata.addStateData(statedata);
833 d_abies_balsamea.addElement(cdata);
834
835 UUID termUUID = termService.save(state).getUuid();
836 descriptionService.save(d_abies_balsamea);
837
838 commitAndStartNewTransaction(null);
839
840 // printDataSet(System.out, new String[] {
841 // "STATEDATA", "STATEDATA_DEFINEDTERMBASE", "STATEDATA_LANGUAGESTRING", "LANGUAGESTRING"});
842
843 refreshLuceneIndex();
844
845 Pager<SearchResult<TaxonBase>> pager;
846 pager = taxonService.findByDescriptionElementFullText(CategoricalData.class, "green", null, null, null, false, null, null, null, null);
847 Assert.assertEquals("Expecting one entity", 1, pager.getCount().intValue());
848 Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
849 Assert.assertTrue("Expecting only one doc", pager.getRecords().get(0).getDocs().size() == 1);
850 Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getDocs().iterator().next().get("inDescription.taxon.titleCache"));
851
852
853 //TODO modify the StateData
854 TaxonBase taxon = pager.getRecords().get(0).getEntity();
855
856 String newName = "Quercus robur";
857 taxon.setTitleCache(newName + " sec. ", true);
858
859 taxonService.saveOrUpdate(taxon);
860 commitAndStartNewTransaction(null);
861
862 taxon = taxonService.find(taxon.getUuid());
863 Assert.assertEquals(newName + " sec. ", taxon.getTitleCache());
864 DefinedTermBase term = termService.find(termUUID);
865
866 termService.delete(term);
867
868 }
869
870 @SuppressWarnings("rawtypes")
871 @Test
872 @DataSet
873 public final void testFindByDescriptionElementFullText_Highlighting() throws CorruptIndexException, IOException, ParseException {
874
875 refreshLuceneIndex();
876
877 Pager<SearchResult<TaxonBase>> pager;
878 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Abies", null, null, null, true, null, null, null, null);
879 Assert.assertEquals("Expecting one entity when searching for any TextData", 1, pager.getCount().intValue());
880 SearchResult<TaxonBase> searchResult = pager.getRecords().get(0);
881 Assert.assertTrue("the map of highlighted fragments should contain at least one item", searchResult.getFieldHighlightMap().size() > 0);
882 String[] fragments = searchResult.getFieldHighlightMap().values().iterator().next();
883 Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Abies</B>"));
884
885 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Pflanzenart Tannen", null, null, null, true, null, null, null, null);
886 searchResult = pager.getRecords().get(0);
887 Assert.assertTrue("Phrase search : Expecting at least one item in highlighted fragments", searchResult.getFieldHighlightMap().size() > 0);
888 fragments = searchResult.getFieldHighlightMap().values().iterator().next();
889 Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Pflanzenart</B>") || fragments[0].contains("<B>Tannen</B>"));
890
891 pager = taxonService.findByDescriptionElementFullText(TextData.class, "+Pflanzenart +Tannen", null, null, null, true, null, null, null, null);
892 searchResult = pager.getRecords().get(0);
893 Assert.assertTrue("Phrase search : Expecting at least one item in highlighted fragments", searchResult.getFieldHighlightMap().size() > 0);
894 fragments = searchResult.getFieldHighlightMap().values().iterator().next();
895 Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Pflanzenart</B>") && fragments[0].contains("<B>Tannen</B>"));
896
897 pager = taxonService.findByDescriptionElementFullText(TextData.class, "\"Pflanzenart aus der Gattung der Tannen\"", null, null, null, true, null, null, null, null);
898 searchResult = pager.getRecords().get(0);
899 Assert.assertTrue("Phrase search : Expecting at least one item in highlighted fragments", searchResult.getFieldHighlightMap().size() > 0);
900 fragments = searchResult.getFieldHighlightMap().values().iterator().next();
901 Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Pflanzenart</B> <B>aus</B> <B>der</B> <B>Gattung</B> <B>der</B> <B>Tannen</B>"));
902
903 pager = taxonService.findByDescriptionElementFullText(TextData.class, "Gatt*", null, null, null, true, null, null, null, null);
904 searchResult = pager.getRecords().get(0);
905 Assert.assertTrue("Wildcard search : Expecting at least one item in highlighted fragments", searchResult.getFieldHighlightMap().size() > 0);
906 fragments = searchResult.getFieldHighlightMap().values().iterator().next();
907 Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Gatt"));
908 }
909
910
911 @Test
912 @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class)
913 public final void testFindByFullText() throws CorruptIndexException, IOException, ParseException {
914
915 refreshLuceneIndex();
916
917 Classification europeanAbiesClassification = classificationService.find(UUID.fromString(CLASSIFICATION_UUID));
918
919 Pager<SearchResult<TaxonBase>> pager;
920
921 pager = taxonService.findByFullText(null, "Abies", null, null, true, null, null, null, null); // --> 7
922 logSearchResults(pager, Level.DEBUG, null);
923 Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
924
925 pager = taxonService.findByFullText(Taxon.class, "Abies", null, null, true, null, null, null, null); // --> 6
926 Assert.assertEquals("Expecting 7 entities", 7, pager.getCount().intValue());
927
928 pager = taxonService.findByFullText(Synonym.class, "Abies", null, null, true, null, null, null, null); // --> 1
929 Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
930
931 pager = taxonService.findByFullText(TaxonBase.class, "sec", null, null, true, null, null, null, null); // --> 7
932 Assert.assertEquals("Expecting 8 entities", 9, pager.getCount().intValue());
933
934 pager = taxonService.findByFullText(null, "genus", null, null, true, null, null, null, null); // --> 1
935 Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
936
937 pager = taxonService.findByFullText(Taxon.class, "subalpina", null, null, true, null, null, null, null); // --> 0
938 Assert.assertEquals("Expecting 0 entities", 0, pager.getCount().intValue());
939
940 // synonym in classification ???
941 }
942
943 @Test
944 @DataSet
945 public final void testPrepareByAreaSearch() throws IOException, ParseException {
946
947 List<PresenceAbsenceTerm> statusFilter = new ArrayList<PresenceAbsenceTerm>();
948 List<NamedArea> areaFilter = new ArrayList<NamedArea>();
949 areaFilter.add(germany);
950 areaFilter.add(canada);
951 areaFilter.add(russia);
952
953 Pager<SearchResult<TaxonBase>> pager = taxonService.findByDistribution(areaFilter, statusFilter, null, 20, 0, null, null);
954 Assert.assertEquals("Expecting 2 entities", Integer.valueOf(2), Integer.valueOf(pager.getRecords().size()));
955
956 }
957
958 @Test
959 @DataSet
960 public final void testFindTaxaAndNamesByFullText() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
961
962 refreshLuceneIndex();
963
964 Pager<SearchResult<TaxonBase>> pager;
965
966 Classification alternateClassification = classificationService.find(UUID.fromString(CLASSIFICATION_ALT_UUID));
967
968
969 pager = taxonService.findTaxaAndNamesByFullText(
970 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
971 "Abies", null, null, null, null, true, null, null, null, null);
972 // logPagerRecords(pager, Level.DEBUG);
973 Assert.assertEquals("doTaxa & doSynonyms", 8, pager.getCount().intValue());
974
975 pager = taxonService.findTaxaAndNamesByFullText(
976 EnumSet.allOf(TaxaAndNamesSearchMode.class),
977 "Abies", null, null, null, null, true, null, null, null, null);
978 // logPagerRecords(pager, Level.DEBUG);
979 Assert.assertEquals("all search modes", 8, pager.getCount().intValue());
980
981 pager = taxonService.findTaxaAndNamesByFullText(
982 EnumSet.allOf(TaxaAndNamesSearchMode.class),
983 "Abies", alternateClassification, null, null, null, true, null, null, null, null);
984 // logPagerRecords(pager, Level.DEBUG);
985 Assert.assertEquals("all search modes, filtered by alternateClassification", 1, pager.getCount().intValue());
986
987 pager = taxonService.findTaxaAndNamesByFullText(
988 EnumSet.of(TaxaAndNamesSearchMode.doSynonyms),
989 "Abies", null, null, null, null, true, null, null, null, null);
990 Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
991 SearchResult<TaxonBase> searchResult = pager.getRecords().get(0);
992 Assert.assertEquals(Synonym.class, searchResult.getEntity().getClass());
993 // Abies subalpina sec. Kohlbecker, A., Testcase standart views, 2013
994
995
996 pager = taxonService.findTaxaAndNamesByFullText(
997 EnumSet.of(TaxaAndNamesSearchMode.doTaxaByCommonNames),
998 "Abies", null, null, null, null, true, null, null, null, null);
999 Assert.assertEquals("Expecting 0 entity", 0, pager.getCount().intValue());
1000
1001
1002 pager = taxonService.findTaxaAndNamesByFullText(
1003 EnumSet.of(TaxaAndNamesSearchMode.doTaxaByCommonNames),
1004 "Tanne", null, null, null, null, true, null, null, null, null);
1005 Assert.assertEquals("Expecting 1 entity", 1, pager.getRecords().size());
1006 Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
1007
1008 pager = taxonService.findTaxaAndNamesByFullText(
1009 EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1010 "kawakamii", null, null, null, null, true, null, null, null, null);
1011 logSearchResults(pager, Level.DEBUG, null);
1012 Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
1013
1014 }
1015
1016 @Test
1017 @DataSet
1018 public final void testFindTaxaAndNamesByFullText_PhraseQuery() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1019
1020 refreshLuceneIndex();
1021
1022 Pager<SearchResult<TaxonBase>> pager;
1023
1024
1025 pager = taxonService.findTaxaAndNamesByFullText(
1026 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1027 "\"Abies alba\"", null, null, null, null, true, null, null, null, null);
1028 // logPagerRecords(pager, Level.DEBUG);
1029 Assert.assertEquals("doTaxa & doSynonyms with simple phrase query", 1, pager.getCount().intValue());
1030
1031 pager = taxonService.findTaxaAndNamesByFullText(
1032 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1033 "\"Abies al*\"", null, null, null, null, true, null, null, null, null);
1034 // logPagerRecords(pager, Level.DEBUG);
1035 Assert.assertEquals("doTaxa & doSynonyms with complex phrase query", 1, pager.getCount().intValue());
1036
1037 pager = taxonService.findTaxaAndNamesByFullText(
1038 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1039 "\"Abies*\"", null, null, null, null, true, null, null, null, null);
1040 // logPagerRecords(pager, Level.DEBUG);
1041 Assert.assertEquals("doTaxa & doSynonyms with simple phrase query", 8, pager.getCount().intValue());
1042
1043 }
1044
1045 @Test
1046 @DataSet
1047 public final void testFindTaxaAndNamesByFullText_Sort() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1048
1049 refreshLuceneIndex();
1050
1051 Pager<SearchResult<TaxonBase>> pager;
1052
1053 List<OrderHint> orderHints = new ArrayList<OrderHint>();
1054
1055 String[] docFields2log = new String[]{"id"};
1056
1057 // SortById
1058 orderHints.addAll(OrderHint.ORDER_BY_ID.asList());
1059 pager = taxonService.findTaxaAndNamesByFullText(
1060 EnumSet.of(TaxaAndNamesSearchMode.doTaxa),
1061 "Abies", null, null, null, null, true, null, null, orderHints, null);
1062 // logSearchResults(pager, Level.DEBUG, docFields2log);
1063 int lastId = -1;
1064 for(SearchResult<TaxonBase> rs : pager.getRecords()){
1065 if(lastId != -1){
1066 Assert.assertTrue("results not sorted by id", lastId < rs.getEntity().getId());
1067 }
1068 lastId = rs.getEntity().getId();
1069 }
1070
1071 orderHints.addAll(OrderHint.ORDER_BY_ID.asList());
1072 pager = taxonService.findTaxaAndNamesByFullText(
1073 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1074 "Abies", null, null, null, null, true, null, null, orderHints, null);
1075 // logSearchResults(pager, Level.DEBUG, docFields2log);
1076
1077 lastId = -1;
1078 for(SearchResult<TaxonBase> rs : pager.getRecords()){
1079 if(lastId != -1){
1080 Assert.assertTrue("results not sorted by id", lastId < rs.getEntity().getId());
1081 }
1082 lastId = rs.getEntity().getId();
1083 }
1084
1085 // Sortby NOMENCLATURAL_SORT_ORDER TODO make assertions !!!
1086 orderHints.clear();
1087 orderHints.addAll(OrderHint.NOMENCLATURAL_SORT_ORDER.asList());
1088 pager = taxonService.findTaxaAndNamesByFullText(
1089 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1090 "Abies", null, null, null, null, true, null, null, orderHints, null);
1091 logSearchResults(pager, Level.DEBUG, null);
1092
1093 }
1094
1095 @Test
1096 @DataSet
1097 public final void testFindTaxaAndNamesByFullText_AreaFilter() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1098
1099 refreshLuceneIndex();
1100
1101 Pager<SearchResult<TaxonBase>> pager;
1102
1103 Set<NamedArea> a_germany_canada_russia = new HashSet<NamedArea>();
1104 a_germany_canada_russia.add(germany);
1105 a_germany_canada_russia.add(canada);
1106 a_germany_canada_russia.add(russia);
1107
1108 Set<NamedArea> a_russia = new HashSet<NamedArea>();
1109 a_russia.add(russia);
1110
1111 Set<PresenceAbsenceTerm> present = new HashSet<PresenceAbsenceTerm>();
1112 present.add(PresenceAbsenceTerm.PRESENT());
1113
1114 Set<PresenceAbsenceTerm> present_native = new HashSet<PresenceAbsenceTerm>();
1115 present_native.add(PresenceAbsenceTerm.PRESENT());
1116 present_native.add(PresenceAbsenceTerm.NATIVE());
1117
1118 Set<PresenceAbsenceTerm> absent = new HashSet<PresenceAbsenceTerm>();
1119 absent.add(PresenceAbsenceTerm.ABSENT());
1120
1121 pager = taxonService.findTaxaAndNamesByFullText(
1122 EnumSet.of(TaxaAndNamesSearchMode.doTaxa),
1123 "Abies", null, a_germany_canada_russia, null, null, true, null, null, null, null);
1124 logSearchResults(pager, Level.DEBUG, null);
1125
1126 // abies_kawakamii_sensu_komarov as missapplied name for t_abies_balsamea
1127 pager = taxonService.findTaxaAndNamesByFullText(
1128 EnumSet.of(TaxaAndNamesSearchMode.doSynonyms),
1129 "Abies", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1130 Assert.assertEquals("synonyms with matching area filter", 1, pager.getCount().intValue());
1131
1132 pager = taxonService.findTaxaAndNamesByFullText(
1133 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1134 "Abies", null, a_germany_canada_russia, null, null, true, null, null, null, null);
1135 logSearchResults(pager, Level.DEBUG, null);
1136 Assert.assertEquals("taxa and synonyms with matching area filter", 3, pager.getCount().intValue());
1137
1138 pager = taxonService.findTaxaAndNamesByFullText(
1139 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1140 "Abies", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1141 Assert.assertEquals("taxa and synonyms with matching area & status filter 1", 3, pager.getCount().intValue());
1142
1143 pager = taxonService.findTaxaAndNamesByFullText(
1144 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1145 "Abies", null, a_germany_canada_russia, present, null, true, null, null, null, null);
1146 Assert.assertEquals("taxa and synonyms with matching area & status filter 2", 2, pager.getCount().intValue());
1147
1148 pager = taxonService.findTaxaAndNamesByFullText(
1149 EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1150 "Abies", null, a_russia, present, null, true, null, null, null, null);
1151 Assert.assertEquals("taxa and synonyms with non matching area & status filter", 0, pager.getCount().intValue());
1152
1153 pager = taxonService.findTaxaAndNamesByFullText(
1154 EnumSet.of(TaxaAndNamesSearchMode.doTaxaByCommonNames),
1155 "Tanne", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1156 Assert.assertEquals("ByCommonNames with area filter", 1, pager.getCount().intValue());
1157
1158 // abies_kawakamii_sensu_komarov as misapplied name for t_abies_balsamea
1159 pager = taxonService.findTaxaAndNamesByFullText(
1160 EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1161 "Abies", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1162 Assert.assertEquals("misappliedNames with matching area & status filter", 1, pager.getCount().intValue());
1163
1164
1165 // 1. remove existing taxon relation
1166 Taxon t_abies_balsamea = (Taxon)taxonService.find(UUID.fromString(ABIES_BALSAMEA_UUID));
1167 Set<TaxonRelationship> relsTo = t_abies_balsamea.getRelationsToThisTaxon();
1168 Assert.assertEquals(1, relsTo.size());
1169 TaxonRelationship taxonRelation = relsTo.iterator().next();
1170 t_abies_balsamea.removeTaxonRelation(taxonRelation);
1171 taxonService.saveOrUpdate(t_abies_balsamea);
1172 commitAndStartNewTransaction(null);
1173
1174 pager = taxonService.findTaxaAndNamesByFullText(
1175 EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1176 "Abies", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1177 Assert.assertEquals("misappliedNames with matching area & status filter, should match nothing now", 0, pager.getCount().intValue());
1178
1179 // 2. now add abies_kawakamii_sensu_komarov as misapplied name for t_abies_alba and search for misapplications in russia: ABSENT
1180 Taxon t_abies_kawakamii_sensu_komarov = (Taxon)taxonService.find(UUID.fromString(D_ABIES_KAWAKAMII_SEC_KOMAROV_UUID));
1181 Taxon t_abies_alba = (Taxon)taxonService.find(UUID.fromString(ABIES_ALBA_UUID));
1182 t_abies_alba.addMisappliedName(t_abies_kawakamii_sensu_komarov, null, null);
1183
1184 taxonService.update(t_abies_kawakamii_sensu_komarov);
1185
1186 commitAndStartNewTransaction(null);
1187
1188 pager = taxonService.findTaxaAndNamesByFullText(
1189 EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1190 "Abies", null, a_germany_canada_russia, absent, null, true, null, null, null, null);
1191 Assert.assertEquals("misappliedNames with matching area & status filter, should find one", 1, pager.getCount().intValue());
1192
1193 }
1194
1195 @Test
1196 @DataSet
1197 @Ignore // remove once http://dev.e-taxonomy.eu/trac/ticket/5477 is solved
1198 public final void testFindTaxaAndNamesByFullText_AreaFilter_issue5477() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1199
1200 Set<NamedArea> a_germany_canada_russia = new HashSet<NamedArea>();
1201 a_germany_canada_russia.add(germany);
1202 a_germany_canada_russia.add(canada);
1203 a_germany_canada_russia.add(russia);
1204
1205
1206 Set<PresenceAbsenceTerm> absent = new HashSet<PresenceAbsenceTerm>();
1207 absent.add(PresenceAbsenceTerm.ABSENT());
1208
1209 Taxon t_abies_kawakamii_sensu_komarov = (Taxon)taxonService.find(UUID.fromString(D_ABIES_KAWAKAMII_SEC_KOMAROV_UUID));
1210 Taxon t_abies_alba = (Taxon)taxonService.find(UUID.fromString(ABIES_ALBA_UUID));
1211 t_abies_alba.addMisappliedName(t_abies_kawakamii_sensu_komarov, null, null);
1212
1213 /* Since the upgrade from hibernate search 4 to 5.5
1214 * triggering an update of t_abies_alba is no longer sufficient to also update the
1215 * document of t_abies_kawakamii_sensu_komarov in the lucene index.
1216 * the last test in testFindTaxaAndNamesByFullText_AreaFilter() failed in this case.
1217 * This situation is reproduced here:
1218 */
1219 taxonService.update(t_abies_alba);
1220
1221 commitAndStartNewTransaction(null);
1222
1223 Pager pager = taxonService.findTaxaAndNamesByFullText(
1224 EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1225 "Abies", null, a_germany_canada_russia, absent, null, true, null, null, null, null);
1226 Assert.assertEquals("misappliedNames with matching area & status filter, should find one", 1, pager.getCount().intValue());
1227 }
1228
1229
1230 /**
1231 * Regression test for #3119: fulltext search: Entity always null whatever search
1232 *
1233 * @throws CorruptIndexException
1234 * @throws IOException
1235 * @throws ParseException
1236 * @throws LuceneMultiSearchException
1237 */
1238 @Test
1239 @DataSet
1240 public final void testFindByEverythingFullText() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1241
1242 refreshLuceneIndex();
1243
1244 Pager<SearchResult<TaxonBase>> pager;
1245
1246 // via Taxon
1247 pager = taxonService.findByEverythingFullText("Abies", null, null, true, null, null, null, null);
1248 logSearchResults(pager, Level.DEBUG, null);
1249 Assert.assertTrue("Expecting at least 7 entities for 'Abies'", pager.getCount() > 7);
1250 Assert.assertNotNull("Expecting entity", pager.getRecords().get(0).getEntity());
1251 Assert.assertEquals("Expecting Taxon entity", Taxon.class, pager.getRecords().get(0).getEntity().getClass());
1252
1253 // via DescriptionElement
1254 pager = taxonService.findByEverythingFullText("present", null, null, true, null, null, null, null);
1255 Assert.assertEquals("Expecting one entity when searching for area 'present'", 1, pager.getCount().intValue());
1256 Assert.assertNotNull("Expecting entity", pager.getRecords().get(0).getEntity());
1257 Assert.assertEquals("Expecting Taxon entity", Taxon.class, CdmBase.deproxy(pager.getRecords().get(0).getEntity()).getClass());
1258 Assert.assertEquals("Expecting Taxon ", ABIES_BALSAMEA_UUID, pager.getRecords().get(0).getEntity().getUuid().toString());
1259
1260 }
1261
1262
1263 @Test
1264 @DataSet
1265 public final void findByEveryThingFullText() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1266
1267 refreshLuceneIndex();
1268
1269 Pager<SearchResult<TaxonBase>> pager;
1270
1271 pager = taxonService.findByEverythingFullText("genus", null, null, false, null, null, null, null); // --> 1
1272 Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
1273
1274 //FIXME FAILS: abies balamea is returned twice, see also testFullText_Grouping()
1275 pager = taxonService.findByEverythingFullText("Balsam", null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
1276 logSearchResults(pager, Level.DEBUG, null);
1277 Assert.assertEquals("expecting to find the Abies balsamea via the GERMAN DescriptionElements", 1, pager.getCount().intValue());
1278
1279 pager = taxonService.findByEverythingFullText("Abies", null, null, true, null, null, null, null);
1280 Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
1281 SearchResult<TaxonBase> searchResult = pager.getRecords().get(0);
1282 Assert.assertTrue("the map of highlighted fragments should contain at least one item", searchResult.getFieldHighlightMap().size() > 0);
1283 String[] fragments = searchResult.getFieldHighlightMap().values().iterator().next();
1284 Assert.assertTrue("first fragments should contains serch term", fragments[0].toLowerCase().contains("<b>abies</b>"));
1285 }
1286
1287 // @SuppressWarnings("rawtypes")
1288 // @Test
1289 // @DataSet
1290 // public final void benchmarkFindTaxaAndNamesHql() throws CorruptIndexException, IOException, ParseException {
1291 //
1292 // createRandomTaxonWithCommonName(NUM_OF_NEW_RADOM_ENTITIES);
1293 //
1294 // IFindTaxaAndNamesConfigurator configurator = new FindTaxaAndNamesConfiguratorImpl();
1295 // configurator.setTitleSearchString("Wei"+UTF8.SHARP_S+"%");
1296 // configurator.setMatchMode(MatchMode.BEGINNING);
1297 // configurator.setDoTaxa(false);
1298 // configurator.setDoSynonyms(false);
1299 // configurator.setDoNamesWithoutTaxa(false);
1300 // configurator.setDoTaxaByCommonNames(true);
1301 //
1302 // Pager<IdentifiableEntity> pager;
1303 //
1304 // long startMillis = System.currentTimeMillis();
1305 // for (int indx = 0; indx < BENCHMARK_ROUNDS; indx++) {
1306 // pager = taxonService.findTaxaAndNames(configurator);
1307 // if (logger.isDebugEnabled()) {
1308 // logger.debug("[" + indx + "]" + pager.getRecords().get(0).getTitleCache());
1309 // }
1310 // }
1311 // double duration = ((double) (System.currentTimeMillis() - startMillis)) / BENCHMARK_ROUNDS;
1312 // logger.info("Benchmark result - [find taxon by CommonName via HQL] : " + duration + "ms (" + BENCHMARK_ROUNDS + " benchmark rounds )");
1313 // }
1314
1315 @SuppressWarnings("rawtypes")
1316 @Test
1317 @DataSet
1318 public final void benchmarkFindByCommonNameHql() throws CorruptIndexException, IOException, ParseException {
1319
1320 // printDataSet(System.err, new String[] { "TaxonBase" });
1321
1322 createRandomTaxonWithCommonName(NUM_OF_NEW_RADOM_ENTITIES);
1323
1324 IFindTaxaAndNamesConfigurator configurator = new FindTaxaAndNamesConfiguratorImpl<>();
1325 configurator.setTitleSearchString("Wei"+UTF8.SHARP_S+"%");
1326 configurator.setMatchMode(MatchMode.BEGINNING);
1327 configurator.setDoTaxa(false);
1328 configurator.setDoSynonyms(false);
1329 configurator.setDoNamesWithoutTaxa(false);
1330 configurator.setDoTaxaByCommonNames(true);
1331
1332 Pager<IdentifiableEntity> pager;
1333
1334 long startMillis = System.currentTimeMillis();
1335 for (int indx = 0; indx < BENCHMARK_ROUNDS; indx++) {
1336 pager = taxonService.findTaxaAndNames(configurator);
1337 if (logger.isDebugEnabled()) {
1338 logger.debug("[" + indx + "]" + pager.getRecords().get(0).getTitleCache());
1339 }
1340 }
1341 double duration = ((double) (System.currentTimeMillis() - startMillis)) / BENCHMARK_ROUNDS;
1342 logger.info("Benchmark result - [find taxon by CommonName via HQL] : " + duration + "ms (" + BENCHMARK_ROUNDS + " benchmark rounds )");
1343 }
1344
1345 @SuppressWarnings("rawtypes")
1346 @Test
1347 @DataSet
1348 public final void benchmarkFindByCommonNameLucene() throws CorruptIndexException, IOException, ParseException {
1349
1350 createRandomTaxonWithCommonName(NUM_OF_NEW_RADOM_ENTITIES);
1351
1352 refreshLuceneIndex();
1353
1354 Pager<SearchResult<TaxonBase>> pager;
1355
1356 long startMillis = System.currentTimeMillis();
1357 for (int indx = 0; indx < BENCHMARK_ROUNDS; indx++) {
1358 pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"*", null, null, null, false, null, null, null, null);
1359 if (logger.isDebugEnabled()) {
1360 logger.debug("[" + indx + "]" + pager.getRecords().get(0).getEntity().getTitleCache());
1361 }
1362 }
1363 double duration = ((double) (System.currentTimeMillis() - startMillis)) / BENCHMARK_ROUNDS;
1364 logger.info("Benchmark result - [find taxon by CommonName via lucene] : " + duration + "ms (" + BENCHMARK_ROUNDS + " benchmark rounds )");
1365 }
1366
1367 /**
1368 * uncomment @Test annotation to create the dataset for this test
1369 */
1370 @Override
1371 // @Test
1372 @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class, value="BlankDataSet.xml")
1373 public final void createTestDataSet() throws FileNotFoundException {
1374
1375 Classification europeanAbiesClassification = Classification.NewInstance("European Abies");
1376 europeanAbiesClassification.setUuid(UUID.fromString(CLASSIFICATION_UUID));
1377 classificationService.save(europeanAbiesClassification);
1378
1379 Classification alternativeClassification = Classification.NewInstance("Abies alternative");
1380 alternativeClassification.setUuid(UUID.fromString(CLASSIFICATION_ALT_UUID));
1381 classificationService.save(alternativeClassification);
1382
1383 Reference sec = ReferenceFactory.newBook();
1384 sec.setTitleCache("Kohlbecker, A., Testcase standart views, 2013", true);
1385 Reference sec_sensu = ReferenceFactory.newBook();
1386 sec_sensu.setTitleCache("Komarov, V. L., Flora SSSR 29", true);
1387 referenceService.save(sec);
1388 referenceService.save(sec_sensu);
1389
1390 BotanicalName n_abies = BotanicalName.NewInstance(Rank.GENUS());
1391 n_abies.setNameCache("Abies", true);
1392 Taxon t_abies = Taxon.NewInstance(n_abies, sec);
1393 taxonService.save(t_abies);
1394
1395 BotanicalName n_abies_alba = BotanicalName.NewInstance(Rank.SPECIES());
1396 n_abies_alba.setNameCache("Abies alba", true);
1397 Taxon t_abies_alba = Taxon.NewInstance(n_abies_alba, sec);
1398 t_abies_alba.setUuid(UUID.fromString(ABIES_ALBA_UUID));
1399 taxonService.save(t_abies_alba);
1400
1401 BotanicalName n_abies_subalpina = BotanicalName.NewInstance(Rank.SPECIES());
1402 n_abies_subalpina.setNameCache("Abies subalpina", true);
1403 Synonym s_abies_subalpina = Synonym.NewInstance(n_abies_subalpina, sec);
1404 taxonService.save(s_abies_subalpina);
1405
1406 BotanicalName n_abies_balsamea = BotanicalName.NewInstance(Rank.SPECIES());
1407 n_abies_balsamea.setNameCache("Abies balsamea", true);
1408 Taxon t_abies_balsamea = Taxon.NewInstance(n_abies_balsamea, sec);
1409 t_abies_balsamea.setUuid(UUID.fromString(ABIES_BALSAMEA_UUID));
1410 t_abies_balsamea.addSynonym(s_abies_subalpina, SynonymType.SYNONYM_OF());
1411 taxonService.save(t_abies_balsamea);
1412
1413 BotanicalName n_abies_grandis = BotanicalName.NewInstance(Rank.SPECIES());
1414 n_abies_grandis.setNameCache("Abies grandis", true);
1415 Taxon t_abies_grandis = Taxon.NewInstance(n_abies_grandis, sec);
1416 taxonService.save(t_abies_grandis);
1417
1418 BotanicalName n_abies_kawakamii = BotanicalName.NewInstance(Rank.SPECIES());
1419 n_abies_kawakamii.setNameCache("Abies kawakamii", true);
1420 Taxon t_abies_kawakamii = Taxon.NewInstance(n_abies_kawakamii, sec);
1421 t_abies_kawakamii.getTitleCache();
1422 taxonService.save(t_abies_kawakamii);
1423
1424 // abies_kawakamii_sensu_komarov as missapplied name for t_abies_balsamea
1425 Taxon t_abies_kawakamii_sensu_komarov = Taxon.NewInstance(n_abies_kawakamii, sec_sensu);
1426 taxonService.save(t_abies_kawakamii_sensu_komarov);
1427 t_abies_kawakamii_sensu_komarov.addTaxonRelation(t_abies_balsamea, TaxonRelationshipType.MISAPPLIED_NAME_FOR(), null, null);
1428 taxonService.saveOrUpdate(t_abies_kawakamii_sensu_komarov);
1429
1430 BotanicalName n_abies_lasiocarpa = BotanicalName.NewInstance(Rank.SPECIES());
1431 n_abies_lasiocarpa.setNameCache("Abies lasiocarpa", true);
1432 Taxon t_abies_lasiocarpa = Taxon.NewInstance(n_abies_lasiocarpa, sec);
1433 taxonService.save(t_abies_lasiocarpa);
1434
1435 // add taxa to classifications
1436 europeanAbiesClassification.addChildTaxon(t_abies_balsamea, null, null);
1437 alternativeClassification.addChildTaxon(t_abies_lasiocarpa, null, null);
1438 classificationService.saveOrUpdate(europeanAbiesClassification);
1439 classificationService.saveOrUpdate(alternativeClassification);
1440
1441 //
1442 // Description
1443 //
1444 TaxonDescription d_abies_alba = TaxonDescription.NewInstance(t_abies_alba);
1445 TaxonDescription d_abies_balsamea = TaxonDescription.NewInstance(t_abies_balsamea);
1446
1447 d_abies_alba.setUuid(UUID.fromString(D_ABIES_ALBA_UUID));
1448 d_abies_balsamea.setUuid(UUID.fromString(D_ABIES_BALSAMEA_UUID));
1449
1450
1451 // CommonTaxonName
1452 d_abies_alba.addElement(CommonTaxonName.NewInstance("Wei"+UTF8.SHARP_S+"tanne", Language.GERMAN()));
1453 d_abies_alba.addElement(CommonTaxonName.NewInstance("silver fir", Language.ENGLISH()));
1454 d_abies_alba.addElement(Distribution
1455 .NewInstance(
1456 germany,
1457 PresenceAbsenceTerm.NATIVE()));
1458 d_abies_alba.addElement(Distribution
1459 .NewInstance(
1460 russia,
1461 PresenceAbsenceTerm.ABSENT()));
1462
1463 // TextData
1464 d_abies_balsamea
1465 .addElement(TextData
1466 .NewInstance(
1467 "Die Balsam-Tanne (Abies balsamea) ist eine Pflanzenart aus der Gattung der Tannen (Abies). Sie wächst im nordöstlichen Nordamerika, wo sie sowohl Tief- als auch Bergland besiedelt. Sie gilt als relativ anspruchslos gegenüber dem Standort und ist frosthart. In vielen Teilen des natürlichen Verbreitungsgebietes stellt sie die Klimaxbaumart dar.",
1468 Language.GERMAN(), null));
1469 d_abies_balsamea
1470 .addElement(CommonTaxonName
1471 .NewInstance(
1472 "Balsam-Tanne",
1473 Language.GERMAN(), null));
1474
1475 d_abies_balsamea
1476 .addElement(Distribution
1477 .NewInstance(
1478 canada,
1479 PresenceAbsenceTerm.PRESENT()));
1480
1481 d_abies_balsamea
1482 .addElement(Distribution
1483 .NewInstance(
1484 germany,
1485 PresenceAbsenceTerm.NATIVE()));
1486
1487 d_abies_balsamea
1488 .addElement(TextData
1489 .NewInstance(
1490 TaxonServiceSearchTestUtf8Constants.RUSSIAN_ABIES_ALBA_LONG,
1491 Language.RUSSIAN(), null));
1492 d_abies_balsamea
1493 .addElement(CommonTaxonName
1494 .NewInstance(
1495 TaxonServiceSearchTestUtf8Constants.RUSSIAN_ABIES_ALBA_SHORT,
1496 Language.RUSSIAN(), null));
1497 descriptionService.saveOrUpdate(d_abies_balsamea);
1498
1499 setComplete();
1500 endTransaction();
1501
1502
1503 writeDbUnitDataSetFile(new String[] {
1504 "TAXONBASE", "TAXONNAMEBASE",
1505 "TAXONRELATIONSHIP",
1506 "REFERENCE", "DESCRIPTIONELEMENTBASE", "DESCRIPTIONBASE",
1507 "AGENTBASE", "HOMOTYPICALGROUP",
1508 "CLASSIFICATION", "TAXONNODE",
1509 "LANGUAGESTRING", "DESCRIPTIONELEMENTBASE_LANGUAGESTRING",
1510 "HIBERNATE_SEQUENCES" // IMPORTANT!!!
1511 });
1512
1513 }
1514
1515 /**
1516 *
1517 */
1518 private void refreshLuceneIndex() {
1519
1520 // commitAndStartNewTransaction(null);
1521 commit();
1522 endTransaction();
1523 indexer.purge(DefaultProgressMonitor.NewInstance());
1524 indexer.reindex(typesToIndex, DefaultProgressMonitor.NewInstance());
1525 startNewTransaction();
1526 // commitAndStartNewTransaction(null);
1527 }
1528
1529 /**
1530 * @param numberOfNew
1531 *
1532 */
1533 private void createRandomTaxonWithCommonName(int numberOfNew) {
1534
1535 logger.debug(String.format("creating %1$s random taxan with CommonName", numberOfNew));
1536
1537 commitAndStartNewTransaction(null);
1538
1539 Reference sec = ReferenceFactory.newBook();
1540 referenceService.save(sec);
1541
1542 for (int i = numberOfNew; i < numberOfNew; i++) {
1543 RandomStringUtils.randomAlphabetic(10);
1544 String radomName = RandomStringUtils.randomAlphabetic(5) + " " + RandomStringUtils.randomAlphabetic(10);
1545 String radomCommonName = RandomStringUtils.randomAlphabetic(10);
1546
1547 BotanicalName name = BotanicalName.NewInstance(Rank.SPECIES());
1548 name.setNameCache(radomName, true);
1549 Taxon taxon = Taxon.NewInstance(name, sec);
1550 taxonService.save(taxon);
1551
1552 TaxonDescription description = TaxonDescription.NewInstance(taxon);
1553 description.addElement(CommonTaxonName.NewInstance(radomCommonName, Language.GERMAN()));
1554 descriptionService.save(description);
1555 }
1556
1557 commitAndStartNewTransaction(null);
1558 }
1559
1560 private <T extends CdmBase> void logSearchResults(Pager<SearchResult<T>> pager, Level level, String[] docFields){
1561 if(level == null){
1562 level = Level.DEBUG;
1563 }
1564 if(logger.isEnabledFor(level)){
1565 StringBuilder b = new StringBuilder();
1566 b.append("\n");
1567 int i = 0;
1568 for(SearchResult sr : pager.getRecords()){
1569
1570 b.append(" ").append(i++).append(" - ");
1571 b.append("score:").append(sr.getScore()).append(", ");
1572
1573 if(docFields != null){
1574 b.append("docs : ");
1575 for(Document doc : sr.getDocs()) {
1576 b.append("<");
1577 for(String f : docFields){
1578 b.append(f).append(":").append(Arrays.toString(doc.getValues(f)));
1579 }
1580 b.append(">");
1581 }
1582 }
1583
1584 CdmBase entity = sr.getEntity();
1585 if(entity == null){
1586 b.append("NULL");
1587 } else {
1588 b.append(entity.getClass().getSimpleName()).
1589 append(" [").append(entity.getId()).
1590 append(" | ").append(entity.getUuid()).append("] : ").
1591 append(entity.toString());
1592
1593 }
1594 b.append("\n");
1595 }
1596 logger.log(level, b);
1597 }
1598 }
1599
1600 }