Project

General

Profile

Download (77.6 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2009 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.api.service;
11

    
12
import static org.junit.Assert.assertEquals;
13
import static org.junit.Assert.assertNotNull;
14

    
15
import java.io.FileNotFoundException;
16
import java.io.IOException;
17
import java.util.ArrayList;
18
import java.util.Arrays;
19
import java.util.EnumSet;
20
import java.util.HashSet;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.Set;
24
import java.util.UUID;
25

    
26
import org.apache.commons.lang.RandomStringUtils;
27
import org.apache.log4j.Level;
28
import org.apache.log4j.Logger;
29
import org.apache.lucene.document.Document;
30
import org.apache.lucene.index.CorruptIndexException;
31
import org.apache.lucene.queryparser.classic.ParseException;
32
import org.junit.Assert;
33
import org.junit.Before;
34
import org.junit.Ignore;
35
import org.junit.Test;
36
import org.unitils.dbunit.annotation.DataSet;
37
import org.unitils.spring.annotation.SpringBeanByType;
38

    
39
import eu.etaxonomy.cdm.api.service.config.FindTaxaAndNamesConfiguratorImpl;
40
import eu.etaxonomy.cdm.api.service.config.IFindTaxaAndNamesConfigurator;
41
import eu.etaxonomy.cdm.api.service.pager.Pager;
42
import eu.etaxonomy.cdm.api.service.search.ICdmMassIndexer;
43
import eu.etaxonomy.cdm.api.service.search.LuceneMultiSearchException;
44
import eu.etaxonomy.cdm.api.service.search.SearchResult;
45
import eu.etaxonomy.cdm.common.UTF8;
46
import eu.etaxonomy.cdm.common.monitor.DefaultProgressMonitor;
47
import eu.etaxonomy.cdm.hibernate.HibernateProxyHelper;
48
import eu.etaxonomy.cdm.model.common.CdmBase;
49
import eu.etaxonomy.cdm.model.common.DefinedTermBase;
50
import eu.etaxonomy.cdm.model.common.IdentifiableEntity;
51
import eu.etaxonomy.cdm.model.common.Language;
52
import eu.etaxonomy.cdm.model.description.CategoricalData;
53
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
54
import eu.etaxonomy.cdm.model.description.DescriptionBase;
55
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
56
import eu.etaxonomy.cdm.model.description.Distribution;
57
import eu.etaxonomy.cdm.model.description.Feature;
58
import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
59
import eu.etaxonomy.cdm.model.description.State;
60
import eu.etaxonomy.cdm.model.description.StateData;
61
import eu.etaxonomy.cdm.model.description.TaxonDescription;
62
import eu.etaxonomy.cdm.model.description.TextData;
63
import eu.etaxonomy.cdm.model.location.Country;
64
import eu.etaxonomy.cdm.model.location.NamedArea;
65
import eu.etaxonomy.cdm.model.name.BotanicalName;
66
import eu.etaxonomy.cdm.model.name.NonViralName;
67
import eu.etaxonomy.cdm.model.name.Rank;
68
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
69
import eu.etaxonomy.cdm.model.reference.Reference;
70
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
71
import eu.etaxonomy.cdm.model.taxon.Classification;
72
import eu.etaxonomy.cdm.model.taxon.Synonym;
73
import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
74
import eu.etaxonomy.cdm.model.taxon.Taxon;
75
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
76
import eu.etaxonomy.cdm.model.taxon.TaxonNode;
77
import eu.etaxonomy.cdm.model.taxon.TaxonRelationship;
78
import eu.etaxonomy.cdm.model.taxon.TaxonRelationshipType;
79
import eu.etaxonomy.cdm.persistence.dto.UuidAndTitleCache;
80
import eu.etaxonomy.cdm.persistence.query.MatchMode;
81
import eu.etaxonomy.cdm.persistence.query.OrderHint;
82
import eu.etaxonomy.cdm.test.integration.CdmTransactionalIntegrationTest;
83
import eu.etaxonomy.cdm.test.unitils.CleanSweepInsertLoadStrategy;
84

    
85
/**
86
 * @author a.babadshanjan, a.kohlbecker
87
 * @created 04.02.2009
88
 */
89
public class TaxonServiceSearchTest extends CdmTransactionalIntegrationTest {
90

    
91
    private static final String ABIES_BALSAMEA_UUID = "f65d47bd-4f49-4ab1-bc4a-bc4551eaa1a8";
92

    
93
    private static final String ABIES_ALBA_UUID = "7dbd5810-a3e5-44b6-b563-25152b8867f4";
94

    
95
    private static final String CLASSIFICATION_UUID = "2a5ceebb-4830-4524-b330-78461bf8cb6b";
96

    
97
    private static final String CLASSIFICATION_ALT_UUID = "d7c741e3-ae9e-4a7d-a566-9e3a7a0b51ce";
98

    
99
    private static final String D_ABIES_BALSAMEA_UUID = "900108d8-e6ce-495e-b32e-7aad3099135e";
100

    
101
    private static final String D_ABIES_ALBA_UUID = "ec8bba03-d993-4c85-8472-18b14942464b";
102

    
103
    private static final String D_ABIES_KAWAKAMII_SEC_KOMAROV_UUID = "e9d8c2fd-6409-46d5-9c2e-14a2bbb1b2b1";
104
    private static final int NUM_OF_NEW_RADOM_ENTITIES = 1000;
105

    
106
    private static Logger logger = Logger.getLogger(TaxonServiceSearchTest.class);
107

    
108

    
109

    
110
    @SpringBeanByType
111
    private ITaxonService taxonService;
112
    @SpringBeanByType
113
    private ITermService termService;
114
    @SpringBeanByType
115
    private IClassificationService classificationService;
116
    @SpringBeanByType
117
    private IReferenceService referenceService;
118
    @SpringBeanByType
119
    private IDescriptionService descriptionService;
120
    @SpringBeanByType
121
    private INameService nameService;
122
    @SpringBeanByType
123
    private ICdmMassIndexer indexer;
124

    
125
    @SpringBeanByType
126
    private ITaxonNodeService nodeService;
127

    
128
    private static final int BENCHMARK_ROUNDS = 300;
129

    
130
    private Set<Class<? extends CdmBase>> typesToIndex = null;
131

    
132
    private NamedArea germany;
133
    private NamedArea france ;
134
    private NamedArea russia ;
135
    private NamedArea canada ;
136

    
137
    /**
138
     * @throws java.lang.Exception
139
     */
140
    @Before
141
    public void setUp() throws Exception {
142
        typesToIndex = new HashSet<Class<? extends CdmBase>>();
143
        typesToIndex.add(DescriptionElementBase.class);
144
        typesToIndex.add(TaxonBase.class);
145
        typesToIndex.add(TaxonRelationship.class);
146

    
147
        germany =  Country.GERMANY();
148
        france = Country.FRANCEFRENCHREPUBLIC();
149
        russia = Country.RUSSIANFEDERATION();
150
        canada = Country.CANADA();
151

    
152

    
153
    }
154

    
155
    @Test
156
    public void testDbUnitUsageTest() throws Exception {
157
        assertNotNull("taxonService should exist", taxonService);
158
        assertNotNull("nameService should exist", nameService);
159
    }
160

    
161
    /**
162
     * Test method for
163
     * {@link eu.etaxonomy.cdm.api.service.TaxonServiceImpl#findTaxaAndNames(eu.etaxonomy.cdm.api.service.config.IFindTaxaAndNamesConfigurator)}
164
     * .
165
     */
166
    @Test
167
    @DataSet
168
    public final void testFindTaxaAndNames() {
169

    
170
        // pass 1
171
        IFindTaxaAndNamesConfigurator<?> configurator = new FindTaxaAndNamesConfiguratorImpl();
172
        configurator.setTitleSearchString("Abies*");
173
        configurator.setMatchMode(MatchMode.BEGINNING);
174
        configurator.setDoTaxa(true);
175
        configurator.setDoSynonyms(true);
176
        configurator.setDoNamesWithoutTaxa(true);
177
        configurator.setDoTaxaByCommonNames(true);
178

    
179
        Pager<IdentifiableEntity> pager = taxonService.findTaxaAndNames(configurator);
180
        List<IdentifiableEntity> list = pager.getRecords();
181

    
182
        if (logger.isDebugEnabled()) {
183
            for (int i = 0; i < list.size(); i++) {
184
                String nameCache = "";
185
                if (list.get(i) instanceof NonViralName) {
186
                    nameCache = ((NonViralName<?>) list.get(i)).getNameCache();
187
                } else if (list.get(i) instanceof TaxonBase) {
188
                    TaxonNameBase<?,?> taxonNameBase = ((TaxonBase) list.get(i)).getName();
189
                    nameCache = HibernateProxyHelper.deproxy(taxonNameBase, NonViralName.class).getNameCache();
190
                } else {
191
                }
192
                logger.debug(list.get(i).getClass() + "(" + i + ")" + ": Name Cache = " + nameCache + ", Title Cache = "
193
                        + list.get(i).getTitleCache());
194
            }
195
        }
196

    
197
        logger.debug("number of taxa: " + list.size());
198
        assertEquals(10, list.size());
199
        configurator.setTitleSearchString("Balsam-Tanne");
200
        pager = taxonService.findTaxaAndNames(configurator);
201
        list = pager.getRecords();
202
        assertEquals(1, list.size());
203
        // pass 2
204
//        configurator.setDoTaxaByCommonNames(false);
205
//        configurator.setDoMisappliedNames(true);
206
//        configurator.setClassification(classificationService.load(UUID.fromString(CLASSIFICATION_UUID)));
207
//        pager = taxonService.findTaxaAndNames(configurator);
208
//        list = pager.getRecords();
209
//        assertEquals(0, list.size());
210

    
211
    }
212

    
213
    /**
214
     * Test method for
215
     * {@link eu.etaxonomy.cdm.api.service.TaxonServiceImpl#searchTaxaByName(java.lang.String, eu.etaxonomy.cdm.model.reference.Reference)}
216
     * .
217
     */
218
    @Test
219
    @DataSet
220
    public final void testSearchTaxaByName() {
221
         IFindTaxaAndNamesConfigurator<?> configurator = new FindTaxaAndNamesConfiguratorImpl();
222
         configurator.setTitleSearchString("Abies bor*");
223
         configurator.setMatchMode(MatchMode.BEGINNING);
224
         configurator.setDoTaxa(true);
225
         configurator.setDoSynonyms(false);
226
         configurator.setDoNamesWithoutTaxa(true);
227
         configurator.setDoTaxaByCommonNames(false);
228

    
229
        List<UuidAndTitleCache<IdentifiableEntity>> list = taxonService.findTaxaAndNamesForEditor(configurator);
230

    
231
         Assert.assertEquals("Expecting one entity", 1, list.size());
232

    
233
         configurator.setTitleSearchString("silver fir");
234
         configurator.setMatchMode(MatchMode.BEGINNING);
235
         configurator.setDoTaxa(false);
236
         configurator.setDoSynonyms(false);
237
         configurator.setDoNamesWithoutTaxa(true);
238
         configurator.setDoTaxaByCommonNames(true);
239

    
240
         list = taxonService.findTaxaAndNamesForEditor(configurator);
241

    
242
         Assert.assertEquals("Expecting one entity", 1, list.size());
243

    
244
    }
245

    
246
    @SuppressWarnings("rawtypes")
247
    @Test
248
    @DataSet
249
    public final void testPurgeAndReindex() throws CorruptIndexException, IOException, ParseException {
250

    
251
        refreshLuceneIndex();
252

    
253
        Pager<SearchResult<TaxonBase>> pager;
254

    
255
        pager = taxonService.findByFullText(null, "Abies", null, null, true, null, null, null, null); // --> 8
256
        Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
257

    
258
        indexer.purge(null);
259
        commitAndStartNewTransaction(null);
260

    
261
        pager = taxonService.findByFullText(null, "Abies", null, null, true, null, null, null, null); // --> 0
262
        Assert.assertEquals("Expecting no entities since the index has been purged", 0, pager.getCount().intValue());
263

    
264
        indexer.reindex(indexer.indexedClasses(), null);
265
        commitAndStartNewTransaction(null);
266

    
267
        pager = taxonService.findByFullText(null, "Abies", null, null, true, null, null, null, null); // --> 8
268
        Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
269
    }
270

    
271

    
272
    @SuppressWarnings("rawtypes")
273
    @Test
274
    @DataSet
275
    public final void testFindByDescriptionElementFullText_CommonName() throws CorruptIndexException, IOException,
276
            ParseException {
277

    
278
        refreshLuceneIndex();
279

    
280
        Pager<SearchResult<TaxonBase>> pager;
281

    
282
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"tanne", null, null, null,
283
                false, null, null, null, null);
284
        Assert.assertEquals("Expecting one entity when searching for CommonTaxonName", 1,
285
                pager.getCount().intValue());
286

    
287
        // the description containing the Nulltanne has no taxon attached,
288
        // taxon.id = null
289
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Nulltanne", null, null, null,
290
                false, null, null, null, null);
291
        Assert.assertEquals("Expecting no entity when searching for 'Nulltanne' ", 0, pager.getCount().intValue());
292

    
293
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"tanne", null, null,
294
                Arrays.asList(new Language[] { Language.GERMAN() }), false, null, null, null, null);
295
        Assert.assertEquals("Expecting one entity when searching in German", 1, pager.getCount().intValue());
296

    
297
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"tanne", null, null,
298
                Arrays.asList(new Language[] { Language.RUSSIAN() }), false, null, null, null, null);
299
        Assert.assertEquals("Expecting no entity when searching in Russian", 0, pager.getCount().intValue());
300

    
301
    }
302

    
303
    @SuppressWarnings("rawtypes")
304
    @Test
305
    @DataSet
306
    public final void testFindByDescriptionElementFullText_Distribution() throws CorruptIndexException, IOException, ParseException {
307

    
308
        refreshLuceneIndex();
309

    
310
        Pager<SearchResult<TaxonBase>> pager;
311
        // by Area
312
        pager = taxonService.findByDescriptionElementFullText(null, "Canada", null, null, null, false, null, null, null, null);
313
        Assert.assertEquals("Expecting one entity when searching for arae 'Canada'", 1, pager.getCount().intValue());
314
        // by Status
315
        pager = taxonService.findByDescriptionElementFullText(null, "present", null, null, null, false, null, null, null, null);
316
        Assert.assertEquals("Expecting one entity when searching for status 'present'", 1, pager.getCount().intValue());
317
    }
318

    
319
    @SuppressWarnings("rawtypes")
320
    @Test
321
    @DataSet
322
    public final void testFindByDescriptionElementFullText_wildcard() throws CorruptIndexException, IOException, ParseException {
323

    
324
        refreshLuceneIndex();
325

    
326
        Pager<SearchResult<TaxonBase>> pager;
327

    
328
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"*", null, null, null, false, null, null, null, null);
329
        Assert.assertEquals("Expecting one entity when searching for CommonTaxonName", 1, pager.getCount().intValue());
330
    }
331

    
332
    /**
333
     * Regression test for #3113 (hibernate search: wildcard query can cause BooleanQuery$TooManyClauses: maxClauseCount is set to 1024)
334
     *
335
     * @throws CorruptIndexException
336
     * @throws IOException
337
     * @throws ParseException
338
     */
339
    @SuppressWarnings("rawtypes")
340
    @Test
341
    @DataSet
342
    public final void testFindByDescriptionElementFullText_TooManyClauses() throws CorruptIndexException, IOException, ParseException {
343

    
344
        // generate 1024 terms to reproduce the bug
345
        TaxonDescription description = (TaxonDescription) descriptionService.find(UUID.fromString(D_ABIES_ALBA_UUID));
346
        Set<String> uniqueRandomStrs = new HashSet<String>(1024);
347
        while(uniqueRandomStrs.size() < 1024){
348
            uniqueRandomStrs.add(RandomStringUtils.random(10, true, false));
349
        }
350
        for(String rndStr: uniqueRandomStrs){
351
            description.addElement(CommonTaxonName.NewInstance("Rot" + rndStr, Language.DEFAULT()));
352
        }
353
        descriptionService.saveOrUpdate(description);
354
        commitAndStartNewTransaction(null);
355

    
356
        refreshLuceneIndex();
357

    
358
        Pager<SearchResult<TaxonBase>> pager;
359

    
360
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, null, null, null, null);
361
        Assert.assertEquals("Expecting all 1024 entities grouped into one SearchResult item when searching for Rot*", 1, pager.getCount().intValue());
362
    }
363

    
364
    /**
365
     * Regression test for #3116 (fulltext search: always only one page of results)
366
     *
367
     * @throws CorruptIndexException
368
     * @throws IOException
369
     * @throws ParseException
370
     */
371
    @SuppressWarnings("rawtypes")
372
    @Test
373
    @DataSet
374
    public final void testFullText_Paging() throws CorruptIndexException, IOException, ParseException {
375

    
376
        Reference sec = ReferenceFactory.newDatabase();
377
        referenceService.save(sec);
378

    
379
        Set<String> uniqueRandomStrs = new HashSet<String>(1024);
380
        int numOfItems = 100;
381
        while(uniqueRandomStrs.size() < numOfItems){
382
            uniqueRandomStrs.add(RandomStringUtils.random(5, true, false));
383
        }
384

    
385
        for(String rndStr: uniqueRandomStrs){
386

    
387
            Taxon taxon = Taxon.NewInstance(BotanicalName.NewInstance(Rank.SERIES()), sec);
388
            taxon.setTitleCache("Tax" + rndStr, true);
389
            taxonService.save(taxon);
390

    
391
            TaxonDescription description = TaxonDescription.NewInstance(taxon);
392
            description.addElement(CommonTaxonName.NewInstance("Rot" + rndStr, Language.DEFAULT()));
393
            descriptionService.saveOrUpdate(description);
394
        }
395

    
396
        commitAndStartNewTransaction(new String[]{"TAXONBASE"});
397
        refreshLuceneIndex();
398

    
399
        int pageSize = 10;
400

    
401
        Pager<SearchResult<TaxonBase>> pager;
402

    
403
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, pageSize, null, null, null);
404
        Assert.assertEquals("unexpeted number of pages", Integer.valueOf(numOfItems / pageSize), pager.getPagesAvailable());
405
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, pageSize, 9, null, null);
406
        Assert.assertNotNull("last page must have records", pager.getRecords());
407
        Assert.assertNotNull("last item on last page must exist", pager.getRecords().get(0));
408
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, false, pageSize, 10, null, null);
409
        Assert.assertNotNull("last page + 1 must not have any records", pager.getRecords());
410
    }
411

    
412
    /**
413
     * test for max score and sort by score of hit groups
414
     * with all matches per taxon in a single TextData  element
415
     * see {@link #testFullText_ScoreAndOrder_2()} for the complement
416
     * test with matches in multiple TextData per taxon
417
     *
418
     * @throws CorruptIndexException
419
     * @throws IOException
420
     * @throws ParseException
421
     */
422
    @SuppressWarnings("rawtypes")
423
    @Test
424
    @DataSet
425
    @Ignore // test fails, maybe the assumptions made here are not compatible with the lucene scoring mechanism see http://lucene.apache.org/core/3_6_1/scoring.html
426
    public final void testFullText_ScoreAndOrder_1() throws CorruptIndexException, IOException, ParseException {
427

    
428
        int numOfTaxa = 3;
429

    
430
        UUID[] taxonUuids = new UUID[numOfTaxa];
431
        StringBuilder text = new StringBuilder();
432

    
433
        for(int i = 0; i < numOfTaxa; i++){
434

    
435
            Taxon taxon = Taxon.NewInstance(BotanicalName.NewInstance(null), null);
436
            taxon.setTitleCache("Taxon_" + i, true);
437
            taxonUuids[i] = taxon.getUuid();
438
            taxonService.save(taxon);
439

    
440
            text.append(" ").append("Rot");
441
            TaxonDescription description = TaxonDescription.NewInstance(taxon);
442
            description.addElement(TextData.NewInstance(text.toString(), Language.DEFAULT(), null));
443
            descriptionService.saveOrUpdate(description);
444
        }
445

    
446
        commitAndStartNewTransaction(null);
447
        refreshLuceneIndex();
448

    
449
        Pager<SearchResult<TaxonBase>> pager;
450

    
451
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Rot", null, null, null, false, null, null, null, null);
452
        for(int i = 0; i < numOfTaxa; i++){
453
            Assert.assertEquals("taxa should be orderd by relevance (= score)", taxonUuids[numOfTaxa - i - 1], pager.getRecords().get(i).getEntity().getUuid());
454
        }
455
        Assert.assertEquals("max score should be equal to the score of the first element", pager.getRecords().get(0).getMaxScore(), pager.getRecords().get(0).getScore(), 0);
456
    }
457

    
458
    /**
459
     * test for max score and sort by score of hit groups
460
     * with all matches per taxon in a multiple TextData elements
461
     * see {@link #testFullText_ScoreAndOrder_1()} for the complement
462
     * test with matches in a single TextData per taxon
463
     *
464
     * @throws CorruptIndexException
465
     * @throws IOException
466
     * @throws ParseException
467
     */
468
    @SuppressWarnings("rawtypes")
469
    @Test
470
    @DataSet
471
    @Ignore // test fails, maybe the assumptions made here are not compatible with the lucene scoring mechanism see http://lucene.apache.org/core/3_6_1/scoring.html
472
    public final void testFullText_ScoreAndOrder_2() throws CorruptIndexException, IOException, ParseException {
473

    
474
        int numOfTaxa = 3;
475

    
476
        UUID[] taxonUuids = new UUID[numOfTaxa];
477

    
478
        for(int i = 0; i < numOfTaxa; i++){
479

    
480
            Taxon taxon = Taxon.NewInstance(BotanicalName.NewInstance(null), null);
481
            taxon.setTitleCache("Taxon_" + i, true);
482
            taxonUuids[i] = taxon.getUuid();
483
            taxonService.save(taxon);
484

    
485
            TaxonDescription description = TaxonDescription.NewInstance(taxon);
486
            for(int k = 0; k < i; k++){
487
                description.addElement(TextData.NewInstance("Rot", Language.DEFAULT(), null));
488
            }
489
            descriptionService.saveOrUpdate(description);
490
        }
491

    
492
        commitAndStartNewTransaction(null);
493
        refreshLuceneIndex();
494

    
495
        Pager<SearchResult<TaxonBase>> pager;
496

    
497
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Rot", null, null, null, false, null, null, null, null);
498
        for(int i = 0; i < numOfTaxa; i++){
499
            Assert.assertEquals("taxa should be orderd by relevance (= score)", taxonUuids[numOfTaxa - i - 1], pager.getRecords().get(i).getEntity().getUuid());
500
        }
501
        Assert.assertEquals("max score should be equal to the score of the first element", pager.getRecords().get(0).getMaxScore(), pager.getRecords().get(0).getScore(), 0);
502
    }
503

    
504

    
505
    /**
506
     * @throws CorruptIndexException
507
     * @throws IOException
508
     * @throws ParseException
509
     * @throws LuceneMultiSearchException
510
     */
511
    @Test
512
    @DataSet
513
    public final void testFullText_Grouping() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
514

    
515
        TaxonDescription description = (TaxonDescription) descriptionService.find(UUID.fromString(D_ABIES_ALBA_UUID));
516
        Set<String> uniqueRandomStrs = new HashSet<String>(1024);
517
        int numOfItems = 100;
518
        while(uniqueRandomStrs.size() < numOfItems){
519
            uniqueRandomStrs.add(RandomStringUtils.random(5, true, false));
520
        }
521
        for(String rndStr: uniqueRandomStrs){
522
            description.addElement(CommonTaxonName.NewInstance("Rot" + rndStr, Language.DEFAULT()));
523
        }
524
        descriptionService.saveOrUpdate(description);
525

    
526
        commitAndStartNewTransaction(new String[]{"DESCRIPTIONELEMENTBASE"});
527

    
528
        refreshLuceneIndex();
529

    
530
        int pageSize = 10;
531

    
532
        Pager<SearchResult<TaxonBase>> pager;
533
        boolean highlightFragments = true;
534

    
535
        // test with findByDescriptionElementFullText
536
        pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Rot*", null, null, null, highlightFragments, pageSize, null, null, null);
537
        logSearchResults(pager, Level.DEBUG, null);
538
        Assert.assertEquals("All matches should be grouped into a single SearchResult element", 1, pager.getRecords().size());
539
        Assert.assertEquals("The count property of the pager must be set correctly", 1, pager.getCount().intValue());
540
        Map<String, String[]> highlightMap = pager.getRecords().get(0).getFieldHighlightMap();
541
        // maxDocsPerGroup is defined in LuceneSearch and defaults to 10
542
        int maxDocsPerGroup = 10;
543
        Assert.assertEquals("expecting 10 highlighted fragments of field 'name'", maxDocsPerGroup, highlightMap.get("name").length);
544

    
545
        // test with findByEverythingFullText
546
        pager = taxonService.findByEverythingFullText( "Rot*", null, null, highlightFragments, pageSize, null, null, null);
547
        logSearchResults(pager, Level.DEBUG, null);
548
        Assert.assertEquals("All matches should be grouped into a single SearchResult element", 1, pager.getRecords().size());
549
        Assert.assertEquals("The count property of the pager must be set correctly", 1, pager.getCount().intValue());
550
        highlightMap = pager.getRecords().get(0).getFieldHighlightMap();
551
        // maxDocsPerGroup is defined in LuceneSearch and defaults to 10
552
        maxDocsPerGroup = 10;
553
        Assert.assertEquals("expecting 10 highlighted fragments of field 'name'", maxDocsPerGroup, highlightMap.get("name").length);
554

    
555
    }
556

    
557
    @SuppressWarnings("rawtypes")
558
    @Test
559
    @DataSet
560
    @Ignore
561
    public final void testFindByDescriptionElementFullText_TextData() throws CorruptIndexException, IOException, ParseException {
562

    
563
        refreshLuceneIndex();
564

    
565
        Pager<SearchResult<TaxonBase>> pager;
566
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Abies", null, null, null, false, null, null, null, null);
567
        logSearchResults(pager, Level.DEBUG, null);
568
        Assert.assertEquals("Expecting one entity when searching for any TextData", 1, pager.getCount().intValue());
569
        Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
570
        Assert.assertTrue("Expecting two docs, one for RUSSIAN and one for GERMAN", pager.getRecords().get(0).getDocs().size() == 2);
571
        Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getDocs().iterator().next().get("inDescription.taxon.titleCache"));
572

    
573

    
574
        pager = taxonService.findByDescriptionElementFullText(null, "Abies", null, null, null, false, null, null, null, null);
575
        Assert.assertEquals("Expecting one entity when searching for any type", 1, pager.getCount().intValue());
576

    
577
        pager = taxonService.findByDescriptionElementFullText(null, "Abies", null, Arrays.asList(new Feature[]{Feature.UNKNOWN()}), null, false, null, null, null, null);
578
        Assert.assertEquals("Expecting one entity when searching for any type and for Feature DESCRIPTION", 1, pager.getCount().intValue());
579

    
580
        pager = taxonService.findByDescriptionElementFullText(null, "Abies", null, Arrays.asList(new Feature[]{Feature.CHROMOSOME_NUMBER()}), null, false, null, null, null, null);
581
        Assert.assertEquals("Expecting no entity when searching for any type and for Feature CHROMOSOME_NUMBER", 0, pager.getCount().intValue());
582

    
583
        pager = taxonService.findByDescriptionElementFullText(null, "Abies", null, Arrays.asList(new Feature[]{Feature.CHROMOSOME_NUMBER(), Feature.UNKNOWN()}), null, false, null, null, null, null);
584
        Assert.assertEquals("Expecting no entity when searching for any type and for Feature DESCRIPTION or CHROMOSOME_NUMBER", 1, pager.getCount().intValue());
585

    
586
        pager = taxonService.findByDescriptionElementFullText(Distribution.class, "Abies", null, null, null, false, null, null, null, null);
587
        Assert.assertEquals("Expecting no entity when searching for Distribution", 0, pager.getCount().intValue());
588

    
589
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Бальзам", null, null, Arrays.asList(new Language[]{}), false, null, null, null, null);
590
        Assert.assertEquals("Expecting one entity", 1, pager.getCount().intValue());
591
        Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
592

    
593
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Бальзам", null, null, Arrays.asList(new Language[]{Language.RUSSIAN()}), false, null, null, null, null);
594
        Assert.assertEquals("Expecting one entity", 1, pager.getCount().intValue());
595
        Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
596

    
597
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Бальзам", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
598
        Assert.assertEquals("Expecting no entity", 0, pager.getCount().intValue());
599

    
600
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN(), Language.RUSSIAN()}), false, null, null, null, null);
601
        Assert.assertEquals("Expecting one entity", 1, pager.getCount().intValue());
602
        Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
603
    }
604

    
605
    @SuppressWarnings("rawtypes")
606
    @Test
607
    @DataSet
608
    public final void testFindByDescriptionElementFullText_MultipleWords() throws CorruptIndexException, IOException, ParseException {
609

    
610
        refreshLuceneIndex();
611

    
612
        // Pflanzenart aus der Gattung der Tannen
613
        long start = System.currentTimeMillis();
614

    
615
        Pager<SearchResult<TaxonBase>> pager;
616
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Pflanzenart Tannen", null, null, null, false, null, null, null, null);
617
        Assert.assertEquals("OR search : Expecting one entity", 1, pager.getCount().intValue());
618

    
619
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Pflanzenart Wespen", null, null, null, false, null, null, null, null);
620
        Assert.assertEquals("OR search : Expecting one entity", 1, pager.getCount().intValue());
621

    
622
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "+Pflanzenart +Tannen", null, null, null, false, null, null, null, null);
623
        Assert.assertEquals("AND search : Expecting one entity", 1, pager.getCount().intValue());
624

    
625
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "+Pflanzenart +Wespen", null, null, null, false, null, null, null, null);
626
        Assert.assertEquals("AND search : Expecting no entity", 0, pager.getCount().intValue());
627

    
628
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "\"Pflanzenart aus der Gattung der Tannen\"", null, null, null, false, null, null, null, null);
629
        Assert.assertEquals("Phrase search : Expecting one entity", 1, pager.getCount().intValue());
630

    
631
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "\"Pflanzenart aus der Gattung der Wespen\"", null, null, null, false, null, null, null, null);
632
        Assert.assertEquals("Phrase search : Expecting one entity", 0, pager.getCount().intValue());
633

    
634
        logger.info("testFindByDescriptionElementFullText_MultipleWords() duration: " + (System.currentTimeMillis() - start) + "ms");
635

    
636
    }
637

    
638

    
639
    @SuppressWarnings("rawtypes")
640
    @Test
641
    @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class)
642
    public final void testFindByDescriptionElementFullText_modify_DescriptionElement() throws CorruptIndexException, IOException, ParseException {
643

    
644
        refreshLuceneIndex();
645

    
646
        Pager<SearchResult<TaxonBase>> pager;
647
        //
648
        // modify the DescriptionElement
649
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN(), Language.RUSSIAN()}), false, null, null, null, null);
650
        Assert.assertTrue("Search did not return any results", pager.getRecords().size() > 0);
651
        Assert.assertTrue("Expecting only one doc", pager.getRecords().get(0).getDocs().size() == 1);
652
        Document indexDocument = pager.getRecords().get(0).getDocs().iterator().next();
653
        String[] descriptionElementUuidStr = indexDocument.getValues("uuid");
654
        String[] inDescriptionUuidStr = indexDocument.getValues("inDescription.uuid");
655
        // is only one uuid!
656
        DescriptionElementBase textData = descriptionService.getDescriptionElementByUuid(UUID.fromString(descriptionElementUuidStr[0]));
657

    
658
        ((TextData)textData).removeText(Language.GERMAN());
659
        ((TextData)textData).putText(Language.SPANISH_CASTILIAN(), "abeto bals"+UTF8.SMALL_A_ACUTE+"mico");
660

    
661
        descriptionService.saveDescriptionElement(textData);
662
        commitAndStartNewTransaction(null);
663
//        printDataSet(System.out, new String[] {
664
//                "DESCRIPTIONELEMENTBASE", "LANGUAGESTRING", "DESCRIPTIONELEMENTBASE_LANGUAGESTRING" }
665
//        );
666

    
667
        //
668
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN(), Language.RUSSIAN()}), false, null, null, null, null);
669
        Assert.assertEquals("The german 'Balsam-Tanne' TextData should no longer be indexed", 0, pager.getCount().intValue());
670
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "abeto", null, null, Arrays.asList(new Language[]{Language.SPANISH_CASTILIAN()}), false, null, null, null, null);
671
        Assert.assertEquals("expecting to find the SPANISH_CASTILIAN 'abeto bals"+UTF8.SMALL_A_ACUTE+"mico'", 1, pager.getCount().intValue());
672
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "bals"+UTF8.SMALL_A_ACUTE+"mico", null, null, null, false, null, null, null, null);
673
        Assert.assertEquals("expecting to find the SPANISH_CASTILIAN 'abeto bals"+UTF8.SMALL_A_ACUTE+"mico'", 1, pager.getCount().intValue());
674

    
675
        //
676
        // modify the DescriptionElement via the Description object
677
        DescriptionBase<?> description = descriptionService.find(UUID.fromString(inDescriptionUuidStr[0]));
678
        Set<DescriptionElementBase> elements = description.getElements();
679
        for( DescriptionElementBase elm : elements){
680
            if(elm.getUuid().toString().equals(descriptionElementUuidStr[0])){
681
                ((TextData)elm).removeText(Language.SPANISH_CASTILIAN());
682
                ((TextData)elm).putText(Language.POLISH(), "Jod"+UTF8.POLISH_L+"a balsamiczna");
683
            }
684
        }
685
        descriptionService.saveOrUpdate(description);
686
        commitAndStartNewTransaction(null);
687
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "abeto", null, null, Arrays.asList(new Language[]{Language.SPANISH_CASTILIAN()}), false, null, null, null, null);
688
        Assert.assertEquals("The spanish 'abeto bals"+UTF8.SMALL_A_ACUTE+"mico' TextData should no longer be indexed", 0, pager.getCount().intValue());
689
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "balsamiczna", null, null, Arrays.asList(new Language[]{Language.POLISH()}), false, null, null, null, null);
690
        Assert.assertEquals("expecting to find the POLISH 'Jod"+UTF8.POLISH_L+"a balsamiczna'", 1, pager.getCount().intValue());
691
    }
692

    
693
    @SuppressWarnings("rawtypes")
694
    @Test
695
    @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class)
696
    public final void testFindByDescriptionElementFullText_modify_Taxon() throws CorruptIndexException, IOException, ParseException {
697

    
698
        refreshLuceneIndex();
699

    
700
        Pager<SearchResult<TaxonBase>> pager;
701
        Taxon t_abies_balsamea = (Taxon)taxonService.find(UUID.fromString(ABIES_BALSAMEA_UUID));
702
        TaxonDescription d_abies_balsamea = (TaxonDescription)descriptionService.find(UUID.fromString(D_ABIES_BALSAMEA_UUID));
703

    
704
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
705
        Assert.assertEquals("expecting to find the GERMAN 'Balsam-Tanne'", 1, pager.getCount().intValue());
706

    
707
        // exchange the Taxon with another one via the Taxon object
708
        // 1.) remove existing description:
709
        t_abies_balsamea.removeDescription(d_abies_balsamea);
710

    
711
        taxonService.saveOrUpdate(t_abies_balsamea);
712
        commitAndStartNewTransaction(null);
713

    
714
        t_abies_balsamea = (Taxon)taxonService.find(t_abies_balsamea.getUuid());
715

    
716
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
717
        Assert.assertEquals("'Balsam-Tanne' should no longer be found", 0, pager.getCount().intValue());
718

    
719
        // 2.) create new description and add to taxon:
720
        TaxonDescription d_abies_balsamea_new = TaxonDescription.NewInstance();
721
        d_abies_balsamea_new
722
                .addElement(TextData
723
                        .NewInstance(
724
                                "Die Balsamtanne ist mit bis zu 30 m Höhe ein mittelgro"+UTF8.SHARP_S+"er Baum und kann bis zu 200 Jahre alt werden",
725
                                Language.GERMAN(), null));
726
        t_abies_balsamea.addDescription(d_abies_balsamea_new);
727
        // set authorshipCache to null to avoid validation exception,
728
        // this is maybe not needed in future,  see ticket #3344
729
        BotanicalName abies_balsamea = HibernateProxyHelper.deproxy(t_abies_balsamea.getName(), BotanicalName.class);
730
        abies_balsamea.setAuthorshipCache(null);
731
        printDataSet(System.err, new String[] {"LANGUAGESTRING_AUD"});
732
        taxonService.saveOrUpdate(t_abies_balsamea);
733
        commitAndStartNewTransaction(null);
734

    
735
//        printDataSet(System.out, new String[] {
736
//                "DESCRIPTIONBASE"
737
//        });
738

    
739
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "mittelgro"+UTF8.SHARP_S+"er Baum", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
740
        Assert.assertEquals("the taxon should be found via the new Description", 1, pager.getCount().intValue());
741
    }
742

    
743
    @SuppressWarnings("rawtypes")
744
    @Test
745
    @DataSet
746
    public final void testFindByDescriptionElementFullText_modify_Classification() throws CorruptIndexException, IOException, ParseException {
747

    
748
        refreshLuceneIndex();
749

    
750
        Pager<SearchResult<TaxonBase>> pager;
751

    
752
        // put taxon into other classification, new taxon node
753
        Classification classification = classificationService.find(UUID.fromString(CLASSIFICATION_UUID));
754
        Classification alternateClassification = classificationService.find(UUID.fromString(CLASSIFICATION_ALT_UUID));
755

    
756
        // TODO: why is the test failing when the childNode is already retrieved here, and not after the following four lines?
757
        //TaxonNode childNode = classification.getChildNodes().iterator().next();
758

    
759
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", null, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
760
        Assert.assertEquals("expecting to find the GERMAN 'Balsam-Tanne' even if filtering by classification", 1, pager.getCount().intValue());
761
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", alternateClassification, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
762
        Assert.assertEquals("GERMAN 'Balsam-Tanne' should NOT be found in other classification", 0, pager.getCount().intValue());
763

    
764
        // check for the right taxon node
765
        TaxonNode childNode = classification.getChildNodes().iterator().next();
766
        Assert.assertEquals("expecting Abies balsamea sec.", childNode.getTaxon().getUuid().toString(), ABIES_BALSAMEA_UUID);
767
        Assert.assertEquals("expecting default classification", childNode.getClassification().getUuid().toString(), CLASSIFICATION_UUID);
768

    
769
        // moving the taxon around, the rootnode is only a proxy
770
        alternateClassification.setRootNode(HibernateProxyHelper.deproxy(alternateClassification.getRootNode(), TaxonNode.class));
771
        alternateClassification.addChildNode(childNode, null, null);
772

    
773
        classificationService.saveOrUpdate(alternateClassification);
774
        commitAndStartNewTransaction(null);
775

    
776
//        printDataSet(System.out, new String[] {
777
//            "TAXONBASE", "TAXONNODE", "CLASSIFICATION"
778
//        });
779

    
780
        // reload classification
781
        classification = classificationService.find(UUID.fromString(CLASSIFICATION_UUID));
782
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", alternateClassification, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
783
        Assert.assertEquals("GERMAN 'Balsam-Tanne' should now be found in other classification", 1, pager.getCount().intValue());
784

    
785
        classification.getChildNodes().clear();
786
        classificationService.saveOrUpdate(classification);
787
        commitAndStartNewTransaction(null);
788

    
789
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Balsam-Tanne", classification, null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
790
        Assert.assertEquals("Now the GERMAN 'Balsam-Tanne' should NOT be found in original classification", 0, pager.getCount().intValue());
791

    
792
    }
793

    
794
    @SuppressWarnings("rawtypes")
795
    @Test
796
    @DataSet
797
    public final void testFindByDescriptionElementFullText_CategoricalData() throws CorruptIndexException, IOException, ParseException {
798

    
799
        // add CategoricalData
800
        DescriptionBase d_abies_balsamea = descriptionService.find(UUID.fromString(D_ABIES_BALSAMEA_UUID));
801
        // Categorical data
802
        CategoricalData cdata = CategoricalData.NewInstance();
803
        cdata.setFeature(Feature.DESCRIPTION());
804
        State state = State.NewInstance("green", "green", "gn");
805

    
806
        StateData statedata = StateData.NewInstance(state);
807
        statedata.putModifyingText(Language.ENGLISH(), "always, even during winter");
808
        cdata.addStateData(statedata);
809
        d_abies_balsamea.addElement(cdata);
810

    
811
        UUID termUUID = termService.save(state).getUuid();
812
        descriptionService.save(d_abies_balsamea);
813

    
814
        commitAndStartNewTransaction(null);
815

    
816
//        printDataSet(System.out, new String[] {
817
//                 "STATEDATA", "STATEDATA_DEFINEDTERMBASE", "STATEDATA_LANGUAGESTRING", "LANGUAGESTRING"});
818

    
819
        refreshLuceneIndex();
820

    
821
        Pager<SearchResult<TaxonBase>> pager;
822
        pager = taxonService.findByDescriptionElementFullText(CategoricalData.class, "green", null, null, null, false, null, null, null, null);
823
        Assert.assertEquals("Expecting one entity", 1, pager.getCount().intValue());
824
        Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getEntity().getTitleCache());
825
        Assert.assertTrue("Expecting only one doc", pager.getRecords().get(0).getDocs().size() == 1);
826
        Assert.assertEquals("Abies balsamea sec. Kohlbecker, A., Testcase standart views, 2013", pager.getRecords().get(0).getDocs().iterator().next().get("inDescription.taxon.titleCache"));
827

    
828

    
829
        //TODO modify the StateData
830
        TaxonBase taxon = pager.getRecords().get(0).getEntity();
831

    
832
        String newName = "Quercus robur";
833
        taxon.setTitleCache(newName + " sec. ", true);
834

    
835
        taxonService.saveOrUpdate(taxon);
836
        commitAndStartNewTransaction(null);
837

    
838
        taxon = taxonService.find(taxon.getUuid());
839
        Assert.assertEquals(newName + " sec. ", taxon.getTitleCache());
840
        DefinedTermBase term = termService.find(termUUID);
841

    
842
        termService.delete(term);
843

    
844
    }
845

    
846
    @SuppressWarnings("rawtypes")
847
    @Test
848
    @DataSet
849
    public final void testFindByDescriptionElementFullText_Highlighting() throws CorruptIndexException, IOException, ParseException {
850

    
851
        refreshLuceneIndex();
852

    
853
        Pager<SearchResult<TaxonBase>> pager;
854
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Abies", null, null, null, true, null, null, null, null);
855
        Assert.assertEquals("Expecting one entity when searching for any TextData", 1, pager.getCount().intValue());
856
        SearchResult<TaxonBase> searchResult = pager.getRecords().get(0);
857
        Assert.assertTrue("the map of highlighted fragments should contain at least one item", searchResult.getFieldHighlightMap().size() > 0);
858
        String[] fragments = searchResult.getFieldHighlightMap().values().iterator().next();
859
        Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Abies</B>"));
860

    
861
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Pflanzenart Tannen", null, null, null, true, null, null, null, null);
862
        searchResult = pager.getRecords().get(0);
863
        Assert.assertTrue("Phrase search : Expecting at least one item in highlighted fragments", searchResult.getFieldHighlightMap().size() > 0);
864
        fragments = searchResult.getFieldHighlightMap().values().iterator().next();
865
        Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Pflanzenart</B>") || fragments[0].contains("<B>Tannen</B>"));
866

    
867
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "+Pflanzenart +Tannen", null, null, null, true, null, null, null, null);
868
        searchResult = pager.getRecords().get(0);
869
        Assert.assertTrue("Phrase search : Expecting at least one item in highlighted fragments", searchResult.getFieldHighlightMap().size() > 0);
870
        fragments = searchResult.getFieldHighlightMap().values().iterator().next();
871
        Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Pflanzenart</B>") && fragments[0].contains("<B>Tannen</B>"));
872

    
873
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "\"Pflanzenart aus der Gattung der Tannen\"", null, null, null, true, null, null, null, null);
874
        searchResult = pager.getRecords().get(0);
875
        Assert.assertTrue("Phrase search : Expecting at least one item in highlighted fragments", searchResult.getFieldHighlightMap().size() > 0);
876
        fragments = searchResult.getFieldHighlightMap().values().iterator().next();
877
        Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Pflanzenart</B> <B>aus</B> <B>der</B> <B>Gattung</B> <B>der</B> <B>Tannen</B>"));
878

    
879
        pager = taxonService.findByDescriptionElementFullText(TextData.class, "Gatt*", null, null, null, true, null, null, null, null);
880
        searchResult = pager.getRecords().get(0);
881
        Assert.assertTrue("Wildcard search : Expecting at least one item in highlighted fragments", searchResult.getFieldHighlightMap().size() > 0);
882
        fragments = searchResult.getFieldHighlightMap().values().iterator().next();
883
        Assert.assertTrue("first fragments should contains serch term", fragments[0].contains("<B>Gatt"));
884
    }
885

    
886

    
887
    @Test
888
    @DataSet
889
    public final void testFindByFullText() throws CorruptIndexException, IOException, ParseException {
890

    
891
        refreshLuceneIndex();
892

    
893
        Classification europeanAbiesClassification = classificationService.find(UUID.fromString(CLASSIFICATION_UUID));
894

    
895
        Pager<SearchResult<TaxonBase>> pager;
896

    
897
        pager = taxonService.findByFullText(null, "Abies", null, null, true, null, null, null, null); // --> 7
898
        logSearchResults(pager, Level.DEBUG, null);
899
        Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
900

    
901
        pager = taxonService.findByFullText(Taxon.class, "Abies", null, null, true, null, null, null, null); // --> 6
902
        Assert.assertEquals("Expecting 7 entities", 7, pager.getCount().intValue());
903

    
904
        pager = taxonService.findByFullText(Synonym.class, "Abies", null, null, true, null, null, null, null); // --> 1
905
        Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
906

    
907
        pager = taxonService.findByFullText(TaxonBase.class, "sec", null, null, true, null, null, null, null); // --> 7
908
        Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
909

    
910
        pager = taxonService.findByFullText(null, "genus", null, null, true, null, null, null, null); // --> 1
911
        Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
912

    
913
        pager = taxonService.findByFullText(Taxon.class, "subalpina", null, null, true, null, null, null, null); // --> 0
914
        Assert.assertEquals("Expecting 0 entities", 0, pager.getCount().intValue());
915

    
916
        // synonym in classification ???
917
    }
918

    
919
    @Test
920
    @DataSet
921
    public final void testPrepareByAreaSearch() throws IOException, ParseException {
922

    
923
        List<PresenceAbsenceTerm> statusFilter = new ArrayList<PresenceAbsenceTerm>();
924
        List<NamedArea> areaFilter = new ArrayList<NamedArea>();
925
        areaFilter.add(germany);
926
        areaFilter.add(canada);
927
        areaFilter.add(russia);
928

    
929
        Pager<SearchResult<TaxonBase>> pager = taxonService.findByDistribution(areaFilter, statusFilter, null, 20, 0, null, null);
930
        Assert.assertEquals("Expecting 2 entities", Integer.valueOf(2), Integer.valueOf(pager.getRecords().size()));
931

    
932
    }
933

    
934
    @Test
935
    @DataSet
936
    public final void testFindTaxaAndNamesByFullText() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
937

    
938
        refreshLuceneIndex();
939

    
940
        Pager<SearchResult<TaxonBase>> pager;
941

    
942
        Classification alternateClassification = classificationService.find(UUID.fromString(CLASSIFICATION_ALT_UUID));
943

    
944

    
945
        pager = taxonService.findTaxaAndNamesByFullText(
946
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
947
                "Abies", null, null, null, null, true, null, null, null, null);
948
//        logPagerRecords(pager, Level.DEBUG);
949
        Assert.assertEquals("doTaxa & doSynonyms", 8, pager.getCount().intValue());
950

    
951
        pager = taxonService.findTaxaAndNamesByFullText(
952
                EnumSet.allOf(TaxaAndNamesSearchMode.class),
953
                "Abies", null, null, null, null, true, null, null, null, null);
954
//        logPagerRecords(pager, Level.DEBUG);
955
        Assert.assertEquals("all search modes", 8, pager.getCount().intValue());
956

    
957
        pager = taxonService.findTaxaAndNamesByFullText(
958
                EnumSet.allOf(TaxaAndNamesSearchMode.class),
959
                "Abies", alternateClassification, null, null, null, true, null, null, null, null);
960
//        logPagerRecords(pager, Level.DEBUG);
961
        Assert.assertEquals("all search modes, filtered by alternateClassification", 1, pager.getCount().intValue());
962

    
963
        pager = taxonService.findTaxaAndNamesByFullText(
964
                EnumSet.of(TaxaAndNamesSearchMode.doSynonyms),
965
                "Abies", null, null, null, null, true, null, null, null, null);
966
        Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
967
        SearchResult<TaxonBase> searchResult = pager.getRecords().get(0);
968
        Assert.assertEquals(Synonym.class, searchResult.getEntity().getClass());
969
        // Abies subalpina sec. Kohlbecker, A., Testcase standart views, 2013
970

    
971

    
972
        pager = taxonService.findTaxaAndNamesByFullText(
973
                EnumSet.of(TaxaAndNamesSearchMode.doTaxaByCommonNames),
974
                "Abies", null, null, null, null, true, null, null, null, null);
975
        Assert.assertEquals("Expecting 0 entity", 0, pager.getCount().intValue());
976

    
977

    
978
        pager = taxonService.findTaxaAndNamesByFullText(
979
                EnumSet.of(TaxaAndNamesSearchMode.doTaxaByCommonNames),
980
                "Tanne", null, null, null, null, true, null, null, null, null);
981
        Assert.assertEquals("Expecting 1 entity", 1, pager.getRecords().size());
982
        Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
983

    
984
        pager = taxonService.findTaxaAndNamesByFullText(
985
                EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
986
                "kawakamii", null, null, null, null, true, null, null, null, null);
987
        logSearchResults(pager, Level.DEBUG, null);
988
        Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
989

    
990
    }
991

    
992
    @Test
993
    @DataSet
994
    public final void testFindTaxaAndNamesByFullText_Sort() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
995

    
996
        refreshLuceneIndex();
997

    
998
        Pager<SearchResult<TaxonBase>> pager;
999

    
1000
        List<OrderHint> orderHints = new ArrayList<OrderHint>();
1001

    
1002
        String[] docFields2log = new String[]{"id"};
1003

    
1004
        // SortById
1005
        orderHints.addAll(OrderHint.ORDER_BY_ID.asList());
1006
        pager = taxonService.findTaxaAndNamesByFullText(
1007
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa),
1008
                "Abies", null, null, null, null, true, null, null, orderHints, null);
1009
//        logSearchResults(pager, Level.DEBUG, docFields2log);
1010
        int lastId = -1;
1011
        for(SearchResult<TaxonBase> rs : pager.getRecords()){
1012
            if(lastId != -1){
1013
                Assert.assertTrue("results not sorted by id", lastId < rs.getEntity().getId());
1014
            }
1015
            lastId = rs.getEntity().getId();
1016
        }
1017

    
1018
        orderHints.addAll(OrderHint.ORDER_BY_ID.asList());
1019
        pager = taxonService.findTaxaAndNamesByFullText(
1020
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1021
                "Abies", null, null, null, null, true, null, null, orderHints, null);
1022
//        logSearchResults(pager, Level.DEBUG, docFields2log);
1023

    
1024
        lastId = -1;
1025
        for(SearchResult<TaxonBase> rs : pager.getRecords()){
1026
            if(lastId != -1){
1027
                Assert.assertTrue("results not sorted by id", lastId < rs.getEntity().getId());
1028
            }
1029
            lastId = rs.getEntity().getId();
1030
        }
1031

    
1032
        // Sortby NOMENCLATURAL_SORT_ORDER TODO make assertions !!!
1033
        orderHints.clear();
1034
        orderHints.addAll(OrderHint.NOMENCLATURAL_SORT_ORDER.asList());
1035
        pager = taxonService.findTaxaAndNamesByFullText(
1036
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1037
                "Abies", null, null, null, null, true, null, null, orderHints, null);
1038
        logSearchResults(pager, Level.DEBUG, null);
1039

    
1040
    }
1041

    
1042
    @Test
1043
    @DataSet
1044
    public final void testFindTaxaAndNamesByFullText_AreaFilter() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1045

    
1046
        refreshLuceneIndex();
1047

    
1048
        Pager<SearchResult<TaxonBase>> pager;
1049

    
1050
        Set<NamedArea> a_germany_canada_russia = new HashSet<NamedArea>();
1051
        a_germany_canada_russia.add(germany);
1052
        a_germany_canada_russia.add(canada);
1053
        a_germany_canada_russia.add(russia);
1054

    
1055
        Set<NamedArea> a_russia = new HashSet<NamedArea>();
1056
        a_russia.add(russia);
1057

    
1058
        Set<PresenceAbsenceTerm> present = new HashSet<PresenceAbsenceTerm>();
1059
        present.add(PresenceAbsenceTerm.PRESENT());
1060

    
1061
        Set<PresenceAbsenceTerm> present_native = new HashSet<PresenceAbsenceTerm>();
1062
        present_native.add(PresenceAbsenceTerm.PRESENT());
1063
        present_native.add(PresenceAbsenceTerm.NATIVE());
1064

    
1065
        Set<PresenceAbsenceTerm> absent = new HashSet<PresenceAbsenceTerm>();
1066
        absent.add(PresenceAbsenceTerm.ABSENT());
1067

    
1068
        pager = taxonService.findTaxaAndNamesByFullText(
1069
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa),
1070
                "Abies", null, a_germany_canada_russia, null, null, true, null, null, null, null);
1071
        logSearchResults(pager, Level.DEBUG, null);
1072

    
1073
        // abies_kawakamii_sensu_komarov as missapplied name for t_abies_balsamea
1074
        pager = taxonService.findTaxaAndNamesByFullText(
1075
                EnumSet.of(TaxaAndNamesSearchMode.doSynonyms),
1076
                "Abies", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1077
        Assert.assertEquals("synonyms with matching area filter", 1, pager.getCount().intValue());
1078

    
1079
        pager = taxonService.findTaxaAndNamesByFullText(
1080
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1081
                "Abies", null, a_germany_canada_russia, null, null, true, null, null, null, null);
1082
        logSearchResults(pager, Level.DEBUG, null);
1083
        Assert.assertEquals("taxa and synonyms with matching area filter", 3, pager.getCount().intValue());
1084

    
1085
        pager = taxonService.findTaxaAndNamesByFullText(
1086
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1087
                "Abies", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1088
        Assert.assertEquals("taxa and synonyms with matching area & status filter 1", 3, pager.getCount().intValue());
1089

    
1090
        pager = taxonService.findTaxaAndNamesByFullText(
1091
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1092
                "Abies", null, a_germany_canada_russia, present, null, true, null, null, null, null);
1093
        Assert.assertEquals("taxa and synonyms with matching area & status filter 2", 2, pager.getCount().intValue());
1094

    
1095
        pager = taxonService.findTaxaAndNamesByFullText(
1096
                EnumSet.of(TaxaAndNamesSearchMode.doTaxa, TaxaAndNamesSearchMode.doSynonyms),
1097
                "Abies", null, a_russia, present, null, true, null, null, null, null);
1098
        Assert.assertEquals("taxa and synonyms with non matching area & status filter", 0, pager.getCount().intValue());
1099

    
1100
        pager = taxonService.findTaxaAndNamesByFullText(
1101
                EnumSet.of(TaxaAndNamesSearchMode.doTaxaByCommonNames),
1102
                "Tanne", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1103
        Assert.assertEquals("ByCommonNames with area filter", 1, pager.getCount().intValue());
1104

    
1105
        // abies_kawakamii_sensu_komarov as misapplied name for t_abies_balsamea
1106
        pager = taxonService.findTaxaAndNamesByFullText(
1107
                EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1108
                "Abies", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1109
        Assert.assertEquals("misappliedNames with matching area & status filter", 1, pager.getCount().intValue());
1110

    
1111

    
1112
        // 1. remove existing taxon relation
1113
        Taxon t_abies_balsamea = (Taxon)taxonService.find(UUID.fromString(ABIES_BALSAMEA_UUID));
1114
        Set<TaxonRelationship> relsTo = t_abies_balsamea.getRelationsToThisTaxon();
1115
        Assert.assertEquals(1, relsTo.size());
1116
        TaxonRelationship taxonRelation = relsTo.iterator().next();
1117
        t_abies_balsamea.removeTaxonRelation(taxonRelation);
1118
        taxonService.saveOrUpdate(t_abies_balsamea);
1119
        commitAndStartNewTransaction(null);
1120

    
1121
        pager = taxonService.findTaxaAndNamesByFullText(
1122
                EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1123
                "Abies", null, a_germany_canada_russia, present_native, null, true, null, null, null, null);
1124
        Assert.assertEquals("misappliedNames with matching area & status filter, should match nothing now", 0, pager.getCount().intValue());
1125

    
1126
        // 2. now add abies_kawakamii_sensu_komarov as misapplied name for t_abies_alba and search for misapplications in russia: ABSENT
1127
        Taxon t_abies_kawakamii_sensu_komarov = (Taxon)taxonService.find(UUID.fromString(D_ABIES_KAWAKAMII_SEC_KOMAROV_UUID));
1128
        Taxon t_abies_alba = (Taxon)taxonService.find(UUID.fromString(ABIES_ALBA_UUID));
1129
        t_abies_alba.addMisappliedName(t_abies_kawakamii_sensu_komarov, null, null);
1130

    
1131
        taxonService.update(t_abies_kawakamii_sensu_komarov);
1132

    
1133
        commitAndStartNewTransaction(null);
1134

    
1135
        pager = taxonService.findTaxaAndNamesByFullText(
1136
                EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1137
                "Abies", null, a_germany_canada_russia, absent, null, true, null, null, null, null);
1138
        Assert.assertEquals("misappliedNames with matching area & status filter, should find one", 1, pager.getCount().intValue());
1139

    
1140
    }
1141

    
1142
    @Test
1143
    @DataSet
1144
    @Ignore // remove once http://dev.e-taxonomy.eu/trac/ticket/5477 is solved
1145
    public final void testFindTaxaAndNamesByFullText_AreaFilter_issue5477() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1146

    
1147
        Set<NamedArea> a_germany_canada_russia = new HashSet<NamedArea>();
1148
        a_germany_canada_russia.add(germany);
1149
        a_germany_canada_russia.add(canada);
1150
        a_germany_canada_russia.add(russia);
1151

    
1152

    
1153
        Set<PresenceAbsenceTerm> absent = new HashSet<PresenceAbsenceTerm>();
1154
        absent.add(PresenceAbsenceTerm.ABSENT());
1155

    
1156
        Taxon t_abies_kawakamii_sensu_komarov = (Taxon)taxonService.find(UUID.fromString(D_ABIES_KAWAKAMII_SEC_KOMAROV_UUID));
1157
        Taxon t_abies_alba = (Taxon)taxonService.find(UUID.fromString(ABIES_ALBA_UUID));
1158
        t_abies_alba.addMisappliedName(t_abies_kawakamii_sensu_komarov, null, null);
1159

    
1160
        /* Since the upgrade from hibernate search 4 to 5.5
1161
         * triggering an update of t_abies_alba is no longer sufficient to also update the
1162
         * document of t_abies_kawakamii_sensu_komarov in the lucene index.
1163
         * the last test in testFindTaxaAndNamesByFullText_AreaFilter() failed in this case.
1164
         * This situation is reproduced here:
1165
         */
1166
        taxonService.update(t_abies_alba);
1167

    
1168
          commitAndStartNewTransaction(null);
1169

    
1170
          Pager pager = taxonService.findTaxaAndNamesByFullText(
1171
                  EnumSet.of(TaxaAndNamesSearchMode.doMisappliedNames),
1172
                  "Abies", null, a_germany_canada_russia, absent, null, true, null, null, null, null);
1173
          Assert.assertEquals("misappliedNames with matching area & status filter, should find one", 1, pager.getCount().intValue());
1174
    }
1175

    
1176

    
1177
    /**
1178
     * Regression test for #3119: fulltext search: Entity always null whatever search
1179
     *
1180
     * @throws CorruptIndexException
1181
     * @throws IOException
1182
     * @throws ParseException
1183
     * @throws LuceneMultiSearchException
1184
     */
1185
    @Test
1186
    @DataSet
1187
    public final void testFindByEverythingFullText() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1188

    
1189
        refreshLuceneIndex();
1190

    
1191
        Pager<SearchResult<TaxonBase>> pager;
1192

    
1193
        // via Taxon
1194
        pager = taxonService.findByEverythingFullText("Abies", null, null, true, null, null, null, null);
1195
        logSearchResults(pager, Level.DEBUG, null);
1196
        Assert.assertTrue("Expecting at least 7 entities for 'Abies'", pager.getCount() > 7);
1197
        Assert.assertNotNull("Expecting entity", pager.getRecords().get(0).getEntity());
1198
        Assert.assertEquals("Expecting Taxon entity", Taxon.class, pager.getRecords().get(0).getEntity().getClass());
1199

    
1200
        // via DescriptionElement
1201
        pager = taxonService.findByEverythingFullText("present", null, null, true, null, null, null, null);
1202
        Assert.assertEquals("Expecting one entity when searching for area 'present'", 1, pager.getCount().intValue());
1203
        Assert.assertNotNull("Expecting entity", pager.getRecords().get(0).getEntity());
1204
        Assert.assertEquals("Expecting Taxon entity", Taxon.class, pager.getRecords().get(0).getEntity().getClass());
1205
        Assert.assertEquals("Expecting Taxon ", ABIES_BALSAMEA_UUID, pager.getRecords().get(0).getEntity().getUuid().toString());
1206

    
1207
    }
1208

    
1209

    
1210
    @Test
1211
    @DataSet
1212
    public final void findByEveryThingFullText() throws CorruptIndexException, IOException, ParseException, LuceneMultiSearchException {
1213

    
1214
        refreshLuceneIndex();
1215

    
1216
        Pager<SearchResult<TaxonBase>> pager;
1217

    
1218
        pager = taxonService.findByEverythingFullText("genus", null, null,  false, null, null, null, null); // --> 1
1219
        Assert.assertEquals("Expecting 1 entity", 1, pager.getCount().intValue());
1220

    
1221
        //FIXME FAILS: abies balamea is returned twice, see also testFullText_Grouping()
1222
        pager = taxonService.findByEverythingFullText("Balsam-Tanne", null, Arrays.asList(new Language[]{Language.GERMAN()}), false, null, null, null, null);
1223
        logSearchResults(pager, Level.DEBUG, null);
1224
        Assert.assertEquals("expecting to find the Abies balsamea via the GERMAN DescriptionElements", 1, pager.getCount().intValue());
1225

    
1226
        pager = taxonService.findByEverythingFullText("Abies", null, null, true, null, null, null, null);
1227
        Assert.assertEquals("Expecting 8 entities", 8, pager.getCount().intValue());
1228
        SearchResult<TaxonBase> searchResult = pager.getRecords().get(0);
1229
        Assert.assertTrue("the map of highlighted fragments should contain at least one item", searchResult.getFieldHighlightMap().size() > 0);
1230
        String[] fragments = searchResult.getFieldHighlightMap().values().iterator().next();
1231
        Assert.assertTrue("first fragments should contains serch term", fragments[0].toLowerCase().contains("<b>abies</b>"));
1232
    }
1233

    
1234
//    @SuppressWarnings("rawtypes")
1235
//    @Test
1236
//    @DataSet
1237
//    public final void benchmarkFindTaxaAndNamesHql() throws CorruptIndexException, IOException, ParseException {
1238
//
1239
//        createRandomTaxonWithCommonName(NUM_OF_NEW_RADOM_ENTITIES);
1240
//
1241
//        IFindTaxaAndNamesConfigurator configurator = new FindTaxaAndNamesConfiguratorImpl();
1242
//        configurator.setTitleSearchString("Wei"+UTF8.SHARP_S+"%");
1243
//        configurator.setMatchMode(MatchMode.BEGINNING);
1244
//        configurator.setDoTaxa(false);
1245
//        configurator.setDoSynonyms(false);
1246
//        configurator.setDoNamesWithoutTaxa(false);
1247
//        configurator.setDoTaxaByCommonNames(true);
1248
//
1249
//        Pager<IdentifiableEntity> pager;
1250
//
1251
//        long startMillis = System.currentTimeMillis();
1252
//        for (int indx = 0; indx < BENCHMARK_ROUNDS; indx++) {
1253
//            pager = taxonService.findTaxaAndNames(configurator);
1254
//            if (logger.isDebugEnabled()) {
1255
//                logger.debug("[" + indx + "]" + pager.getRecords().get(0).getTitleCache());
1256
//            }
1257
//        }
1258
//        double duration = ((double) (System.currentTimeMillis() - startMillis)) / BENCHMARK_ROUNDS;
1259
//        logger.info("Benchmark result - [find taxon by CommonName via HQL] : " + duration + "ms (" + BENCHMARK_ROUNDS + " benchmark rounds )");
1260
//    }
1261

    
1262
    @SuppressWarnings("rawtypes")
1263
    @Test
1264
    @DataSet
1265
    public final void benchmarkFindByCommonNameHql() throws CorruptIndexException, IOException, ParseException {
1266

    
1267
//        printDataSet(System.err, new String[] { "TaxonBase" });
1268

    
1269
        createRandomTaxonWithCommonName(NUM_OF_NEW_RADOM_ENTITIES);
1270

    
1271
        IFindTaxaAndNamesConfigurator configurator = new FindTaxaAndNamesConfiguratorImpl();
1272
        configurator.setTitleSearchString("Wei"+UTF8.SHARP_S+"%");
1273
        configurator.setMatchMode(MatchMode.BEGINNING);
1274
        configurator.setDoTaxa(false);
1275
        configurator.setDoSynonyms(false);
1276
        configurator.setDoNamesWithoutTaxa(false);
1277
        configurator.setDoTaxaByCommonNames(true);
1278

    
1279
        Pager<IdentifiableEntity> pager;
1280

    
1281
        long startMillis = System.currentTimeMillis();
1282
        for (int indx = 0; indx < BENCHMARK_ROUNDS; indx++) {
1283
            pager = taxonService.findTaxaAndNames(configurator);
1284
            if (logger.isDebugEnabled()) {
1285
                logger.debug("[" + indx + "]" + pager.getRecords().get(0).getTitleCache());
1286
            }
1287
        }
1288
        double duration = ((double) (System.currentTimeMillis() - startMillis)) / BENCHMARK_ROUNDS;
1289
        logger.info("Benchmark result - [find taxon by CommonName via HQL] : " + duration + "ms (" + BENCHMARK_ROUNDS + " benchmark rounds )");
1290
    }
1291

    
1292
    @SuppressWarnings("rawtypes")
1293
    @Test
1294
    @DataSet
1295
    public final void benchmarkFindByCommonNameLucene() throws CorruptIndexException, IOException, ParseException {
1296

    
1297
        createRandomTaxonWithCommonName(NUM_OF_NEW_RADOM_ENTITIES);
1298

    
1299
        refreshLuceneIndex();
1300

    
1301
        Pager<SearchResult<TaxonBase>> pager;
1302

    
1303
        long startMillis = System.currentTimeMillis();
1304
        for (int indx = 0; indx < BENCHMARK_ROUNDS; indx++) {
1305
            pager = taxonService.findByDescriptionElementFullText(CommonTaxonName.class, "Wei"+UTF8.SHARP_S+"*", null, null, null, false, null, null, null, null);
1306
            if (logger.isDebugEnabled()) {
1307
                logger.debug("[" + indx + "]" + pager.getRecords().get(0).getEntity().getTitleCache());
1308
            }
1309
        }
1310
        double duration = ((double) (System.currentTimeMillis() - startMillis)) / BENCHMARK_ROUNDS;
1311
        logger.info("Benchmark result - [find taxon by CommonName via lucene] : " + duration + "ms (" + BENCHMARK_ROUNDS + " benchmark rounds )");
1312
    }
1313

    
1314
    /**
1315
     * uncomment @Test annotation to create the dataset for this test
1316
     */
1317
    @Override
1318
    //    @Test
1319
    @DataSet(loadStrategy=CleanSweepInsertLoadStrategy.class, value="BlankDataSet.xml")
1320
    public final void createTestDataSet() throws FileNotFoundException {
1321

    
1322
        Classification europeanAbiesClassification = Classification.NewInstance("European Abies");
1323
        europeanAbiesClassification.setUuid(UUID.fromString(CLASSIFICATION_UUID));
1324
        classificationService.save(europeanAbiesClassification);
1325

    
1326
        Classification alternativeClassification = Classification.NewInstance("Abies alternative");
1327
        alternativeClassification.setUuid(UUID.fromString(CLASSIFICATION_ALT_UUID));
1328
        classificationService.save(alternativeClassification);
1329

    
1330
        Reference sec = ReferenceFactory.newBook();
1331
        sec.setTitleCache("Kohlbecker, A., Testcase standart views, 2013", true);
1332
        Reference sec_sensu = ReferenceFactory.newBook();
1333
        sec_sensu.setTitleCache("Komarov, V. L., Flora SSSR 29", true);
1334
        referenceService.save(sec);
1335
        referenceService.save(sec_sensu);
1336

    
1337
        BotanicalName n_abies = BotanicalName.NewInstance(Rank.GENUS());
1338
        n_abies.setNameCache("Abies", true);
1339
        Taxon t_abies = Taxon.NewInstance(n_abies, sec);
1340
        taxonService.save(t_abies);
1341

    
1342
        BotanicalName n_abies_alba = BotanicalName.NewInstance(Rank.SPECIES());
1343
        n_abies_alba.setNameCache("Abies alba", true);
1344
        Taxon t_abies_alba = Taxon.NewInstance(n_abies_alba, sec);
1345
        t_abies_alba.setUuid(UUID.fromString(ABIES_ALBA_UUID));
1346
        taxonService.save(t_abies_alba);
1347

    
1348
        BotanicalName n_abies_subalpina = BotanicalName.NewInstance(Rank.SPECIES());
1349
        n_abies_subalpina.setNameCache("Abies subalpina", true);
1350
        Synonym s_abies_subalpina = Synonym.NewInstance(n_abies_subalpina, sec);
1351
        taxonService.save(s_abies_subalpina);
1352

    
1353
        BotanicalName n_abies_balsamea = BotanicalName.NewInstance(Rank.SPECIES());
1354
        n_abies_balsamea.setNameCache("Abies balsamea", true);
1355
        Taxon t_abies_balsamea = Taxon.NewInstance(n_abies_balsamea, sec);
1356
        t_abies_balsamea.setUuid(UUID.fromString(ABIES_BALSAMEA_UUID));
1357
        t_abies_balsamea.addSynonym(s_abies_subalpina, SynonymRelationshipType.SYNONYM_OF());
1358
        taxonService.save(t_abies_balsamea);
1359

    
1360
        BotanicalName n_abies_grandis = BotanicalName.NewInstance(Rank.SPECIES());
1361
        n_abies_grandis.setNameCache("Abies grandis", true);
1362
        Taxon t_abies_grandis = Taxon.NewInstance(n_abies_grandis, sec);
1363
        taxonService.save(t_abies_grandis);
1364

    
1365
        BotanicalName n_abies_kawakamii = BotanicalName.NewInstance(Rank.SPECIES());
1366
        n_abies_kawakamii.setNameCache("Abies kawakamii", true);
1367
        Taxon t_abies_kawakamii = Taxon.NewInstance(n_abies_kawakamii, sec);
1368
        t_abies_kawakamii.getTitleCache();
1369
        taxonService.save(t_abies_kawakamii);
1370

    
1371
        // abies_kawakamii_sensu_komarov as missapplied name for t_abies_balsamea
1372
        Taxon t_abies_kawakamii_sensu_komarov = Taxon.NewInstance(n_abies_kawakamii, sec_sensu);
1373
        taxonService.save(t_abies_kawakamii_sensu_komarov);
1374
        t_abies_kawakamii_sensu_komarov.addTaxonRelation(t_abies_balsamea, TaxonRelationshipType.MISAPPLIED_NAME_FOR(), null, null);
1375
        taxonService.saveOrUpdate(t_abies_kawakamii_sensu_komarov);
1376

    
1377
        BotanicalName n_abies_lasiocarpa = BotanicalName.NewInstance(Rank.SPECIES());
1378
        n_abies_lasiocarpa.setNameCache("Abies lasiocarpa", true);
1379
        Taxon t_abies_lasiocarpa = Taxon.NewInstance(n_abies_lasiocarpa, sec);
1380
        taxonService.save(t_abies_lasiocarpa);
1381

    
1382
        // add taxa to classifications
1383
        europeanAbiesClassification.addChildTaxon(t_abies_balsamea, null, null);
1384
        alternativeClassification.addChildTaxon(t_abies_lasiocarpa, null, null);
1385
        classificationService.saveOrUpdate(europeanAbiesClassification);
1386
        classificationService.saveOrUpdate(alternativeClassification);
1387

    
1388
        //
1389
        // Description
1390
        //
1391
        TaxonDescription d_abies_alba = TaxonDescription.NewInstance(t_abies_alba);
1392
        TaxonDescription d_abies_balsamea = TaxonDescription.NewInstance(t_abies_balsamea);
1393

    
1394
        d_abies_alba.setUuid(UUID.fromString(D_ABIES_ALBA_UUID));
1395
        d_abies_balsamea.setUuid(UUID.fromString(D_ABIES_BALSAMEA_UUID));
1396

    
1397

    
1398
        // CommonTaxonName
1399
        d_abies_alba.addElement(CommonTaxonName.NewInstance("Wei"+UTF8.SHARP_S+"tanne", Language.GERMAN()));
1400
        d_abies_alba.addElement(CommonTaxonName.NewInstance("silver fir", Language.ENGLISH()));
1401
        d_abies_alba.addElement(Distribution
1402
                .NewInstance(
1403
                        germany,
1404
                        PresenceAbsenceTerm.NATIVE()));
1405
        d_abies_alba.addElement(Distribution
1406
                .NewInstance(
1407
                        russia,
1408
                        PresenceAbsenceTerm.ABSENT()));
1409

    
1410
        // TextData
1411
        d_abies_balsamea
1412
            .addElement(TextData
1413
                    .NewInstance(
1414
                            "Die Balsam-Tanne (Abies balsamea) ist eine Pflanzenart aus der Gattung der Tannen (Abies). Sie wächst im nordöstlichen Nordamerika, wo sie sowohl Tief- als auch Bergland besiedelt. Sie gilt als relativ anspruchslos gegenüber dem Standort und ist frosthart. In vielen Teilen des natürlichen Verbreitungsgebietes stellt sie die Klimaxbaumart dar.",
1415
                            Language.GERMAN(), null));
1416
        d_abies_balsamea
1417
        .addElement(CommonTaxonName
1418
                .NewInstance(
1419
                        "Balsam-Tanne",
1420
                        Language.GERMAN(), null));
1421

    
1422
        d_abies_balsamea
1423
        .addElement(Distribution
1424
                .NewInstance(
1425
                        canada,
1426
                        PresenceAbsenceTerm.PRESENT()));
1427

    
1428
        d_abies_balsamea
1429
        .addElement(Distribution
1430
                .NewInstance(
1431
                        germany,
1432
                        PresenceAbsenceTerm.NATIVE()));
1433

    
1434
        d_abies_balsamea
1435
                .addElement(TextData
1436
                        .NewInstance(
1437
                                TaxonServiceSearchTestUtf8Constants.RUSSIAN_ABIES_ALBA_LONG,
1438
                                Language.RUSSIAN(), null));
1439
        d_abies_balsamea
1440
        .addElement(CommonTaxonName
1441
                .NewInstance(
1442
                        TaxonServiceSearchTestUtf8Constants.RUSSIAN_ABIES_ALBA_SHORT,
1443
                        Language.RUSSIAN(), null));
1444
        descriptionService.saveOrUpdate(d_abies_balsamea);
1445

    
1446
        setComplete();
1447
        endTransaction();
1448

    
1449

    
1450
        writeDbUnitDataSetFile(new String[] {
1451
            "TAXONBASE", "TAXONNAMEBASE",
1452
            "SYNONYMRELATIONSHIP", "TAXONRELATIONSHIP",
1453
            "REFERENCE", "DESCRIPTIONELEMENTBASE", "DESCRIPTIONBASE",
1454
            "AGENTBASE", "HOMOTYPICALGROUP",
1455
            "CLASSIFICATION", "TAXONNODE",
1456
            "LANGUAGESTRING", "DESCRIPTIONELEMENTBASE_LANGUAGESTRING",
1457
            "HIBERNATE_SEQUENCES" // IMPORTANT!!!
1458
            });
1459

    
1460
    }
1461

    
1462
    /**
1463
     *
1464
     */
1465
    private void refreshLuceneIndex() {
1466

    
1467
//        commitAndStartNewTransaction(null);
1468
        commit();
1469
        endTransaction();
1470
        indexer.purge(DefaultProgressMonitor.NewInstance());
1471
        indexer.reindex(typesToIndex, DefaultProgressMonitor.NewInstance());
1472
        startNewTransaction();
1473
//        commitAndStartNewTransaction(null);
1474
    }
1475

    
1476
    /**
1477
     * @param numberOfNew
1478
     *
1479
     */
1480
    private void createRandomTaxonWithCommonName(int numberOfNew) {
1481

    
1482
        logger.debug(String.format("creating %1$s random taxan with CommonName", numberOfNew));
1483

    
1484
        commitAndStartNewTransaction(null);
1485

    
1486
        Reference sec = ReferenceFactory.newBook();
1487
        referenceService.save(sec);
1488

    
1489
        for (int i = numberOfNew; i < numberOfNew; i++) {
1490
            RandomStringUtils.randomAlphabetic(10);
1491
            String radomName = RandomStringUtils.randomAlphabetic(5) + " " + RandomStringUtils.randomAlphabetic(10);
1492
            String radomCommonName = RandomStringUtils.randomAlphabetic(10);
1493

    
1494
            BotanicalName name = BotanicalName.NewInstance(Rank.SPECIES());
1495
            name.setNameCache(radomName, true);
1496
            Taxon taxon = Taxon.NewInstance(name, sec);
1497
            taxonService.save(taxon);
1498

    
1499
            TaxonDescription description = TaxonDescription.NewInstance(taxon);
1500
            description.addElement(CommonTaxonName.NewInstance(radomCommonName, Language.GERMAN()));
1501
            descriptionService.save(description);
1502
        }
1503

    
1504
        commitAndStartNewTransaction(null);
1505
    }
1506

    
1507
    private <T extends CdmBase> void logSearchResults(Pager<SearchResult<T>> pager, Level level, String[] docFields){
1508
        if(level == null){
1509
            level = Level.DEBUG;
1510
        }
1511
        if(logger.isEnabledFor(level)){
1512
            StringBuilder b = new StringBuilder();
1513
            b.append("\n");
1514
            int i = 0;
1515
            for(SearchResult sr : pager.getRecords()){
1516

    
1517
                b.append(" ").append(i++).append(" - ");
1518
                b.append("score:").append(sr.getScore()).append(", ");
1519

    
1520
                if(docFields != null){
1521
                    b.append("docs : ");
1522
                    for(Document doc : sr.getDocs()) {
1523
                        b.append("<");
1524
                        for(String f : docFields){
1525
                            b.append(f).append(":").append(Arrays.toString(doc.getValues(f)));
1526
                        }
1527
                        b.append(">");
1528
                    }
1529
                }
1530

    
1531
                CdmBase entity = sr.getEntity();
1532
                if(entity == null){
1533
                    b.append("NULL");
1534
                } else {
1535
                    b.append(entity.getClass().getSimpleName()).
1536
                        append(" [").append(entity.getId()).
1537
                        append(" | ").append(entity.getUuid()).append("] : ").
1538
                        append(entity.toString());
1539

    
1540
                }
1541
                b.append("\n");
1542
            }
1543
            logger.log(level, b);
1544
        }
1545
    }
1546

    
1547
}
(26-26/31)