1
|
package org.bgbm.biovel.drf.checklist;
|
2
|
|
3
|
import java.io.PrintStream;
|
4
|
import java.net.URI;
|
5
|
import java.util.ArrayList;
|
6
|
import java.util.EnumSet;
|
7
|
import java.util.Iterator;
|
8
|
import java.util.List;
|
9
|
|
10
|
import org.apache.jena.rdf.model.Model;
|
11
|
import org.apache.jena.rdf.model.Resource;
|
12
|
import org.apache.lucene.queryParser.QueryParser;
|
13
|
import org.bgbm.biovel.drf.client.ServiceProviderInfo;
|
14
|
import org.bgbm.biovel.drf.query.IQueryClient;
|
15
|
import org.bgbm.biovel.drf.query.SparqlClient;
|
16
|
import org.bgbm.biovel.drf.query.TinkerPopClient;
|
17
|
import org.bgbm.biovel.drf.store.Neo4jStore;
|
18
|
import org.bgbm.biovel.drf.store.Store;
|
19
|
import org.bgbm.biovel.drf.store.TDBStore;
|
20
|
import org.bgbm.biovel.drf.tnr.msg.NameType;
|
21
|
import org.bgbm.biovel.drf.tnr.msg.Query;
|
22
|
import org.bgbm.biovel.drf.tnr.msg.Query.Request;
|
23
|
import org.bgbm.biovel.drf.tnr.msg.Response;
|
24
|
import org.bgbm.biovel.drf.tnr.msg.Source;
|
25
|
import org.bgbm.biovel.drf.tnr.msg.Synonym;
|
26
|
import org.bgbm.biovel.drf.tnr.msg.Taxon;
|
27
|
import org.bgbm.biovel.drf.tnr.msg.TaxonBase;
|
28
|
import org.bgbm.biovel.drf.tnr.msg.TaxonName;
|
29
|
import org.bgbm.biovel.drf.tnr.msg.TnrMsg;
|
30
|
import org.bgbm.biovel.drf.utils.IdentifierUtils;
|
31
|
import org.bgbm.biovel.drf.utils.Profiler;
|
32
|
import org.bgbm.biovel.drf.utils.TnrMsgUtils;
|
33
|
import org.neo4j.graphdb.Relationship;
|
34
|
import org.openrdf.query.MalformedQueryException;
|
35
|
import org.openrdf.query.QueryEvaluationException;
|
36
|
import org.openrdf.query.QueryLanguage;
|
37
|
import org.openrdf.query.TupleQuery;
|
38
|
import org.openrdf.query.TupleQueryResult;
|
39
|
import org.openrdf.repository.RepositoryException;
|
40
|
import org.openrdf.repository.sail.SailRepositoryConnection;
|
41
|
|
42
|
import com.tinkerpop.blueprints.Direction;
|
43
|
import com.tinkerpop.blueprints.Graph;
|
44
|
import com.tinkerpop.blueprints.Vertex;
|
45
|
import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Graph;
|
46
|
import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Vertex;
|
47
|
import com.tinkerpop.blueprints.oupls.sail.GraphSail;
|
48
|
import com.tinkerpop.gremlin.java.GremlinPipeline;
|
49
|
import com.tinkerpop.pipes.PipeFunction;
|
50
|
import com.tinkerpop.pipes.util.FastNoSuchElementException;
|
51
|
|
52
|
public class EEA_BDC_Client extends AggregateChecklistClient<TinkerPopClient> {
|
53
|
|
54
|
/**
|
55
|
*
|
56
|
*/
|
57
|
public static final String ID = "eea_bdc";
|
58
|
public static final String LABEL = "European Environment Agency (EEA) Biodiversity data centre (BDC)";
|
59
|
public static final String DOC_URL = "http://semantic.eea.europa.eu/documentation";
|
60
|
public static final String COPYRIGHT_URL = "http://www.eea.europa.eu/legal/eea-data-policy";
|
61
|
|
62
|
private static final String SPARQL_ENDPOINT_URL = "http://semantic.eea.europa.eu/sparql";
|
63
|
private static final boolean USE_REMOTE_SERVICE = false;
|
64
|
|
65
|
private static final String SPECIES_RDF_FILE_URL = "http://localhost/download/species.rdf.gz"; // http://eunis.eea.europa.eu/rdf/species.rdf.gz
|
66
|
private static final String LEGALREFS_RDF_FILE_URL = "http://localhost/download/legalrefs.rdf.gz"; // http://eunis.eea.europa.eu/rdf/legalrefs.rdf.gz
|
67
|
private static final String REFERENCES_RDF_FILE_URL = "http://localhost/download/references.rdf.gz"; // http://eunis.eea.europa.eu/rdf/references.rdf.gz
|
68
|
private static final boolean REFRESH_TDB = false;
|
69
|
|
70
|
private static final Class<? extends IQueryClient> clientClass = TinkerPopClient.class;
|
71
|
|
72
|
private static final int MAX_PAGING_LIMIT = 50;
|
73
|
|
74
|
public static final EnumSet<SearchMode> SEARCH_MODES = EnumSet.of(
|
75
|
SearchMode.scientificNameExact,
|
76
|
SearchMode.scientificNameLike,
|
77
|
SearchMode.vernacularNameExact,
|
78
|
SearchMode.vernacularNameLike,
|
79
|
SearchMode.findByIdentifier);
|
80
|
|
81
|
/**
 * RDF vocabularies used by this client, each with the prefix abbreviation
 * used in SPARQL {@code PREFIX} declarations and its namespace URI.
 *
 * <p>Namespace declarations as copied by the original author, presumably from
 * the RDF source documents:
 * <pre>
 * xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 * xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
 * xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
 * xmlns:dcterms="http://purl.org/dc/terms/"
 * xmlns:dc="http://purl.org/dc/elements/1.1/"
 * xmlns:dwc="http://rs.tdwg.org/dwc/terms/"
 * xmlns:owl="http://www.w3.org/2002/07/owl#"
 * xmlns="http://eunis.eea.europa.eu/rdf/species-schema.rdf#"
 * xmlns:sioc="http://rdfs.org/sioc/ns#"
 * xmlns:skos="http://www.w3.org/2004/02/skos/core#"
 * xmlns:bibo="http://purl.org/ontology/bibo/"
 * xmlns:cc="http://creativecommons.org/ns#"
 * xmlns:foaf="http://xmlns.com/foaf/0.1/"
 * </pre>
 */
public enum RdfSchema {

    EUNIS_SPECIES("es", "http://eunis.eea.europa.eu/rdf/species-schema.rdf#"),
    EUNIS_TAXONOMY("et", "http://eunis.eea.europa.eu/rdf/taxonomies-schema.rdf#"),
    DWC("dwc", "http://rs.tdwg.org/dwc/terms/"),
    RDF("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
    RDFS("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
    // NOTE(review): the abbreviation "scos_core" looks like a typo for "skos";
    // left unchanged because queries elsewhere may rely on this prefix.
    SKOS_CORE("scos_core", "http://www.w3.org/2004/02/skos/core#"),
    // The Dublin Core elements namespace. The former value
    // "http://purl.org/dc/terms/source" was a property URI, not a namespace,
    // which made propertyURI() and the generated PREFIX declaration wrong.
    DC("dc", "http://purl.org/dc/elements/1.1/"),
    DCTERMS("dcterms", "http://purl.org/dc/terms/");

    private final String schemaUri;
    private final String abbreviation;

    RdfSchema(String abbreviation, String schemaUri) {
        this.abbreviation = abbreviation;
        this.schemaUri = schemaUri;
    }

    /**
     * @return the namespace URI of the vocabulary
     */
    public String schemaUri() {
        return schemaUri;
    }

    /**
     * @return the prefix abbreviation of the vocabulary
     */
    public String abbreviation() {
        return abbreviation;
    }

    /**
     * Builds the full URI of a property of this vocabulary.
     *
     * @param name the local name of the property
     * @return the namespace URI directly followed by {@code name}
     */
    public String propertyURI(String name) {
        return schemaUri + name;
    }

}
|
129
|
|
130
|
/**
 * Identifiers of the sub checklists of this aggregate checklist.
 * The constant names are used verbatim as ids, see buildServiceProviderInfo().
 */
public enum SubCheckListId {
    eunis,
    natura_2000
}
|
134
|
|
135
|
private enum RankLevel{
|
136
|
|
137
|
Kingdom, Phylum, Clazz, Order, Family, Genus;
|
138
|
}
|
139
|
|
140
|
/**
 * Creates the client through the no-argument constructor of the super class.
 */
public EEA_BDC_Client() {
    // the super class constructor is invoked implicitly
}

/**
 * Creates the client from a JSON description which is handed over to the super class.
 *
 * @param checklistInfoJson passed unchanged to the super class constructor
 * @throws DRFChecklistException if the super class constructor fails
 */
public EEA_BDC_Client(String checklistInfoJson) throws DRFChecklistException {
    super(checklistInfoJson);
}
|
149
|
|
150
|
@Override
|
151
|
public void initQueryClient() {
|
152
|
|
153
|
if(SparqlClient.class.isAssignableFrom(clientClass)) {
|
154
|
if(USE_REMOTE_SERVICE) {
|
155
|
// use SPARQL end point
|
156
|
//FIXME queryClient = new SparqlClient(SPARQL_ENDPOINT_URL);
|
157
|
} else {
|
158
|
TDBStore tripleStore;
|
159
|
try {
|
160
|
tripleStore = new TDBStore();
|
161
|
} catch (Exception e1) {
|
162
|
throw new RuntimeException("Creation of TripleStore failed", e1);
|
163
|
}
|
164
|
if(REFRESH_TDB) {
|
165
|
updateStore(tripleStore);
|
166
|
}
|
167
|
//FIXME queryClient = new SparqlClient(tripleStore);
|
168
|
|
169
|
}
|
170
|
} else if(TinkerPopClient.class.isAssignableFrom(clientClass)) {
|
171
|
if(USE_REMOTE_SERVICE) {
|
172
|
throw new RuntimeException("USE_REMOTE_SERVICE not suported by QueryClient class "+ clientClass);
|
173
|
} else {
|
174
|
Neo4jStore neo4jStore;
|
175
|
try {
|
176
|
neo4jStore = new Neo4jStore();
|
177
|
} catch (Exception e1) {
|
178
|
throw new RuntimeException("Creation of Neo4jStore failed", e1);
|
179
|
}
|
180
|
if(REFRESH_TDB) {
|
181
|
updateStore(neo4jStore);
|
182
|
}
|
183
|
queryClient = new TinkerPopClient(neo4jStore);
|
184
|
|
185
|
}
|
186
|
|
187
|
} else {
|
188
|
throw new RuntimeException("Unsuported QueryClient class "+ clientClass);
|
189
|
}
|
190
|
}
|
191
|
|
192
|
/**
|
193
|
* @param neo4jStore
|
194
|
*/
|
195
|
private void updateStore(Store neo4jStore) {
|
196
|
try {
|
197
|
neo4jStore.loadIntoStore(
|
198
|
SPECIES_RDF_FILE_URL,
|
199
|
LEGALREFS_RDF_FILE_URL,
|
200
|
REFERENCES_RDF_FILE_URL
|
201
|
);
|
202
|
} catch (Exception e) {
|
203
|
throw new RuntimeException("Loading "
|
204
|
+ SPECIES_RDF_FILE_URL + ", "
|
205
|
+ LEGALREFS_RDF_FILE_URL + ", "
|
206
|
+ REFERENCES_RDF_FILE_URL +
|
207
|
" into Neo4jStore failed", e);
|
208
|
}
|
209
|
}
|
210
|
|
211
|
@Override
|
212
|
public ServiceProviderInfo buildServiceProviderInfo() {
|
213
|
|
214
|
ServiceProviderInfo checklistInfo = new ServiceProviderInfo(ID, LABEL, DOC_URL, COPYRIGHT_URL, getSearchModes());
|
215
|
checklistInfo.addSubChecklist(new ServiceProviderInfo(SubCheckListId.eunis.name(), "EUNIS",
|
216
|
"http://www.eea.europa.eu/themes/biodiversity/eunis/eunis-db#tab-metadata",
|
217
|
"http://www.eea.europa.eu/legal/copyright", SEARCH_MODES));
|
218
|
return checklistInfo;
|
219
|
}
|
220
|
|
221
|
|
222
|
/**
|
223
|
* @param queryString
|
224
|
* @throws DRFChecklistException
|
225
|
*/
|
226
|
private void addPrexfixes(StringBuilder queryString) throws DRFChecklistException {
|
227
|
|
228
|
for(RdfSchema schema : RdfSchema.values()) {
|
229
|
queryString.append(String.format("PREFIX %s: <%s>\n", schema.abbreviation(), schema.schemaUri()));
|
230
|
}
|
231
|
}
|
232
|
|
233
|
/**
|
234
|
* @param checklistInfo
|
235
|
* @return
|
236
|
* @throws DRFChecklistException
|
237
|
*/
|
238
|
private StringBuilder prepareQueryString() throws DRFChecklistException {
|
239
|
|
240
|
StringBuilder queryString = new StringBuilder();
|
241
|
addPrexfixes(queryString);
|
242
|
return queryString;
|
243
|
}
|
244
|
|
245
|
private Taxon createTaxon(Vertex v) {
|
246
|
|
247
|
Taxon taxon = new Taxon();
|
248
|
|
249
|
TaxonName taxonName = createTaxonName(v);
|
250
|
|
251
|
// Taxon
|
252
|
taxon.setTaxonName(taxonName);
|
253
|
taxon.setIdentifier(v.getId().toString());
|
254
|
taxon.setAccordingTo(queryClient.relatedVertexValue(v, RdfSchema.DWC, "nameAccordingToID"));
|
255
|
URI typeUri = queryClient.relatedVertexURI(v, RdfSchema.RDF, "type");
|
256
|
taxon.setTaxonomicStatus(typeUri.getFragment());
|
257
|
|
258
|
createSources(v, taxon);
|
259
|
|
260
|
/*
|
261
|
|
262
|
// classification
|
263
|
Classification c = null;
|
264
|
Resource parentR = queryClient.objectAsResource(taxonR, RdfSchema.EUNIS_SPECIES, "taxonomy");
|
265
|
while (parentR != null) {
|
266
|
|
267
|
String level = queryClient.objectAsString(parentR, RdfSchema.EUNIS_TAXONOMY, "level");
|
268
|
String parentTaxonName = queryClient.objectAsString(parentR, RdfSchema.EUNIS_TAXONOMY, "name");
|
269
|
|
270
|
RankLevel rankLevel = null;
|
271
|
try {
|
272
|
rankLevel = RankLevel.valueOf(level);
|
273
|
} catch (Exception e) {
|
274
|
// IGNORE
|
275
|
}
|
276
|
if(rankLevel != null) {
|
277
|
if(c == null) {
|
278
|
c = new Classification();
|
279
|
}
|
280
|
switch(rankLevel) {
|
281
|
case Clazz:
|
282
|
c.setClazz(parentTaxonName);
|
283
|
break;
|
284
|
case Family:
|
285
|
c.setFamily(parentTaxonName);
|
286
|
break;
|
287
|
case Genus:
|
288
|
c.setGenus(parentTaxonName);
|
289
|
break;
|
290
|
case Kingdom:
|
291
|
c.setKingdom(parentTaxonName);
|
292
|
break;
|
293
|
case Order:
|
294
|
c.setOrder(parentTaxonName);
|
295
|
break;
|
296
|
case Phylum:
|
297
|
c.setPhylum(parentTaxonName);
|
298
|
break;
|
299
|
default:
|
300
|
break;
|
301
|
}
|
302
|
}
|
303
|
Resource lastParentR = parentR;
|
304
|
parentR = queryClient.objectAsResource(parentR, RdfSchema.EUNIS_TAXONOMY, "parent");
|
305
|
if(lastParentR.equals(parentR)) {
|
306
|
// avoid endless looping when data is not correct
|
307
|
break;
|
308
|
}
|
309
|
}
|
310
|
if(c != null) {
|
311
|
taxon.setClassification(c);
|
312
|
}
|
313
|
*/
|
314
|
return taxon;
|
315
|
}
|
316
|
|
317
|
/**
|
318
|
* @param model
|
319
|
* @param taxonR
|
320
|
* @param taxonBase
|
321
|
*/
|
322
|
private void createSources(Vertex v, TaxonBase taxonBase) {
|
323
|
|
324
|
// Sources are source references, re there others like data bases?
|
325
|
|
326
|
GremlinPipeline<Graph, Vertex> taxonPipe = new GremlinPipeline<Graph, Vertex>(v);
|
327
|
|
328
|
try {
|
329
|
List<Vertex> titleVs = taxonPipe
|
330
|
.outE(RdfSchema.EUNIS_SPECIES.propertyURI("hasLegalReference")).inV()
|
331
|
.outE(RdfSchema.DCTERMS.propertyURI("source")).inV().dedup()
|
332
|
.outE(RdfSchema.DCTERMS.propertyURI("title")).inV()
|
333
|
.toList();
|
334
|
for(Vertex tv : titleVs) {
|
335
|
Source source = new Source();
|
336
|
logger.error(tv.toString());
|
337
|
source.setName(tv.getProperty(GraphSail.VALUE).toString());
|
338
|
taxonBase.getSources().add(source);
|
339
|
}
|
340
|
} catch (FastNoSuchElementException e) {
|
341
|
logger.debug("No sources found");
|
342
|
}
|
343
|
}
|
344
|
|
345
|
/**
|
346
|
* @param taxonR
|
347
|
* @return
|
348
|
*/
|
349
|
private TaxonName createTaxonName(Vertex v) {
|
350
|
|
351
|
TaxonName taxonName = new TaxonName();
|
352
|
// TaxonName
|
353
|
taxonName.setFullName(queryClient.relatedVertexValue(v, RdfSchema.RDFS, "label"));
|
354
|
// TODO rename CanonicalName to scientificName? compare with dwc:scientificName
|
355
|
taxonName.setCanonicalName(queryClient.relatedVertexValue(v, RdfSchema.EUNIS_SPECIES, "binomialName"));
|
356
|
taxonName.setRank(queryClient.relatedVertexValue(v, RdfSchema.EUNIS_SPECIES, "taxonomicRank"));
|
357
|
return taxonName;
|
358
|
}
|
359
|
|
360
|
|
361
|
private void createSynonyms(Vertex taxonV, Response tnrResponse) {
|
362
|
|
363
|
|
364
|
GremlinPipeline<Graph, Vertex> taxonPipe = new GremlinPipeline<Graph, Vertex>(taxonV);
|
365
|
|
366
|
try {
|
367
|
List<Vertex> synonymVs = taxonPipe
|
368
|
.inE(RdfSchema.EUNIS_SPECIES.propertyURI("eunisPrimaryName")).outV().dedup()
|
369
|
.toList();
|
370
|
for(Vertex synonymV : synonymVs) {
|
371
|
String typeUri = queryClient.relatedVertexValue(synonymV, RdfSchema.RDF, "type");
|
372
|
String status = null;
|
373
|
try {
|
374
|
status = URI.create(typeUri).getFragment();
|
375
|
} catch (Exception e) {
|
376
|
|
377
|
}
|
378
|
|
379
|
if (status != null && status.equals("SpeciesSynonym")) {
|
380
|
|
381
|
Synonym synonym = new Synonym();
|
382
|
|
383
|
TaxonName taxonName = createTaxonName(synonymV);
|
384
|
synonym.setTaxonomicStatus(status);
|
385
|
synonym.setTaxonName(taxonName);
|
386
|
synonym.setAccordingTo(queryClient.relatedVertexValue(synonymV, RdfSchema.DWC, "nameAccordingToID"));
|
387
|
|
388
|
createSources(synonymV, synonym);
|
389
|
|
390
|
tnrResponse.getSynonym().add(synonym);
|
391
|
}
|
392
|
}
|
393
|
} catch (FastNoSuchElementException e) {
|
394
|
logger.debug("No sources found");
|
395
|
}
|
396
|
|
397
|
}
|
398
|
|
399
|
/**
|
400
|
* Returns all subjects that are related to the taxonR
|
401
|
* via the es:eunisPrimaryName property.
|
402
|
*
|
403
|
* @param taxonR
|
404
|
* @return
|
405
|
*/
|
406
|
private List<Resource> queryForSynonyms(Resource taxonR) {
|
407
|
/* FIXME
|
408
|
List<Resource> synonymRList = null;
|
409
|
|
410
|
try {
|
411
|
StringBuilder queryString = prepareQueryString();
|
412
|
|
413
|
queryString.append("DESCRIBE ?synonym es:eunisPrimaryName <" + taxonR.getURI() + ">");
|
414
|
logger.debug("\n" + queryString.toString());
|
415
|
|
416
|
Model model = queryClient.describe(queryString.toString());
|
417
|
synonymRList = listSynonymResources(model, taxonR);
|
418
|
|
419
|
} catch (DRFChecklistException e) {
|
420
|
logger.error("SPARQL query error in queryForSynonyms()", e);
|
421
|
} finally {
|
422
|
if(synonymRList == null) {
|
423
|
synonymRList = new ArrayList<Resource>(0);
|
424
|
}
|
425
|
}
|
426
|
|
427
|
return synonymRList;
|
428
|
*/ return null;
|
429
|
}
|
430
|
|
431
|
/**
|
432
|
* @param model
|
433
|
* @return
|
434
|
*/
|
435
|
private List<Resource> listSynonymResources(Model model, Resource taxonR) {
|
436
|
List<Resource> synonymRList;
|
437
|
/*
|
438
|
Property filterProperty = model.createProperty(RdfSchema.EUNIS_SPECIES.schemaUri, "eunisPrimaryName");
|
439
|
synonymRList = queryClient.listResources(model, filterProperty, null, taxonR);
|
440
|
return synonymRList;
|
441
|
*/
|
442
|
return null;
|
443
|
}
|
444
|
|
445
|
@Override
|
446
|
public void resolveScientificNamesExact(TnrMsg tnrMsg) throws DRFChecklistException {
|
447
|
|
448
|
List<Query> queryList = tnrMsg.getQuery();
|
449
|
|
450
|
// selecting one request as representative, only
|
451
|
// the search mode and addSynonmy flag are important
|
452
|
// for the further usage of the request object
|
453
|
|
454
|
for (ServiceProviderInfo checklistInfo : getServiceProviderInfo().getSubChecklists()) {
|
455
|
|
456
|
Query query = singleQueryFrom(tnrMsg);
|
457
|
|
458
|
boolean TUPLEQUERY = false;
|
459
|
boolean Neo4jINDEX = true;
|
460
|
|
461
|
String filter;
|
462
|
String queryString = query.getRequest().getQueryString();
|
463
|
queryString = QueryParser.escape(queryString);
|
464
|
queryString = queryString.replace(" ", "\\ ");
|
465
|
logger.debug("original queryString: "+ queryString);
|
466
|
|
467
|
PipeFunction<Vertex, Boolean> matchFilter;
|
468
|
if(query.getRequest().getSearchMode().equals(SearchMode.scientificNameLike.name())) {
|
469
|
filter = "(regex(?name, \"^" + queryString + "\"))";
|
470
|
matchFilter = queryClient.createStarttWithFilter(queryString);
|
471
|
// need to escape white space and add wildcard to the end
|
472
|
queryString += "*";
|
473
|
} else {
|
474
|
filter = "(?name = \"" + queryString + "\")";
|
475
|
matchFilter = queryClient.createEqualsFilter(queryString);
|
476
|
}
|
477
|
|
478
|
logger.debug("prepared queryString: "+ queryString);
|
479
|
|
480
|
if(TUPLEQUERY) {
|
481
|
StringBuilder sparql = prepareQueryString();
|
482
|
sparql.append(
|
483
|
"SELECT ?eunisurl \n"
|
484
|
+ "WHERE {\n"
|
485
|
+ " ?eunisurl es:binomialName ?name . \n"
|
486
|
+ " FILTER " + filter + " \n"
|
487
|
+ "}"
|
488
|
);
|
489
|
|
490
|
Neo4j2Graph neo4jGraph = (Neo4j2Graph)queryClient.graph();
|
491
|
Vertex v = neo4jGraph.getVertex(2);
|
492
|
|
493
|
SailRepositoryConnection connection = null;
|
494
|
try {
|
495
|
|
496
|
Profiler profiler = Profiler.newCpuProfiler(true);
|
497
|
|
498
|
connection = queryClient.connection();
|
499
|
TupleQuery tquery = connection.prepareTupleQuery(QueryLanguage.SPARQL, sparql.toString());
|
500
|
TupleQueryResult tqresult = tquery.evaluate();
|
501
|
queryClient.showResults(tqresult);
|
502
|
|
503
|
profiler.end(System.err);
|
504
|
|
505
|
} catch (MalformedQueryException | RepositoryException | QueryEvaluationException e1) {
|
506
|
// TODO Auto-generated catch block
|
507
|
e1.printStackTrace();
|
508
|
} catch (Exception e1) {
|
509
|
// yourkit
|
510
|
e1.printStackTrace();
|
511
|
} finally {
|
512
|
try {
|
513
|
connection.close();
|
514
|
} catch (RepositoryException e1) {
|
515
|
// IGNORE //
|
516
|
}
|
517
|
connection = null;
|
518
|
}
|
519
|
|
520
|
}
|
521
|
GremlinPipeline<Graph, Vertex> pipe = null;
|
522
|
|
523
|
if(Neo4jINDEX) {
|
524
|
|
525
|
Profiler profiler = Profiler.newCpuProfiler(false);
|
526
|
|
527
|
logger.debug("Neo4jINDEX");
|
528
|
|
529
|
ArrayList<Vertex> hitVs = queryClient.vertexIndexQuery("value:" + queryString);
|
530
|
pipe = new GremlinPipeline<Graph, Vertex>(hitVs);
|
531
|
|
532
|
List<Vertex> vertices = new ArrayList<Vertex>();
|
533
|
pipe.in(RdfSchema.EUNIS_SPECIES.propertyURI("binomialName")).fill(vertices);
|
534
|
|
535
|
updateQueriesWithResponse(vertices, checklistInfo, query);
|
536
|
profiler.end(System.err);
|
537
|
}
|
538
|
}
|
539
|
}
|
540
|
|
541
|
@Override
|
542
|
public void resolveScientificNamesLike(TnrMsg tnrMsg) throws DRFChecklistException {
|
543
|
// delegate to resolveScientificNamesExact,
|
544
|
resolveScientificNamesExact(tnrMsg);
|
545
|
|
546
|
}
|
547
|
|
548
|
@Override
|
549
|
public void resolveVernacularNamesExact(TnrMsg tnrMsg) throws DRFChecklistException {
|
550
|
List<Query> queryList = tnrMsg.getQuery();
|
551
|
|
552
|
for (ServiceProviderInfo checklistInfo : getServiceProviderInfo().getSubChecklists()) {
|
553
|
|
554
|
// selecting one request as representative, only
|
555
|
// the search mode and addSynonmy flag are important
|
556
|
// for the further usage of the request object
|
557
|
Query query = singleQueryFrom(tnrMsg);
|
558
|
|
559
|
String queryString = query.getRequest().getQueryString();
|
560
|
queryString = QueryParser.escape(queryString);
|
561
|
queryString = queryString.replace(" ", "\\ ");
|
562
|
logger.debug("original queryString: "+ queryString);
|
563
|
if(query.getRequest().getSearchMode().equals(SearchMode.vernacularNameLike.name())) {
|
564
|
queryString = "*" + queryString + "*";
|
565
|
}
|
566
|
|
567
|
logger.debug("prepared queryString: "+ queryString);
|
568
|
|
569
|
GremlinPipeline<Graph, Vertex> pipe = null;
|
570
|
|
571
|
Profiler profiler = Profiler.newCpuProfiler(false);
|
572
|
|
573
|
// by using the Neo4j index directly it is possible to
|
574
|
// take full advantage of the underlying Lucene search engine
|
575
|
ArrayList<Vertex> hitVs = queryClient.vertexIndexQuery("value:" + queryString);
|
576
|
|
577
|
// List<String> matchingNames = new ArrayList<String>(hitVs.size());
|
578
|
// for(Vertex v : hitVs) {
|
579
|
// String matchValue = v.getProperty(GraphSail.VALUE).toString();
|
580
|
// matchingNames.add(matchValue);
|
581
|
// logger.debug("matchingName " + matchValue);
|
582
|
// }
|
583
|
|
584
|
List<Vertex> vertices = new ArrayList<Vertex>();
|
585
|
pipe = new GremlinPipeline<Graph, Vertex>(hitVs);
|
586
|
pipe.in(RdfSchema.DWC.propertyURI("vernacularName")).fill(vertices);
|
587
|
|
588
|
updateQueriesWithResponse(vertices, checklistInfo, query);
|
589
|
profiler.end(System.err);
|
590
|
}
|
591
|
}
|
592
|
|
593
|
@Override
|
594
|
public void resolveVernacularNamesLike(TnrMsg tnrMsg) throws DRFChecklistException {
|
595
|
resolveVernacularNamesExact(tnrMsg);
|
596
|
}
|
597
|
|
598
|
@Override
|
599
|
public void findByIdentifier(TnrMsg tnrMsg) throws DRFChecklistException {
|
600
|
|
601
|
for (ServiceProviderInfo checklistInfo : getServiceProviderInfo().getSubChecklists()) {
|
602
|
|
603
|
// FIXME query specific subchecklist
|
604
|
Query query = singleQueryFrom(tnrMsg);
|
605
|
String queryString = query.getRequest().getQueryString();
|
606
|
|
607
|
// by using the Neo4j index directly it is possible to
|
608
|
// take full advantage of the underlying Lucene search engine
|
609
|
queryString = QueryParser.escape(queryString);
|
610
|
ArrayList<Vertex> hitVs = queryClient.vertexIndexQuery("value:" + queryString);
|
611
|
if(hitVs.size() > 0) {
|
612
|
Response response = tnrResponseFromResource(hitVs.get(0), query.getRequest());
|
613
|
query.getResponse().add(response);
|
614
|
} else if(hitVs.size() > 1) {
|
615
|
throw new DRFChecklistException("More than one node with the id '" + queryString + "' found");
|
616
|
}
|
617
|
}
|
618
|
}
|
619
|
|
620
|
private void updateQueriesWithResponse(List<Vertex> nodes, ServiceProviderInfo ci, Query query){
|
621
|
|
622
|
if (nodes == null) {
|
623
|
return;
|
624
|
}
|
625
|
|
626
|
logger.debug("matching taxon nodes:");
|
627
|
for (Vertex v : nodes) {
|
628
|
logger.debug(" " + v.toString());
|
629
|
printPropertyKeys(v, System.err);
|
630
|
if(v.getProperty("kind").equals("url")) {
|
631
|
logger.error("vertex of type 'url' expected, but was " + v.getProperty("type").equals("url"));
|
632
|
continue;
|
633
|
}
|
634
|
Response tnrResponse = tnrResponseFromResource(v, query.getRequest());
|
635
|
if(tnrResponse != null) {
|
636
|
query.getResponse().add(tnrResponse);
|
637
|
}
|
638
|
}
|
639
|
}
|
640
|
|
641
|
/**
|
642
|
* @param model
|
643
|
* @param taxonR
|
644
|
* @param request
|
645
|
* @return
|
646
|
*/
|
647
|
@SuppressWarnings("unused")
|
648
|
private Response tnrResponseFromResource(Vertex taxonV, Request request) {
|
649
|
|
650
|
Response tnrResponse = TnrMsgUtils.tnrResponseFor(getServiceProviderInfo());
|
651
|
|
652
|
SearchMode searchMode = SearchMode.valueOf(request.getSearchMode());
|
653
|
|
654
|
GremlinPipeline<Graph, Vertex> pipe = new GremlinPipeline<Graph, Vertex>(taxonV);
|
655
|
|
656
|
String validName = queryClient.relatedVertexValue(taxonV, RdfSchema.EUNIS_SPECIES, "validName");
|
657
|
|
658
|
boolean isAccepted = validName != null && validName.equals("true");
|
659
|
boolean skipThis = false;
|
660
|
|
661
|
logger.debug("processing " + (isAccepted ? "accepted taxon" : "synonym or other") + " " + taxonV.getId());
|
662
|
|
663
|
// case when accepted name
|
664
|
if(isAccepted) {
|
665
|
Taxon taxon = createTaxon(taxonV);
|
666
|
tnrResponse.setTaxon(taxon);
|
667
|
tnrResponse.setMatchingNameType(NameType.TAXON);
|
668
|
String matchingName = taxon.getTaxonName().getCanonicalName();
|
669
|
tnrResponse.setMatchingNameString(matchingName);
|
670
|
|
671
|
}
|
672
|
else {
|
673
|
// case when synonym
|
674
|
Vertex synonymV = taxonV;
|
675
|
taxonV = null;
|
676
|
try {
|
677
|
taxonV = synonymV.getEdges(Direction.OUT, RdfSchema.EUNIS_SPECIES.propertyURI("eunisPrimaryName")).iterator().next().getVertex(Direction.IN);
|
678
|
} catch(Exception e) {
|
679
|
logger.error("No accepted taxon found for " + synonymV.toString() + " (" + synonymV.getProperty(GraphSail.VALUE) + ")");
|
680
|
}
|
681
|
|
682
|
if(taxonV != null) {
|
683
|
Taxon taxon = createTaxon(taxonV);
|
684
|
tnrResponse.setTaxon(taxon);
|
685
|
} else {
|
686
|
}
|
687
|
tnrResponse.setMatchingNameType(NameType.SYNONYM);
|
688
|
String matchingName = queryClient.relatedVertexValue(synonymV, RdfSchema.EUNIS_SPECIES, "binomialName");
|
689
|
tnrResponse.setMatchingNameString(matchingName);
|
690
|
}
|
691
|
|
692
|
if(!skipThis && request.isAddSynonymy()) {
|
693
|
createSynonyms(taxonV, tnrResponse);
|
694
|
}
|
695
|
|
696
|
logger.debug("processing " + (isAccepted ? "accepted taxon" : "synonym or other") + " " + taxonV.getId() + " DONE");
|
697
|
return tnrResponse;
|
698
|
}
|
699
|
|
700
|
/**
|
701
|
* @param vertex
|
702
|
*/
|
703
|
private void printEdges(Neo4j2Vertex vertex) {
|
704
|
Iterable<Relationship> rels = vertex.getRawVertex().getRelationships();
|
705
|
Iterator<Relationship> iterator = rels.iterator();
|
706
|
if(iterator.hasNext()) {
|
707
|
Relationship rel = iterator.next();
|
708
|
System.err.println(rel.toString() + ": " + rel.getStartNode().toString() + "-[" + rel.getType() + "]-" + rel.getEndNode().toString());
|
709
|
}
|
710
|
}
|
711
|
|
712
|
private void printPropertyKeys(Vertex v, PrintStream ps) {
|
713
|
StringBuilder out = new StringBuilder();
|
714
|
out.append(v.toString());
|
715
|
for(String key : v.getPropertyKeys()) {
|
716
|
out.append(key).append(": ").append(v.getProperty(key)).append(" ");
|
717
|
}
|
718
|
ps.println(out.toString());
|
719
|
}
|
720
|
|
721
|
@Override
|
722
|
public EnumSet<SearchMode> getSearchModes() {
|
723
|
return SEARCH_MODES;
|
724
|
}
|
725
|
|
726
|
@Override
|
727
|
public boolean isSupportedIdentifier(String value) {
|
728
|
return IdentifierUtils.checkURI(value);
|
729
|
}
|
730
|
|
731
|
}
|