1
|
package org.bgbm.biovel.drf.checklist;
|
2
|
|
3
|
import java.io.PrintStream;
|
4
|
import java.net.URI;
|
5
|
import java.util.ArrayList;
|
6
|
import java.util.EnumSet;
|
7
|
import java.util.Iterator;
|
8
|
import java.util.List;
|
9
|
|
10
|
import org.apache.jena.rdf.model.Model;
|
11
|
import org.apache.jena.rdf.model.Resource;
|
12
|
import org.apache.lucene.queryParser.QueryParser;
|
13
|
import org.bgbm.biovel.drf.client.ServiceProviderInfo;
|
14
|
import org.bgbm.biovel.drf.query.IQueryClient;
|
15
|
import org.bgbm.biovel.drf.query.SparqlClient;
|
16
|
import org.bgbm.biovel.drf.query.TinkerPopClient;
|
17
|
import org.bgbm.biovel.drf.store.Neo4jStore;
|
18
|
import org.bgbm.biovel.drf.store.Store;
|
19
|
import org.bgbm.biovel.drf.store.TDBStore;
|
20
|
import org.bgbm.biovel.drf.tnr.msg.NameType;
|
21
|
import org.bgbm.biovel.drf.tnr.msg.Query;
|
22
|
import org.bgbm.biovel.drf.tnr.msg.Query.Request;
|
23
|
import org.bgbm.biovel.drf.tnr.msg.Response;
|
24
|
import org.bgbm.biovel.drf.tnr.msg.Source;
|
25
|
import org.bgbm.biovel.drf.tnr.msg.Synonym;
|
26
|
import org.bgbm.biovel.drf.tnr.msg.Taxon;
|
27
|
import org.bgbm.biovel.drf.tnr.msg.TaxonBase;
|
28
|
import org.bgbm.biovel.drf.tnr.msg.TaxonName;
|
29
|
import org.bgbm.biovel.drf.tnr.msg.TnrMsg;
|
30
|
import org.bgbm.biovel.drf.utils.IdentifierUtils;
|
31
|
import org.bgbm.biovel.drf.utils.Profiler;
|
32
|
import org.bgbm.biovel.drf.utils.TnrMsgUtils;
|
33
|
import org.neo4j.graphdb.Relationship;
|
34
|
|
35
|
import com.tinkerpop.blueprints.Direction;
|
36
|
import com.tinkerpop.blueprints.Graph;
|
37
|
import com.tinkerpop.blueprints.Vertex;
|
38
|
import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Vertex;
|
39
|
import com.tinkerpop.blueprints.oupls.sail.GraphSail;
|
40
|
import com.tinkerpop.gremlin.java.GremlinPipeline;
|
41
|
import com.tinkerpop.pipes.util.FastNoSuchElementException;
|
42
|
import com.tinkerpop.pipes.util.structures.Table;
|
43
|
|
44
|
public class EEA_BDC_Client extends AggregateChecklistClient<TinkerPopClient> {
|
45
|
|
46
|
/**
|
47
|
*
|
48
|
*/
|
49
|
public static final String ID = "eea_bdc";
|
50
|
public static final String LABEL = "European Environment Agency (EEA) Biodiversity data centre (BDC)";
|
51
|
public static final String DOC_URL = "http://semantic.eea.europa.eu/documentation";
|
52
|
public static final String COPYRIGHT_URL = "http://www.eea.europa.eu/legal/eea-data-policy";
|
53
|
|
54
|
private static final String SPARQL_ENDPOINT_URL = "http://semantic.eea.europa.eu/sparql";
|
55
|
private static final boolean USE_REMOTE_SERVICE = false;
|
56
|
|
57
|
private static final String SPECIES_RDF_FILE_URL = "http://localhost/download/species.rdf.gz"; // http://eunis.eea.europa.eu/rdf/species.rdf.gz
|
58
|
private static final String LEGALREFS_RDF_FILE_URL = "http://localhost/download/legalrefs.rdf.gz"; // http://eunis.eea.europa.eu/rdf/legalrefs.rdf.gz
|
59
|
private static final String REFERENCES_RDF_FILE_URL = "http://localhost/download/references.rdf.gz"; // http://eunis.eea.europa.eu/rdf/references.rdf.gz
|
60
|
private static final boolean REFRESH_TDB = false;
|
61
|
|
62
|
private static final Class<? extends IQueryClient> clientClass = TinkerPopClient.class;
|
63
|
|
64
|
private static final int MAX_PAGING_LIMIT = 50;
|
65
|
|
66
|
public static final EnumSet<SearchMode> SEARCH_MODES = EnumSet.of(
|
67
|
SearchMode.scientificNameExact,
|
68
|
SearchMode.scientificNameLike,
|
69
|
SearchMode.vernacularNameExact,
|
70
|
SearchMode.vernacularNameLike,
|
71
|
SearchMode.findByIdentifier);
|
72
|
|
73
|
public static enum RdfSchema {
|
74
|
|
75
|
/*
|
76
|
* xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
77
|
xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
|
78
|
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
|
79
|
xmlns:dcterms="http://purl.org/dc/terms/"
|
80
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
81
|
xmlns:dwc="http://rs.tdwg.org/dwc/terms/"
|
82
|
xmlns:owl="http://www.w3.org/2002/07/owl#"
|
83
|
xmlns="http://eunis.eea.europa.eu/rdf/species-schema.rdf#"
|
84
|
xmlns:sioc="http://rdfs.org/sioc/ns#"
|
85
|
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
|
86
|
xmlns:bibo="http://purl.org/ontology/bibo/"
|
87
|
xmlns:cc="http://creativecommons.org/ns#"
|
88
|
xmlns:foaf="http://xmlns.com/foaf/0.1/"
|
89
|
*/
|
90
|
EUNIS_SPECIES("es","http://eunis.eea.europa.eu/rdf/species-schema.rdf#"),
|
91
|
EUNIS_TAXONOMY("et", "http://eunis.eea.europa.eu/rdf/taxonomies-schema.rdf#"),
|
92
|
DWC("dwc", "http://rs.tdwg.org/dwc/terms/"),
|
93
|
RDF("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
|
94
|
RDFS("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
|
95
|
SKOS_CORE("scos_core", "http://www.w3.org/2004/02/skos/core#"),
|
96
|
DC("dc", "http://purl.org/dc/terms/source"),
|
97
|
DCTERMS("dcterms", "http://purl.org/dc/terms/");
|
98
|
|
99
|
private String schemaUri;
|
100
|
private String abbreviation;
|
101
|
RdfSchema(String abbreviation, String schemaUri) {
|
102
|
this.abbreviation = abbreviation;
|
103
|
this.schemaUri = schemaUri;
|
104
|
}
|
105
|
|
106
|
public String schemaUri() {
|
107
|
|
108
|
return schemaUri;
|
109
|
}
|
110
|
|
111
|
public String abbreviation() {
|
112
|
|
113
|
return abbreviation;
|
114
|
}
|
115
|
|
116
|
public String propertyURI(String name) {
|
117
|
return schemaUri + name;
|
118
|
}
|
119
|
|
120
|
}
|
121
|
|
122
|
/**
 * Identifiers of the sub checklists of this service; the constant name
 * is used as the identifier string.
 */
public enum SubCheckListId {
    eunis,
    natura_2000
}
|
126
|
|
127
|
private enum RankLevel{
|
128
|
|
129
|
Kingdom, Phylum, Clazz, Order, Family, Genus;
|
130
|
}
|
131
|
|
132
|
public EEA_BDC_Client() {
|
133
|
|
134
|
super();
|
135
|
}
|
136
|
|
137
|
/**
 * Creates the client from a JSON description which is handed over
 * unchanged to the superclass constructor.
 *
 * @param checklistInfoJson the checklist information as JSON string
 * @throws DRFChecklistException if thrown by the superclass constructor
 */
public EEA_BDC_Client(String checklistInfoJson) throws DRFChecklistException {
    super(checklistInfoJson);
}
|
141
|
|
142
|
@Override
|
143
|
public void initQueryClient() {
|
144
|
|
145
|
if(SparqlClient.class.isAssignableFrom(clientClass)) {
|
146
|
if(USE_REMOTE_SERVICE) {
|
147
|
// use SPARQL end point
|
148
|
//FIXME queryClient = new SparqlClient(SPARQL_ENDPOINT_URL);
|
149
|
} else {
|
150
|
TDBStore tripleStore;
|
151
|
try {
|
152
|
tripleStore = new TDBStore();
|
153
|
} catch (Exception e1) {
|
154
|
throw new RuntimeException("Creation of TripleStore failed", e1);
|
155
|
}
|
156
|
if(REFRESH_TDB) {
|
157
|
updateStore(tripleStore);
|
158
|
}
|
159
|
//FIXME queryClient = new SparqlClient(tripleStore);
|
160
|
|
161
|
}
|
162
|
} else if(TinkerPopClient.class.isAssignableFrom(clientClass)) {
|
163
|
if(USE_REMOTE_SERVICE) {
|
164
|
throw new RuntimeException("USE_REMOTE_SERVICE not suported by QueryClient class "+ clientClass);
|
165
|
} else {
|
166
|
Neo4jStore neo4jStore;
|
167
|
try {
|
168
|
neo4jStore = new Neo4jStore();
|
169
|
} catch (Exception e1) {
|
170
|
throw new RuntimeException("Creation of Neo4jStore failed", e1);
|
171
|
}
|
172
|
if(REFRESH_TDB) {
|
173
|
updateStore(neo4jStore);
|
174
|
}
|
175
|
queryClient = new TinkerPopClient(neo4jStore);
|
176
|
|
177
|
}
|
178
|
|
179
|
} else {
|
180
|
throw new RuntimeException("Unsuported QueryClient class "+ clientClass);
|
181
|
}
|
182
|
}
|
183
|
|
184
|
/**
|
185
|
* @param neo4jStore
|
186
|
*/
|
187
|
private void updateStore(Store neo4jStore) {
|
188
|
try {
|
189
|
neo4jStore.loadIntoStore(
|
190
|
SPECIES_RDF_FILE_URL,
|
191
|
LEGALREFS_RDF_FILE_URL,
|
192
|
REFERENCES_RDF_FILE_URL
|
193
|
);
|
194
|
} catch (Exception e) {
|
195
|
throw new RuntimeException("Loading "
|
196
|
+ SPECIES_RDF_FILE_URL + ", "
|
197
|
+ LEGALREFS_RDF_FILE_URL + ", "
|
198
|
+ REFERENCES_RDF_FILE_URL +
|
199
|
" into Neo4jStore failed", e);
|
200
|
}
|
201
|
}
|
202
|
|
203
|
@Override
|
204
|
public ServiceProviderInfo buildServiceProviderInfo() {
|
205
|
|
206
|
ServiceProviderInfo checklistInfo = new ServiceProviderInfo(ID, LABEL, DOC_URL, COPYRIGHT_URL, getSearchModes());
|
207
|
checklistInfo.addSubChecklist(new ServiceProviderInfo(SubCheckListId.eunis.name(), "EUNIS",
|
208
|
"http://www.eea.europa.eu/themes/biodiversity/eunis/eunis-db#tab-metadata",
|
209
|
"http://www.eea.europa.eu/legal/copyright", SEARCH_MODES));
|
210
|
return checklistInfo;
|
211
|
}
|
212
|
|
213
|
|
214
|
/**
|
215
|
* @param queryString
|
216
|
* @throws DRFChecklistException
|
217
|
*/
|
218
|
private void addPrexfixes(StringBuilder queryString) throws DRFChecklistException {
|
219
|
|
220
|
for(RdfSchema schema : RdfSchema.values()) {
|
221
|
queryString.append(String.format("PREFIX %s: <%s>\n", schema.abbreviation(), schema.schemaUri()));
|
222
|
}
|
223
|
}
|
224
|
|
225
|
/**
|
226
|
* @param checklistInfo
|
227
|
* @return
|
228
|
* @throws DRFChecklistException
|
229
|
*/
|
230
|
private StringBuilder prepareQueryString() throws DRFChecklistException {
|
231
|
|
232
|
StringBuilder queryString = new StringBuilder();
|
233
|
addPrexfixes(queryString);
|
234
|
return queryString;
|
235
|
}
|
236
|
|
237
|
private Taxon createTaxon(Vertex v) {
|
238
|
|
239
|
Taxon taxon = new Taxon();
|
240
|
|
241
|
TaxonName taxonName = createTaxonName(v);
|
242
|
|
243
|
// Taxon
|
244
|
taxon.setTaxonName(taxonName);
|
245
|
taxon.setIdentifier(v.getId().toString());
|
246
|
taxon.setAccordingTo(queryClient.relatedVertexValue(v, RdfSchema.DWC, "nameAccordingToID"));
|
247
|
URI typeUri = queryClient.relatedVertexURI(v, RdfSchema.RDF, "type");
|
248
|
taxon.setTaxonomicStatus(typeUri.getFragment());
|
249
|
|
250
|
createSources(v, taxon);
|
251
|
|
252
|
/*
|
253
|
|
254
|
// classification
|
255
|
Classification c = null;
|
256
|
Resource parentR = queryClient.objectAsResource(taxonR, RdfSchema.EUNIS_SPECIES, "taxonomy");
|
257
|
while (parentR != null) {
|
258
|
|
259
|
String level = queryClient.objectAsString(parentR, RdfSchema.EUNIS_TAXONOMY, "level");
|
260
|
String parentTaxonName = queryClient.objectAsString(parentR, RdfSchema.EUNIS_TAXONOMY, "name");
|
261
|
|
262
|
RankLevel rankLevel = null;
|
263
|
try {
|
264
|
rankLevel = RankLevel.valueOf(level);
|
265
|
} catch (Exception e) {
|
266
|
// IGNORE
|
267
|
}
|
268
|
if(rankLevel != null) {
|
269
|
if(c == null) {
|
270
|
c = new Classification();
|
271
|
}
|
272
|
switch(rankLevel) {
|
273
|
case Clazz:
|
274
|
c.setClazz(parentTaxonName);
|
275
|
break;
|
276
|
case Family:
|
277
|
c.setFamily(parentTaxonName);
|
278
|
break;
|
279
|
case Genus:
|
280
|
c.setGenus(parentTaxonName);
|
281
|
break;
|
282
|
case Kingdom:
|
283
|
c.setKingdom(parentTaxonName);
|
284
|
break;
|
285
|
case Order:
|
286
|
c.setOrder(parentTaxonName);
|
287
|
break;
|
288
|
case Phylum:
|
289
|
c.setPhylum(parentTaxonName);
|
290
|
break;
|
291
|
default:
|
292
|
break;
|
293
|
}
|
294
|
}
|
295
|
Resource lastParentR = parentR;
|
296
|
parentR = queryClient.objectAsResource(parentR, RdfSchema.EUNIS_TAXONOMY, "parent");
|
297
|
if(lastParentR.equals(parentR)) {
|
298
|
// avoid endless looping when data is not correct
|
299
|
break;
|
300
|
}
|
301
|
}
|
302
|
if(c != null) {
|
303
|
taxon.setClassification(c);
|
304
|
}
|
305
|
*/
|
306
|
return taxon;
|
307
|
}
|
308
|
|
309
|
/**
|
310
|
* @param model
|
311
|
* @param taxonR
|
312
|
* @param taxonBase
|
313
|
*/
|
314
|
private void createSources(Vertex v, TaxonBase taxonBase) {
|
315
|
|
316
|
// Sources are source references, re there others like data bases?
|
317
|
|
318
|
GremlinPipeline<Graph, Vertex> taxonPipe = new GremlinPipeline<Graph, Vertex>(v);
|
319
|
|
320
|
try {
|
321
|
List<Vertex> titleVs = taxonPipe
|
322
|
.outE(RdfSchema.EUNIS_SPECIES.propertyURI("hasLegalReference")).inV()
|
323
|
.outE(RdfSchema.DCTERMS.propertyURI("source")).inV().dedup()
|
324
|
.outE(RdfSchema.DCTERMS.propertyURI("title")).inV()
|
325
|
.toList();
|
326
|
for(Vertex tv : titleVs) {
|
327
|
Source source = new Source();
|
328
|
logger.error(tv.toString());
|
329
|
source.setName(tv.getProperty(GraphSail.VALUE).toString());
|
330
|
taxonBase.getSources().add(source);
|
331
|
}
|
332
|
} catch (FastNoSuchElementException e) {
|
333
|
logger.debug("No sources found");
|
334
|
}
|
335
|
}
|
336
|
|
337
|
/**
|
338
|
* @param taxonR
|
339
|
* @return
|
340
|
*/
|
341
|
private TaxonName createTaxonName(Vertex v) {
|
342
|
|
343
|
TaxonName taxonName = new TaxonName();
|
344
|
// TaxonName
|
345
|
taxonName.setFullName(queryClient.relatedVertexValue(v, RdfSchema.RDFS, "label"));
|
346
|
// TODO rename CanonicalName to scientificName? compare with dwc:scientificName
|
347
|
taxonName.setCanonicalName(queryClient.relatedVertexValue(v, RdfSchema.EUNIS_SPECIES, "binomialName"));
|
348
|
taxonName.setRank(queryClient.relatedVertexValue(v, RdfSchema.EUNIS_SPECIES, "taxonomicRank"));
|
349
|
return taxonName;
|
350
|
}
|
351
|
|
352
|
|
353
|
private void createSynonyms(Vertex taxonV, Response tnrResponse) {
|
354
|
|
355
|
|
356
|
GremlinPipeline<Graph, Vertex> taxonPipe = new GremlinPipeline<Graph, Vertex>(taxonV);
|
357
|
|
358
|
try {
|
359
|
List<Vertex> synonymVs = taxonPipe
|
360
|
.inE(RdfSchema.EUNIS_SPECIES.propertyURI("eunisPrimaryName")).outV().dedup()
|
361
|
.toList();
|
362
|
for(Vertex synonymV : synonymVs) {
|
363
|
String typeUri = queryClient.relatedVertexValue(synonymV, RdfSchema.RDF, "type");
|
364
|
String status = null;
|
365
|
try {
|
366
|
status = URI.create(typeUri).getFragment();
|
367
|
} catch (Exception e) {
|
368
|
|
369
|
}
|
370
|
|
371
|
if (status != null && status.equals("SpeciesSynonym")) {
|
372
|
|
373
|
Synonym synonym = new Synonym();
|
374
|
|
375
|
TaxonName taxonName = createTaxonName(synonymV);
|
376
|
synonym.setTaxonomicStatus(status);
|
377
|
synonym.setTaxonName(taxonName);
|
378
|
synonym.setAccordingTo(queryClient.relatedVertexValue(synonymV, RdfSchema.DWC, "nameAccordingToID"));
|
379
|
|
380
|
createSources(synonymV, synonym);
|
381
|
|
382
|
tnrResponse.getSynonym().add(synonym);
|
383
|
}
|
384
|
}
|
385
|
} catch (FastNoSuchElementException e) {
|
386
|
logger.debug("No sources found");
|
387
|
}
|
388
|
|
389
|
}
|
390
|
|
391
|
/**
|
392
|
* Returns all subjects that are related to the taxonR
|
393
|
* via the es:eunisPrimaryName property.
|
394
|
*
|
395
|
* @param taxonR
|
396
|
* @return
|
397
|
*/
|
398
|
private List<Resource> queryForSynonyms(Resource taxonR) {
|
399
|
/* FIXME
|
400
|
List<Resource> synonymRList = null;
|
401
|
|
402
|
try {
|
403
|
StringBuilder queryString = prepareQueryString();
|
404
|
|
405
|
queryString.append("DESCRIBE ?synonym es:eunisPrimaryName <" + taxonR.getURI() + ">");
|
406
|
logger.debug("\n" + queryString.toString());
|
407
|
|
408
|
Model model = queryClient.describe(queryString.toString());
|
409
|
synonymRList = listSynonymResources(model, taxonR);
|
410
|
|
411
|
} catch (DRFChecklistException e) {
|
412
|
logger.error("SPARQL query error in queryForSynonyms()", e);
|
413
|
} finally {
|
414
|
if(synonymRList == null) {
|
415
|
synonymRList = new ArrayList<Resource>(0);
|
416
|
}
|
417
|
}
|
418
|
|
419
|
return synonymRList;
|
420
|
*/ return null;
|
421
|
}
|
422
|
|
423
|
/**
|
424
|
* @param model
|
425
|
* @return
|
426
|
*/
|
427
|
private List<Resource> listSynonymResources(Model model, Resource taxonR) {
|
428
|
List<Resource> synonymRList;
|
429
|
/*
|
430
|
Property filterProperty = model.createProperty(RdfSchema.EUNIS_SPECIES.schemaUri, "eunisPrimaryName");
|
431
|
synonymRList = queryClient.listResources(model, filterProperty, null, taxonR);
|
432
|
return synonymRList;
|
433
|
*/
|
434
|
return null;
|
435
|
}
|
436
|
|
437
|
@Override
|
438
|
public void resolveScientificNamesExact(TnrMsg tnrMsg) throws DRFChecklistException {
|
439
|
|
440
|
List<Query> queryList = tnrMsg.getQuery();
|
441
|
|
442
|
// selecting one request as representative, only
|
443
|
// the search mode and addSynonmy flag are important
|
444
|
// for the further usage of the request object
|
445
|
|
446
|
for (ServiceProviderInfo checklistInfo : getServiceProviderInfo().getSubChecklists()) {
|
447
|
|
448
|
Query query = singleQueryFrom(tnrMsg);
|
449
|
|
450
|
String queryString = query.getRequest().getQueryString();
|
451
|
logger.debug("original queryString: "+ queryString);
|
452
|
queryString = QueryParser.escape(queryString);
|
453
|
queryString = queryString.replace(" ", "\\ ");
|
454
|
if(query.getRequest().getSearchMode().equals(SearchMode.scientificNameLike.name())) {
|
455
|
queryString += "*";
|
456
|
}
|
457
|
logger.debug("prepared queryString: "+ queryString);
|
458
|
|
459
|
GremlinPipeline<Graph, Vertex> pipe = null;
|
460
|
|
461
|
Profiler profiler = Profiler.newCpuProfiler(false);
|
462
|
|
463
|
logger.debug("Neo4jINDEX");
|
464
|
|
465
|
ArrayList<Vertex> hitVs = queryClient.vertexIndexQuery("value:" + queryString);
|
466
|
pipe = new GremlinPipeline<Graph, Vertex>(hitVs);
|
467
|
|
468
|
List<Vertex> vertices = new ArrayList<Vertex>();
|
469
|
pipe.in(RdfSchema.EUNIS_SPECIES.propertyURI("binomialName")).fill(vertices);
|
470
|
|
471
|
updateQueriesWithResponse(vertices, null, null, checklistInfo, query);
|
472
|
profiler.end(System.err);
|
473
|
}
|
474
|
}
|
475
|
|
476
|
@Override
|
477
|
public void resolveScientificNamesLike(TnrMsg tnrMsg) throws DRFChecklistException {
|
478
|
// delegate to resolveScientificNamesExact,
|
479
|
resolveScientificNamesExact(tnrMsg);
|
480
|
|
481
|
}
|
482
|
|
483
|
@Override
|
484
|
public void resolveVernacularNamesExact(TnrMsg tnrMsg) throws DRFChecklistException {
|
485
|
List<Query> queryList = tnrMsg.getQuery();
|
486
|
|
487
|
for (ServiceProviderInfo checklistInfo : getServiceProviderInfo().getSubChecklists()) {
|
488
|
|
489
|
// selecting one request as representative, only
|
490
|
// the search mode and addSynonmy flag are important
|
491
|
// for the further usage of the request object
|
492
|
Query query = singleQueryFrom(tnrMsg);
|
493
|
|
494
|
String queryString = query.getRequest().getQueryString();
|
495
|
logger.debug("original queryString: "+ queryString);
|
496
|
queryString = QueryParser.escape(queryString);
|
497
|
queryString = queryString.replace(" ", "\\ ");
|
498
|
if(query.getRequest().getSearchMode().equals(SearchMode.vernacularNameLike.name())) {
|
499
|
queryString = "*" + queryString + "*";
|
500
|
}
|
501
|
|
502
|
logger.debug("prepared queryString: "+ queryString);
|
503
|
|
504
|
GremlinPipeline<Graph, Vertex> pipe = null;
|
505
|
|
506
|
Profiler profiler = Profiler.newCpuProfiler(false);
|
507
|
|
508
|
// by using the Neo4j index directly it is possible to
|
509
|
// take full advantage of the underlying Lucene search engine
|
510
|
ArrayList<Vertex> hitVs = queryClient.vertexIndexQuery("value:" + queryString);
|
511
|
|
512
|
// List<String> matchingNames = new ArrayList<String>(hitVs.size());
|
513
|
// for(Vertex v : hitVs) {
|
514
|
// String matchValue = v.getProperty(GraphSail.VALUE).toString();
|
515
|
// matchingNames.add(matchValue);
|
516
|
// logger.debug("matchingName " + matchValue);
|
517
|
// }
|
518
|
|
519
|
List<Vertex> vertices = new ArrayList<Vertex>();
|
520
|
pipe = new GremlinPipeline<Graph, Vertex>(hitVs);
|
521
|
Table table = new Table();
|
522
|
pipe.as("match").in(RdfSchema.DWC.propertyURI("vernacularName")).as("taxon").table(table).iterate();
|
523
|
|
524
|
updateQueriesWithResponse(
|
525
|
table.getColumn("taxon"), table.getColumn("match"),
|
526
|
NameType.VERNACULAR_NAME, checklistInfo, query);
|
527
|
profiler.end(System.err);
|
528
|
}
|
529
|
}
|
530
|
|
531
|
@Override
|
532
|
public void resolveVernacularNamesLike(TnrMsg tnrMsg) throws DRFChecklistException {
|
533
|
resolveVernacularNamesExact(tnrMsg);
|
534
|
}
|
535
|
|
536
|
@Override
|
537
|
public void findByIdentifier(TnrMsg tnrMsg) throws DRFChecklistException {
|
538
|
|
539
|
for (ServiceProviderInfo checklistInfo : getServiceProviderInfo().getSubChecklists()) {
|
540
|
|
541
|
// FIXME query specific subchecklist
|
542
|
Query query = singleQueryFrom(tnrMsg);
|
543
|
String queryString = query.getRequest().getQueryString();
|
544
|
|
545
|
// by using the Neo4j index directly it is possible to
|
546
|
// take full advantage of the underlying Lucene search engine
|
547
|
queryString = QueryParser.escape(queryString);
|
548
|
ArrayList<Vertex> hitVs = queryClient.vertexIndexQuery("value:" + queryString);
|
549
|
if(hitVs.size() > 0) {
|
550
|
Response response = tnrResponseFromResource(hitVs.get(0), query.getRequest(), null, null);
|
551
|
query.getResponse().add(response);
|
552
|
} else if(hitVs.size() > 1) {
|
553
|
throw new DRFChecklistException("More than one node with the id '" + queryString + "' found");
|
554
|
}
|
555
|
}
|
556
|
}
|
557
|
|
558
|
private void updateQueriesWithResponse(List<Vertex> taxonNodes, List<Vertex> matchNodes, NameType matchType, ServiceProviderInfo ci, Query query){
|
559
|
|
560
|
if (taxonNodes == null) {
|
561
|
return;
|
562
|
}
|
563
|
|
564
|
logger.debug("matching taxon nodes:");
|
565
|
int i = -1;
|
566
|
for (Vertex v : taxonNodes) {
|
567
|
i++;
|
568
|
logger.debug(" " + v.toString());
|
569
|
printPropertyKeys(v, System.err);
|
570
|
if(v.getProperty("kind").equals("url")) {
|
571
|
logger.error("vertex of type 'url' expected, but was " + v.getProperty("type").equals("url"));
|
572
|
continue;
|
573
|
}
|
574
|
Vertex matchNode = null;
|
575
|
if(matchNodes != null) {
|
576
|
matchNode = matchNodes.get(i);
|
577
|
}
|
578
|
Response tnrResponse = tnrResponseFromResource(v, query.getRequest(), matchNode, matchType);
|
579
|
if(tnrResponse != null) {
|
580
|
query.getResponse().add(tnrResponse);
|
581
|
}
|
582
|
}
|
583
|
}
|
584
|
|
585
|
/**
|
586
|
* @param model
|
587
|
* @param taxonR
|
588
|
* @param request
|
589
|
* @param matchType
|
590
|
* @param matchNode
|
591
|
* @return
|
592
|
*/
|
593
|
@SuppressWarnings("unused")
|
594
|
private Response tnrResponseFromResource(Vertex taxonV, Request request, Vertex matchNode, NameType matchType) {
|
595
|
|
596
|
Response tnrResponse = TnrMsgUtils.tnrResponseFor(getServiceProviderInfo());
|
597
|
|
598
|
SearchMode searchMode = SearchMode.valueOf(request.getSearchMode());
|
599
|
|
600
|
GremlinPipeline<Graph, Vertex> pipe = new GremlinPipeline<Graph, Vertex>(taxonV);
|
601
|
|
602
|
String validName = queryClient.relatedVertexValue(taxonV, RdfSchema.EUNIS_SPECIES, "validName");
|
603
|
|
604
|
boolean isAccepted = validName != null && validName.equals("true");
|
605
|
|
606
|
logger.debug("processing " + (isAccepted ? "accepted taxon" : "synonym or other") + " " + taxonV.getId());
|
607
|
|
608
|
//
|
609
|
if(matchNode != null) {
|
610
|
String matchingName = matchNode.getProperty(GraphSail.VALUE).toString();
|
611
|
tnrResponse.setMatchingNameString(matchingName);
|
612
|
tnrResponse.setMatchingNameType(matchType);
|
613
|
}
|
614
|
|
615
|
// case when accepted name
|
616
|
if(isAccepted) {
|
617
|
Taxon taxon = createTaxon(taxonV);
|
618
|
tnrResponse.setTaxon(taxon);
|
619
|
if(matchNode == null) {
|
620
|
tnrResponse.setMatchingNameType(NameType.TAXON);
|
621
|
String matchingName = taxon.getTaxonName().getCanonicalName();
|
622
|
tnrResponse.setMatchingNameString(matchingName);
|
623
|
}
|
624
|
|
625
|
}
|
626
|
else {
|
627
|
// case when synonym
|
628
|
Vertex synonymV = taxonV;
|
629
|
taxonV = null;
|
630
|
try {
|
631
|
taxonV = synonymV.getEdges(Direction.OUT, RdfSchema.EUNIS_SPECIES.propertyURI("eunisPrimaryName")).iterator().next().getVertex(Direction.IN);
|
632
|
} catch(Exception e) {
|
633
|
logger.error("No accepted taxon found for " + synonymV.toString() + " (" + synonymV.getProperty(GraphSail.VALUE) + ")");
|
634
|
}
|
635
|
|
636
|
if(taxonV != null) {
|
637
|
Taxon taxon = createTaxon(taxonV);
|
638
|
tnrResponse.setTaxon(taxon);
|
639
|
} else {
|
640
|
}
|
641
|
if(matchNode == null) {
|
642
|
tnrResponse.setMatchingNameType(NameType.SYNONYM);
|
643
|
String matchingName = queryClient.relatedVertexValue(synonymV, RdfSchema.EUNIS_SPECIES, "binomialName");
|
644
|
tnrResponse.setMatchingNameString(matchingName);
|
645
|
}
|
646
|
}
|
647
|
|
648
|
if(request.isAddSynonymy()) {
|
649
|
createSynonyms(taxonV, tnrResponse);
|
650
|
}
|
651
|
|
652
|
logger.debug("processing " + (isAccepted ? "accepted taxon" : "synonym or other") + " " + taxonV.getId() + " DONE");
|
653
|
return tnrResponse;
|
654
|
}
|
655
|
|
656
|
/**
|
657
|
* @param vertex
|
658
|
*/
|
659
|
private void printEdges(Neo4j2Vertex vertex) {
|
660
|
Iterable<Relationship> rels = vertex.getRawVertex().getRelationships();
|
661
|
Iterator<Relationship> iterator = rels.iterator();
|
662
|
if(iterator.hasNext()) {
|
663
|
Relationship rel = iterator.next();
|
664
|
System.err.println(rel.toString() + ": " + rel.getStartNode().toString() + "-[" + rel.getType() + "]-" + rel.getEndNode().toString());
|
665
|
}
|
666
|
}
|
667
|
|
668
|
private void printPropertyKeys(Vertex v, PrintStream ps) {
|
669
|
StringBuilder out = new StringBuilder();
|
670
|
out.append(v.toString());
|
671
|
for(String key : v.getPropertyKeys()) {
|
672
|
out.append(key).append(": ").append(v.getProperty(key)).append(" ");
|
673
|
}
|
674
|
ps.println(out.toString());
|
675
|
}
|
676
|
|
677
|
@Override
|
678
|
public EnumSet<SearchMode> getSearchModes() {
|
679
|
return SEARCH_MODES;
|
680
|
}
|
681
|
|
682
|
@Override
|
683
|
public boolean isSupportedIdentifier(String value) {
|
684
|
return IdentifierUtils.checkURI(value);
|
685
|
}
|
686
|
|
687
|
}
|