Project

General

Profile

Download (16.8 KB) Statistics
| Branch: | Tag: | Revision:
1
package org.bgbm.biovel.drf.checklist;
2

    
3
import java.net.URI;
4
import java.util.EnumSet;
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.NoSuchElementException;
8

    
9
import org.apache.jena.rdf.model.Model;
10
import org.apache.jena.rdf.model.ResIterator;
11
import org.apache.jena.rdf.model.Resource;
12
import org.apache.jena.rdf.model.StmtIterator;
13
import org.bgbm.biovel.drf.client.ServiceProviderInfo;
14
import org.bgbm.biovel.drf.query.SparqlClient;
15
import org.bgbm.biovel.drf.tnr.msg.Classification;
16
import org.bgbm.biovel.drf.tnr.msg.NameType;
17
import org.bgbm.biovel.drf.tnr.msg.Query;
18
import org.bgbm.biovel.drf.tnr.msg.Query.Request;
19
import org.bgbm.biovel.drf.tnr.msg.Response;
20
import org.bgbm.biovel.drf.tnr.msg.Source;
21
import org.bgbm.biovel.drf.tnr.msg.Synonym;
22
import org.bgbm.biovel.drf.tnr.msg.Taxon;
23
import org.bgbm.biovel.drf.tnr.msg.TaxonName;
24
import org.bgbm.biovel.drf.tnr.msg.TnrMsg;
25
import org.bgbm.biovel.drf.utils.TnrMsgUtils;
26
import org.gbif.nameparser.NameParser;
27
import org.json.simple.JSONArray;
28
import org.json.simple.JSONObject;
29

    
30
public class EEA_BDC_Client extends AggregateChecklistClient<SparqlClient> {
31

    
32
    /**
33
     *
34
     */
35
    public static final String ID = "eea_bdc";
36
    public static final String LABEL = "European Environment Agency (EEA) Biodiversity data centre (BDC)";
37
    public static final String DOC_URL = "http://semantic.eea.europa.eu/documentation";
38
    public static final String COPYRIGHT_URL = "http://www.eea.europa.eu/legal/eea-data-policy";
39
    private static final String SPARQL_ENDPOINT_URL = "http://semantic.eea.europa.eu/sparql";
40
    private static final String RDF_FILE_URL = "http://localhost/download/species.rdf.gz"; // http://eunis.eea.europa.eu/rdf/species.rdf.gz
41
    private static final boolean USE_REMOTE_SERVICE = false;
42

    
43
    private static final int MAX_PAGING_LIMIT = 50;
44

    
45
    public static final EnumSet<SearchMode> SEARCH_MODES = EnumSet.of(
46
            SearchMode.scientificNameExact,
47
            SearchMode.scientificNameLike,
48
            SearchMode.findByIdentifier);
49

    
50
    /**
     * RDF vocabularies known to this client. Each constant pairs the prefix
     * label written into the SPARQL prologue with the namespace URI it stands for.
     */
    public enum RdfSchema {

        /*
         * Namespace declarations kept for reference:
         *
         *     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         *     xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
         *     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         *     xmlns:dcterms="http://purl.org/dc/terms/"
         *     xmlns:dc="http://purl.org/dc/elements/1.1/"
         *     xmlns:dwc="http://rs.tdwg.org/dwc/terms/"
         *     xmlns:owl="http://www.w3.org/2002/07/owl#"
         *     xmlns="http://eunis.eea.europa.eu/rdf/species-schema.rdf#"
         *     xmlns:sioc="http://rdfs.org/sioc/ns#"
         *     xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         *     xmlns:bibo="http://purl.org/ontology/bibo/"
         *     xmlns:cc="http://creativecommons.org/ns#"
         *     xmlns:foaf="http://xmlns.com/foaf/0.1/"
         */
        EUNIS_SPECIES("es", "http://eunis.eea.europa.eu/rdf/species-schema.rdf#"),
        EUNIS_TAXONOMY("et", "http://eunis.eea.europa.eu/rdf/taxonomies-schema.rdf#"),
        DWC("dwc", "http://rs.tdwg.org/dwc/terms/"),
        RDF("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        // NOTE(review): RDFS carries the same abbreviation "rdf" as RDF, so the
        // generated prologue declares the prefix "rdf:" twice. The name query
        // writes rdf:label, so renaming this prefix requires adapting that query too.
        RDFS("rdf", "http://www.w3.org/2000/01/rdf-schema#"),
        SKOS_CORE("scos_core", "http://www.w3.org/2004/02/skos/core#");

        private final String abbreviation;
        private final String schemaUri;

        RdfSchema(String abbreviation, String schemaUri) {
            this.schemaUri = schemaUri;
            this.abbreviation = abbreviation;
        }

        /** @return the namespace URI of the vocabulary */
        public String schemaUri() {
            return this.schemaUri;
        }

        /** @return the prefix label used for the vocabulary in SPARQL queries */
        public String abbreviation() {
            return this.abbreviation;
        }
    }
92

    
93
    /**
     * Identifiers of the sub-checklists of this provider. Only {@code eunis}
     * is registered in {@code buildServiceProviderInfo()}; {@code natura_2000}
     * is declared but not referenced within this class.
     */
    public enum subCheckListIds {
        eunis,
        natura_2000
    }
97

    
98
    private enum RankLevel{
99

    
100
        Kingdom, Phylum, Clazz, Order, Family, Genus;
101
    }
102

    
103
    public EEA_BDC_Client() {
104

    
105
        super();
106
    }
107

    
108
    /**
     * Creates the client from a checklist info given as JSON text; the text is
     * handed to the super class constructor unchanged.
     *
     * @param checklistInfoJson the checklist info as JSON
     * @throws DRFChecklistException as declared by the super class constructor
     */
    public EEA_BDC_Client(String checklistInfoJson) throws DRFChecklistException {
        super(checklistInfoJson);
    }
112

    
113
    @Override
114
    public void initQueryClient() {
115

    
116
        if(USE_REMOTE_SERVICE) {
117
            // use SPARQL end point
118
            queryClient = new SparqlClient(SPARQL_ENDPOINT_URL, SparqlClient.Opmode.SPARCLE_ENDPOINT);
119
        } else {
120
            // use downloadable rdf
121
            queryClient = new SparqlClient(RDF_FILE_URL, SparqlClient.Opmode.RDF_ARCHIVE);
122
            // reuse existing TDB_STORE
123
            // queryClient = new SparqlClient(null, SparqlClient.Opmode.RDF_ARCHIVE);
124
        }
125
    }
126

    
127
    @Override
128
    public ServiceProviderInfo buildServiceProviderInfo() {
129

    
130
        ServiceProviderInfo checklistInfo = new ServiceProviderInfo(ID, LABEL, DOC_URL, COPYRIGHT_URL, getSearchModes());
131
        checklistInfo.addSubChecklist(new ServiceProviderInfo(subCheckListIds.eunis.name(), "EUNIS",
132
                "http://www.eea.europa.eu/themes/biodiversity/eunis/eunis-db#tab-metadata",
133
                "http://www.eea.europa.eu/legal/copyright", SEARCH_MODES));
134
        return checklistInfo;
135
    }
136

    
137

    
138
    /**
139
     * @param queryString
140
     * @throws DRFChecklistException
141
     */
142
    private void addPrexfixes(StringBuilder queryString) throws DRFChecklistException {
143

    
144
        for(RdfSchema schema : RdfSchema.values()) {
145
            queryString.append(String.format("PREFIX %s: <%s>\n", schema.abbreviation(), schema.schemaUri()));
146
        }
147
    }
148

    
149
    /**
150
     * @param checklistInfo
151
     * @return
152
     * @throws DRFChecklistException
153
     */
154
    private StringBuilder prepareQueryString() throws DRFChecklistException {
155

    
156
        StringBuilder queryString = new StringBuilder();
157
        addPrexfixes(queryString);
158
        return queryString;
159
    }
160

    
161
    private Taxon generateTaxon(Model model, Resource taxonR) {
162

    
163
        Taxon taxon = new Taxon();
164
        TaxonName taxonName = new TaxonName();
165

    
166
        // TaxonName
167
        taxonName.setFullName(queryClient.objectAsString(taxonR, RdfSchema.RDFS, "label"));
168
        NameParser ecatParser = new NameParser();
169
        String nameCanonical = ecatParser.parseToCanonical(taxonName.getFullName());
170
        taxonName.setCanonicalName(nameCanonical);
171
        taxonName.setRank(queryClient.objectAsString(taxonR, RdfSchema.EUNIS_SPECIES, "taxonomicRank"));
172

    
173
        // Taxon
174
        taxon.setTaxonName(taxonName);
175
        taxon.setIdentifier(taxonR.getURI());
176
        taxon.setAccordingTo(queryClient.objectAsString(taxonR, RdfSchema.DWC, "nameAccordingToID"));
177
        taxon.setTaxonomicStatus(queryClient.objectAsURI(taxonR, RdfSchema.RDF, "type").getFragment());
178

    
179
        // Sources are source references, re there others like data bases?
180
        for ( StmtIterator refIt = taxonR.listProperties(model.getProperty("rdf", "hasLegalReference")); refIt.hasNext();) {
181
            try {
182
            Source source = new Source();
183
            Resource sourceR = refIt.next().getObject().asResource();
184
            String sourceName = queryClient.objectAsString(sourceR, RdfSchema.RDFS, "source");
185
            source.setName(sourceName);
186
            taxon.getSources().add(source);
187
            } catch (NoSuchElementException e) {
188
                logger.debug("No statements for rdf:hasLegalReference" , e);
189
            }
190
        }
191

    
192
        // classification
193
        Classification c = null;
194
        Resource parentR = queryClient.objectAsResource(taxonR, RdfSchema.EUNIS_SPECIES, "taxonomy");
195
        while (parentR != null) {
196

    
197
            String level = queryClient.objectAsString(parentR, RdfSchema.EUNIS_TAXONOMY, "level");
198
            String parentTaxonName = queryClient.objectAsString(parentR, RdfSchema.EUNIS_TAXONOMY, "name");
199

    
200
            RankLevel rankLevel = null;
201
            try {
202
                rankLevel = RankLevel.valueOf(level);
203
            } catch (Exception e) {
204
                // IGNORE
205
            }
206
            if(rankLevel != null) {
207
                if(c == null) {
208
                 c = new Classification();
209
                }
210
                switch(rankLevel) {
211
                case Clazz:
212
                    c.setClazz(parentTaxonName);
213
                    break;
214
                case Family:
215
                    c.setFamily(parentTaxonName);
216
                    break;
217
                case Genus:
218
                    c.setGenus(parentTaxonName);
219
                    break;
220
                case Kingdom:
221
                    c.setKingdom(parentTaxonName);
222
                    break;
223
                case Order:
224
                    c.setOrder(parentTaxonName);
225
                    break;
226
                case Phylum:
227
                    c.setPhylum(parentTaxonName);
228
                    break;
229
                default:
230
                    break;
231
                }
232
            }
233
            Resource lastParentR = parentR;
234
            parentR = queryClient.objectAsResource(parentR, RdfSchema.EUNIS_TAXONOMY, "parent");
235
            if(lastParentR.equals(parentR)) {
236
                // avoid endless looping when data is not correct
237
                break;
238
            }
239
        }
240
        if(c != null) {
241
            taxon.setClassification(c);
242
        }
243
        return taxon;
244
    }
245

    
246

    
247

    
248

    
249
    private void generateSynonyms(JSONArray relatedTaxa, Response tnrResponse) {
250

    
251
        Iterator<JSONObject> itrSynonyms = relatedTaxa.iterator();
252
        while (itrSynonyms.hasNext()) {
253

    
254
            JSONObject synonymjs = itrSynonyms.next();
255
            String status = (String) synonymjs.get("taxonStatus");
256
            if (status != null && status.equals("synonym")) {
257
                Synonym synonym = new Synonym();
258
                TaxonName taxonName = new TaxonName();
259

    
260
                String resName = (String) synonymjs.get("name");
261
                taxonName.setFullName(resName);
262
                NameParser ecatParser = new NameParser();
263
                String nameCanonical = ecatParser.parseToCanonical(resName);
264
                taxonName.setCanonicalName(nameCanonical);
265
                synonym.setTaxonomicStatus((String) synonymjs.get("taxonStatus"));
266

    
267
                taxonName.setRank((String) synonymjs.get("rank"));
268

    
269
                synonym.setTaxonName(taxonName);
270

    
271
                JSONObject scrutinyjs = (JSONObject) synonymjs.get("taxonomicScrutiny");
272
                synonym.setAccordingTo((String) scrutinyjs.get("accordingTo"));
273

    
274
                JSONObject sourcejs = (JSONObject) synonymjs.get("source");
275
                String sourceUrl = (String) sourcejs.get("url");
276
                String sourceDatasetID = (String) sourcejs.get("datasetID");
277
                String sourceDatasetName = (String) sourcejs.get("datasetName");
278
                String sourceName = "";
279

    
280
                Source source = new Source();
281
                source.setIdentifier(sourceDatasetID);
282
                source.setDatasetName(sourceDatasetName);
283
                source.setName(sourceName);
284
                source.setUrl(sourceUrl);
285
                synonym.getSources().add(source);
286

    
287
                tnrResponse.getSynonym().add(synonym);
288
            }
289
        }
290
    }
291

    
292
    @Override
293
    public void resolveScientificNamesExact(TnrMsg tnrMsg) throws DRFChecklistException {
294

    
295
        List<Query> queryList = tnrMsg.getQuery();
296

    
297
        // selecting one request as representative, only
298
        // the search mode and addSynonmy flag are important
299
        // for the further usage of the request object
300

    
301
        for (ServiceProviderInfo checklistInfo : getServiceProviderInfo().getSubChecklists()) {
302

    
303
            Query query = singleQueryFrom(tnrMsg);
304
            StringBuilder queryString = prepareQueryString();
305

    
306
            String filter = "";
307
            if(query.getRequest().getSearchMode().equals(SearchMode.scientificNameLike.name())) {
308
                filter = "regex(?name, \"" + query.getRequest().getQueryString() + "\")";
309
            } else {
310
                filter = "(?name = \"" + query.getRequest().getQueryString() + "\")";
311
            }
312

    
313
            queryString.append(
314
                    "DESCRIBE ?eunisurl \n"
315
                    + "WHERE {\n"
316
                    + "     ?eunisurl es:validName true .  \n"
317
                    + "     ?eunisurl es:binomialName ?name . \n"
318
                    + "     ?eunisurl rdf:label ?fullName . \n"
319
                    + "     ?eunisurl dwc:scientificNameAuthorship ?author . \n"
320
                    + "     OPTIONAL {  \n"
321
                    + "       ?eunisurl es:sameSynonymCoL ?sameSpecies . \n"
322
                    + "     } \n"
323
                    + "     OPTIONAL {  \n"
324
                    + "       ?eunisurl dwc:vernacularName ?vernacularName . \n"
325
                    + "     } \n"
326

    
327
                    + "     OPTIONAL {  \n"
328
                    + "       ?eunisurl es:eunisPrimaryName ?eunisPrimaryName . \n" // accepted taxon
329
                    + "     } \n"
330
                    + "     OPTIONAL {  \n"
331
                    + "       ?eunisurl rdf:hasLegalReference ?sourceReference . \n"
332
                    + "     } \n"
333
                    + "     OPTIONAL {  \n"
334
                    + "       ?eunisurl rdf:taxonomy ?rank . \n"
335
                    + "     } \n"
336
                    + "     FILTER " + filter  + " \n"
337
                    + "} \n"
338
                    + "LIMIT " + MAX_PAGING_LIMIT + " OFFSET 0"
339
                    );
340

    
341
            logger.debug("\n" + queryString.toString());
342

    
343
            Model model = queryClient.describe(queryString.toString());
344
            updateQueriesWithResponse(model, checklistInfo, query);
345
        }
346
    }
347

    
348
    @Override
349
    public void resolveScientificNamesLike(TnrMsg tnrMsg) throws DRFChecklistException {
350
        // delegate to resolveScientificNamesExact, since the like search mode
351
        // is handled in buildUriFromQueryList
352
        resolveScientificNamesExact(tnrMsg);
353

    
354
    }
355

    
356
    @Override
357
    public void resolveVernacularNamesExact(TnrMsg tnrMsg) throws DRFChecklistException {
358
        // TODO Auto-generated method stub
359

    
360
    }
361

    
362
    @Override
363
    public void resolveVernacularNamesLike(TnrMsg tnrMsg) throws DRFChecklistException {
364
        // TODO Auto-generated method stub
365
    }
366

    
367
    @Override
368
    public void findByIdentifier(TnrMsg tnrMsg) throws DRFChecklistException {
369
        // TODO Auto-generated method stub
370
    }
371

    
372
    private void updateQueriesWithResponse(Model model, ServiceProviderInfo ci, Query query)
373
            throws DRFChecklistException {
374

    
375
        if (model == null) {
376
            return;
377
        }
378

    
379
        ResIterator subjectIt = model.listSubjects();
380

    
381
        int i = -1;
382
        while (subjectIt.hasNext()) {
383
            i++;
384
            Resource subject = subjectIt.next();
385
            Resource taxonR;
386
            URI matchedResourceURI = queryClient.objectAsURI(subject, RdfSchema.SKOS_CORE, "exactMatch");
387
            if(matchedResourceURI != null) {
388
                // need to follow the exactMatch uri in this case
389
                taxonR = queryClient.getFromUri(matchedResourceURI);
390
            } else {
391
                // the subject is already a species
392
                taxonR = subject;
393
            }
394
            Response tnrResponse = tnrResponseFromResource(model, taxonR, query.getRequest());
395
            query.getResponse().add(tnrResponse);
396
        }
397
    }
398

    
399
    /**
400
     * @param model
401
     * @param taxonR
402
     * @param request
403
     * @return
404
     */
405
    private Response tnrResponseFromResource(Model model, Resource taxonR, Request request) {
406

    
407
        Response tnrResponse = TnrMsgUtils.tnrResponseFor(getServiceProviderInfo());
408

    
409
        SearchMode searchMode = SearchMode.valueOf(request.getSearchMode());
410

    
411
        // A synonym has always taxonomicRank = "Synonym", validName usually is false but in two cases it is true
412
        boolean isAccepted = taxonR.hasLiteral(model.getProperty(RdfSchema.EUNIS_SPECIES.schemaUri(), "taxonomicRank"), "Synonym");
413

    
414
        // TODO: is this possible with this service?
415
        //    tnrResponse.setMatchingNameString(record.getScientificname());
416

    
417
            // case when accepted name
418
            if(isAccepted) {
419
                Taxon taxon = generateTaxon(model, taxonR);
420
                tnrResponse.setTaxon(taxon);
421
                tnrResponse.setMatchingNameType(NameType.TAXON);
422

    
423
            } else {
424
                // case when synonym
425
                Resource synonymR = taxonR;
426
                taxonR = queryClient.objectAsResource(taxonR, RdfSchema.EUNIS_SPECIES, "eunisPrimaryName");
427
                if(taxonR != null) {
428
                    Taxon taxon = generateTaxon(model, taxonR);
429
                    tnrResponse.setTaxon(taxon);
430
                } else {
431
                    logger.error("No accepted taxon found for " + synonymR.getURI());
432
                }
433
                tnrResponse.setMatchingNameType(NameType.SYNONYM);
434
                // TODO find accepted it is linked via eunisPrimaryName and (synonymFor)
435
            }
436

    
437
            if(request.isAddSynonymy()) {
438
                // add Synonyms
439
//                generateSynonyms(records,tnrResponse);
440
            }
441

    
442
        return tnrResponse;
443
    }
444

    
445
    @Override
446
    public EnumSet<SearchMode> getSearchModes() {
447
        return SEARCH_MODES;
448
    }
449

    
450
    @Override
451
    public boolean isSupportedIdentifier(String value) {
452
        // return IdentifierUtils.checkLSID(value) ||
453
        // IdentifierUtils.checkUUID(value);
454
        return value != null;
455
    }
456

    
457
}
(5-5/12)