Project

General

Profile

Download (17.6 KB) Statistics
| Branch: | Tag: | Revision:
1
package org.bgbm.biovel.drf.checklist;
2

    
3
import java.net.URI;
4
import java.util.ArrayList;
5
import java.util.EnumSet;
6
import java.util.HashMap;
7
import java.util.List;
8
import java.util.NoSuchElementException;
9

    
10
import org.apache.jena.rdf.model.Model;
11
import org.apache.jena.rdf.model.Property;
12
import org.apache.jena.rdf.model.ResIterator;
13
import org.apache.jena.rdf.model.Resource;
14
import org.apache.jena.rdf.model.StmtIterator;
15
import org.bgbm.biovel.drf.client.ServiceProviderInfo;
16
import org.bgbm.biovel.drf.query.SparqlClient;
17
import org.bgbm.biovel.drf.tnr.msg.Classification;
18
import org.bgbm.biovel.drf.tnr.msg.NameType;
19
import org.bgbm.biovel.drf.tnr.msg.Query;
20
import org.bgbm.biovel.drf.tnr.msg.Query.Request;
21
import org.bgbm.biovel.drf.tnr.msg.Response;
22
import org.bgbm.biovel.drf.tnr.msg.Source;
23
import org.bgbm.biovel.drf.tnr.msg.Synonym;
24
import org.bgbm.biovel.drf.tnr.msg.Taxon;
25
import org.bgbm.biovel.drf.tnr.msg.TaxonBase;
26
import org.bgbm.biovel.drf.tnr.msg.TaxonName;
27
import org.bgbm.biovel.drf.tnr.msg.TnrMsg;
28
import org.bgbm.biovel.drf.utils.TnrMsgUtils;
29

    
30
public class EEA_BDC_Client extends AggregateChecklistClient<SparqlClient> {
31

    
32
    /**
33
     *
34
     */
35
    public static final String ID = "eea_bdc";
36
    public static final String LABEL = "European Environment Agency (EEA) Biodiversity data centre (BDC)";
37
    public static final String DOC_URL = "http://semantic.eea.europa.eu/documentation";
38
    public static final String COPYRIGHT_URL = "http://www.eea.europa.eu/legal/eea-data-policy";
39
    private static final String SPARQL_ENDPOINT_URL = "http://semantic.eea.europa.eu/sparql";
40
    private static final String RDF_FILE_URL = "http://localhost/download/species.rdf.gz"; // http://eunis.eea.europa.eu/rdf/species.rdf.gz
41
    private static final boolean USE_REMOTE_SERVICE = true;
42
    private static final boolean REFRESH_TDB = true;
43

    
44
    private static final int MAX_PAGING_LIMIT = 50;
45

    
46
    public static final EnumSet<SearchMode> SEARCH_MODES = EnumSet.of(
47
            SearchMode.scientificNameExact,
48
            SearchMode.scientificNameLike,
49
            SearchMode.findByIdentifier);
50

    
51
    /**
     * RDF vocabularies used by this client. Each constant couples the namespace URI
     * with the abbreviation that is written into the PREFIX declarations of the
     * generated SPARQL queries.
     */
    public enum RdfSchema {

        /*
         * Namespace declarations kept for reference:
         *
         *     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         *     xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
         *     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
         *     xmlns:dcterms="http://purl.org/dc/terms/"
         *     xmlns:dc="http://purl.org/dc/elements/1.1/"
         *     xmlns:dwc="http://rs.tdwg.org/dwc/terms/"
         *     xmlns:owl="http://www.w3.org/2002/07/owl#"
         *     xmlns="http://eunis.eea.europa.eu/rdf/species-schema.rdf#"
         *     xmlns:sioc="http://rdfs.org/sioc/ns#"
         *     xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         *     xmlns:bibo="http://purl.org/ontology/bibo/"
         *     xmlns:cc="http://creativecommons.org/ns#"
         *     xmlns:foaf="http://xmlns.com/foaf/0.1/"
         */
        EUNIS_SPECIES("es", "http://eunis.eea.europa.eu/rdf/species-schema.rdf#"),
        EUNIS_TAXONOMY("et", "http://eunis.eea.europa.eu/rdf/taxonomies-schema.rdf#"),
        DWC("dwc", "http://rs.tdwg.org/dwc/terms/"),
        RDF("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
        RDFS("rdfs", "http://www.w3.org/2000/01/rdf-schema#"),
        SKOS_CORE("scos_core", "http://www.w3.org/2004/02/skos/core#");

        /** Prefix label used for this vocabulary in SPARQL queries. */
        private final String abbreviation;

        /** Namespace URI of this vocabulary. */
        private final String schemaUri;

        RdfSchema(String abbreviation, String schemaUri) {
            this.schemaUri = schemaUri;
            this.abbreviation = abbreviation;
        }

        /**
         * @return the prefix label of this vocabulary
         */
        public String abbreviation() {
            return this.abbreviation;
        }

        /**
         * @return the namespace URI of this vocabulary
         */
        public String schemaUri() {
            return this.schemaUri;
        }
    }
93

    
94
    /**
     * Identifiers of sub-checklists. Only {@code eunis} is registered as a
     * sub-checklist in {@link #buildServiceProviderInfo()}.
     */
    public enum subCheckListIds {
        eunis,
        natura_2000
    }
98

    
99
    private enum RankLevel{
100

    
101
        Kingdom, Phylum, Clazz, Order, Family, Genus;
102
    }
103

    
104
    // NOTE(review): this field is never read or written in this class. The type
    // arguments are inferred from the field name (taxon id -> tnr response) and
    // only replace the former raw type - TODO confirm or remove the field.
    private HashMap<String, Response> taxonIdTnrResponseMap;
105

    
106
    public EEA_BDC_Client() {
107

    
108
        super();
109
    }
110

    
111
    /**
     * Creates the client from a JSON string which is passed on unchanged to the
     * super class constructor.
     *
     * @param checklistInfoJson
     *            the checklist info as JSON
     * @throws DRFChecklistException
     *             when thrown by the super class constructor
     */
    public EEA_BDC_Client(String checklistInfoJson) throws DRFChecklistException {
        super(checklistInfoJson);
    }
115

    
116
    @Override
117
    public void initQueryClient() {
118

    
119
        if(USE_REMOTE_SERVICE) {
120
            // use SPARQL end point
121
            queryClient = new SparqlClient(SPARQL_ENDPOINT_URL, SparqlClient.Opmode.SPARCLE_ENDPOINT);
122
        } else {
123
            if(REFRESH_TDB) {
124
                // use downloadable rdf
125
                queryClient = new SparqlClient(RDF_FILE_URL, SparqlClient.Opmode.RDF_ARCHIVE);
126
            }else {
127
                // reuse existing TDB_STORE
128
                queryClient = new SparqlClient(null, SparqlClient.Opmode.RDF_ARCHIVE);
129
            }
130
        }
131
    }
132

    
133
    @Override
134
    public ServiceProviderInfo buildServiceProviderInfo() {
135

    
136
        ServiceProviderInfo checklistInfo = new ServiceProviderInfo(ID, LABEL, DOC_URL, COPYRIGHT_URL, getSearchModes());
137
        checklistInfo.addSubChecklist(new ServiceProviderInfo(subCheckListIds.eunis.name(), "EUNIS",
138
                "http://www.eea.europa.eu/themes/biodiversity/eunis/eunis-db#tab-metadata",
139
                "http://www.eea.europa.eu/legal/copyright", SEARCH_MODES));
140
        return checklistInfo;
141
    }
142

    
143

    
144
    /**
145
     * @param queryString
146
     * @throws DRFChecklistException
147
     */
148
    private void addPrexfixes(StringBuilder queryString) throws DRFChecklistException {
149

    
150
        for(RdfSchema schema : RdfSchema.values()) {
151
            queryString.append(String.format("PREFIX %s: <%s>\n", schema.abbreviation(), schema.schemaUri()));
152
        }
153
    }
154

    
155
    /**
156
     * @param checklistInfo
157
     * @return
158
     * @throws DRFChecklistException
159
     */
160
    private StringBuilder prepareQueryString() throws DRFChecklistException {
161

    
162
        StringBuilder queryString = new StringBuilder();
163
        addPrexfixes(queryString);
164
        return queryString;
165
    }
166

    
167
    private Taxon createTaxon(Model model, Resource taxonR) {
168

    
169
        Taxon taxon = new Taxon();
170

    
171
        TaxonName taxonName = createTaxonName(taxonR);
172

    
173
        // Taxon
174
        taxon.setTaxonName(taxonName);
175
        taxon.setIdentifier(taxonR.getURI());
176
        taxon.setAccordingTo(queryClient.objectAsString(taxonR, RdfSchema.DWC, "nameAccordingToID"));
177
        URI typeUri = queryClient.objectAsURI(taxonR, RdfSchema.RDF, "type");
178
        taxon.setTaxonomicStatus(typeUri.getFragment());
179

    
180
        createSources(model, taxonR, taxon);
181

    
182
        // classification
183
        Classification c = null;
184
        Resource parentR = queryClient.objectAsResource(taxonR, RdfSchema.EUNIS_SPECIES, "taxonomy");
185
        while (parentR != null) {
186

    
187
            String level = queryClient.objectAsString(parentR, RdfSchema.EUNIS_TAXONOMY, "level");
188
            String parentTaxonName = queryClient.objectAsString(parentR, RdfSchema.EUNIS_TAXONOMY, "name");
189

    
190
            RankLevel rankLevel = null;
191
            try {
192
                rankLevel = RankLevel.valueOf(level);
193
            } catch (Exception e) {
194
                // IGNORE
195
            }
196
            if(rankLevel != null) {
197
                if(c == null) {
198
                 c = new Classification();
199
                }
200
                switch(rankLevel) {
201
                case Clazz:
202
                    c.setClazz(parentTaxonName);
203
                    break;
204
                case Family:
205
                    c.setFamily(parentTaxonName);
206
                    break;
207
                case Genus:
208
                    c.setGenus(parentTaxonName);
209
                    break;
210
                case Kingdom:
211
                    c.setKingdom(parentTaxonName);
212
                    break;
213
                case Order:
214
                    c.setOrder(parentTaxonName);
215
                    break;
216
                case Phylum:
217
                    c.setPhylum(parentTaxonName);
218
                    break;
219
                default:
220
                    break;
221
                }
222
            }
223
            Resource lastParentR = parentR;
224
            parentR = queryClient.objectAsResource(parentR, RdfSchema.EUNIS_TAXONOMY, "parent");
225
            if(lastParentR.equals(parentR)) {
226
                // avoid endless looping when data is not correct
227
                break;
228
            }
229
        }
230
        if(c != null) {
231
            taxon.setClassification(c);
232
        }
233
        return taxon;
234
    }
235

    
236
    /**
237
     * @param model
238
     * @param taxonR
239
     * @param taxonBase
240
     */
241
    private void createSources(Model model, Resource taxonR, TaxonBase taxonBase) {
242
        // Sources are source references, re there others like data bases?
243
        for ( StmtIterator refIt = taxonR.listProperties(model.getProperty(RdfSchema.EUNIS_SPECIES.schemaUri, "hasLegalReference")); refIt.hasNext();) {
244
            try {
245
            Source source = new Source();
246
            Resource sourceR = refIt.next().getObject().asResource();
247
            String sourceName = queryClient.objectAsString(sourceR, RdfSchema.RDFS, "source");
248
            source.setName(sourceName);
249
            taxonBase.getSources().add(source);
250
            } catch (NoSuchElementException e) {
251
                logger.debug("No statements for rdf:hasLegalReference" , e);
252
            }
253
        }
254
    }
255

    
256
    /**
257
     * @param taxonR
258
     * @return
259
     */
260
    private TaxonName createTaxonName(Resource taxonR) {
261
        TaxonName taxonName = new TaxonName();
262
        // TaxonName
263
        taxonName.setFullName(queryClient.objectAsString(taxonR, RdfSchema.RDFS, "label"));
264
        // TODO rename CanonicalName to scientificName? compare with dwc:scientificName
265
        taxonName.setCanonicalName(queryClient.objectAsString(taxonR, RdfSchema.EUNIS_SPECIES, "binomialName"));
266
        taxonName.setRank(queryClient.objectAsString(taxonR, RdfSchema.EUNIS_SPECIES, "taxonomicRank"));
267
        return taxonName;
268
    }
269

    
270

    
271

    
272

    
273
    private void createSynonyms(Resource taxonR, Response tnrResponse) {
274

    
275
        List<Resource> synonymRList = queryForSynonyms(taxonR);
276

    
277
        for (Resource synonymR  : synonymRList) {
278

    
279
            URI typeUri = queryClient.objectAsURI(synonymR, RdfSchema.RDF, "type");
280
            String status = typeUri.getFragment();
281

    
282

    
283
            if (status != null && status.equals("SpeciesSynonym")) {
284

    
285
                Synonym synonym = new Synonym();
286

    
287
                TaxonName taxonName = createTaxonName(synonymR);
288

    
289
                synonym.setTaxonomicStatus(status);
290
                synonym.setTaxonName(taxonName);
291
                synonym.setAccordingTo(queryClient.objectAsString(synonymR, RdfSchema.DWC, "nameAccordingToID"));
292

    
293
                createSources(synonymR.getModel(), synonymR, synonym);
294

    
295
                tnrResponse.getSynonym().add(synonym);
296
            }
297
        }
298
    }
299

    
300
    /**
301
     * Returns all subjects that are related to the taxonR
302
     * via the es:eunisPrimaryName property.
303
     *
304
     * @param taxonR
305
     * @return
306
     */
307
    private List<Resource> queryForSynonyms(Resource taxonR) {
308

    
309
        List<Resource> synonymRList = null;
310

    
311
        try {
312
            StringBuilder queryString = prepareQueryString();
313

    
314
            queryString.append("DESCRIBE ?synonym es:eunisPrimaryName <" + taxonR.getURI() + ">");
315
            logger.debug("\n" + queryString.toString());
316

    
317
            Model model = queryClient.describe(queryString.toString());
318
            synonymRList = listSynonymResources(model, taxonR);
319

    
320
        } catch (DRFChecklistException e) {
321
            logger.error("SPARQL query error in queryForSynonyms()", e);
322
        } finally {
323
            if(synonymRList == null) {
324
                synonymRList = new ArrayList<Resource>(0);
325
            }
326
        }
327

    
328
        return synonymRList;
329

    
330
    }
331

    
332
    /**
333
     * @param model
334
     * @return
335
     */
336
    private List<Resource> listSynonymResources(Model model, Resource taxonR) {
337
        List<Resource> synonymRList;
338
        Property filterProperty = model.createProperty(RdfSchema.EUNIS_SPECIES.schemaUri, "eunisPrimaryName");
339
        synonymRList = queryClient.listResources(model, filterProperty, null, taxonR);
340
        return synonymRList;
341
    }
342

    
343
    @Override
344
    public void resolveScientificNamesExact(TnrMsg tnrMsg) throws DRFChecklistException {
345

    
346
        List<Query> queryList = tnrMsg.getQuery();
347

    
348
        // selecting one request as representative, only
349
        // the search mode and addSynonmy flag are important
350
        // for the further usage of the request object
351

    
352
        for (ServiceProviderInfo checklistInfo : getServiceProviderInfo().getSubChecklists()) {
353

    
354
            Query query = singleQueryFrom(tnrMsg);
355
            StringBuilder queryString = prepareQueryString();
356

    
357
            String filter;
358
            if(query.getRequest().getSearchMode().equals(SearchMode.scientificNameLike.name())) {
359
                filter = "(regex(?name, \"" + query.getRequest().getQueryString() + "\"))";
360
            } else {
361
                filter = "(?name = \"" + query.getRequest().getQueryString() + "\")";
362
            }
363

    
364
            queryString.append(
365
                    "DESCRIBE ?eunisurl \n"
366
                    + "WHERE {\n"
367
                    + "     ?eunisurl es:binomialName ?name . \n"
368
                    + "     FILTER " + filter  + " \n"
369
                    + "} \n"
370
                    + "LIMIT " + MAX_PAGING_LIMIT + " OFFSET 0"
371
                    );
372

    
373
            logger.debug("\n" + queryString.toString());
374

    
375
            Model model = queryClient.describe(queryString.toString());
376
            updateQueriesWithResponse(model, checklistInfo, query);
377
        }
378
    }
379

    
380
    @Override
381
    public void resolveScientificNamesLike(TnrMsg tnrMsg) throws DRFChecklistException {
382
        // delegate to resolveScientificNamesExact, since the like search mode
383
        // is handled in buildUriFromQueryList
384
        resolveScientificNamesExact(tnrMsg);
385

    
386
    }
387

    
388
    @Override
389
    public void resolveVernacularNamesExact(TnrMsg tnrMsg) throws DRFChecklistException {
390
        // TODO Auto-generated method stub
391

    
392
    }
393

    
394
    @Override
395
    public void resolveVernacularNamesLike(TnrMsg tnrMsg) throws DRFChecklistException {
396
        // TODO Auto-generated method stub
397
    }
398

    
399
    @Override
400
    public void findByIdentifier(TnrMsg tnrMsg) throws DRFChecklistException {
401
        // TODO Auto-generated method stub
402
    }
403

    
404
    private void updateQueriesWithResponse(Model model, ServiceProviderInfo ci, Query query)
405
            throws DRFChecklistException {
406

    
407
        if (model == null) {
408
            return;
409
        }
410

    
411
        ResIterator subjectIt = model.listSubjects();
412

    
413
        while (subjectIt.hasNext()) {
414
            Resource subject = subjectIt.next();
415
            Resource taxonR;
416
            StmtIterator exactMatches = subject.listProperties(subject.getModel().getProperty(RdfSchema.SKOS_CORE.schemaUri, "exactMatch"));
417
            if(exactMatches.hasNext()) {
418
                // need to follow the exactMatch uri in this case
419
                taxonR = queryClient.getFromUri(exactMatches.next().getResource().getURI());
420
            } else {
421
                // the subject is already a species
422
                taxonR = subject;
423
            }
424

    
425
            Response tnrResponse = tnrResponseFromResource(model, taxonR, query.getRequest());
426
            if(tnrResponse != null) {
427
                query.getResponse().add(tnrResponse);
428
            }
429
        }
430
    }
431

    
432
    /**
433
     * @param model
434
     * @param taxonR
435
     * @param request
436
     * @return
437
     */
438
    private Response tnrResponseFromResource(Model model, Resource taxonR, Request request) {
439

    
440
        Response tnrResponse = TnrMsgUtils.tnrResponseFor(getServiceProviderInfo());
441

    
442
        SearchMode searchMode = SearchMode.valueOf(request.getSearchMode());
443

    
444
        // Check for type to ignore the triple pointing from synonyms to accepted taxonUris
445
        // only complete descriptions of taxa and synonym are relevant.
446
        boolean isCompleteResource = taxonR.hasProperty(taxonR.getModel().getProperty(RdfSchema.RDF.schemaUri, "type"));
447
        if(!isCompleteResource) {
448
            return null;
449
        }
450

    
451
        String validName = queryClient.objectAsString(taxonR, RdfSchema.EUNIS_SPECIES, "validName");
452
        boolean isAccepted = validName != null && validName.equals("true^^http://www.w3.org/2001/XMLSchema#boolean");
453
        boolean skipThis = false;
454

    
455
        logger.debug("processing " + (isAccepted ? "accepted taxon" : "synonym or other")  + " " + taxonR.getURI());
456

    
457
        // case when accepted name
458
        if(isAccepted) {
459
            Taxon taxon = createTaxon(model, taxonR);
460
            tnrResponse.setTaxon(taxon);
461
            tnrResponse.setMatchingNameType(NameType.TAXON);
462
            String matchingName = taxon.getTaxonName().getCanonicalName();
463
            tnrResponse.setMatchingNameString(matchingName);
464

    
465
        } else {
466
            // case when synonym
467
            Resource synonymR = taxonR;
468
            URI taxonUri = queryClient.objectAsURI(taxonR, RdfSchema.EUNIS_SPECIES, "eunisPrimaryName");
469
            if(taxonUri == null) {
470
                logger.error("no taxon uri found");
471
            }
472

    
473
            taxonR = queryClient.getFromUri(taxonUri);
474
            if(taxonR != null) {
475
                Taxon taxon = createTaxon(model, taxonR);
476
                tnrResponse.setTaxon(taxon);
477
            } else {
478
                logger.error("No accepted taxon found for " + synonymR.getURI());
479
            }
480
            tnrResponse.setMatchingNameType(NameType.SYNONYM);
481
            String matchingName = queryClient.objectAsString(synonymR, RdfSchema.EUNIS_SPECIES, "binomialName");
482
            tnrResponse.setMatchingNameString(matchingName);
483
        }
484

    
485
        if(!skipThis && request.isAddSynonymy()) {
486
            createSynonyms(taxonR, tnrResponse);
487
        }
488
        logger.debug("processing " + (isAccepted ? "accepted taxon" : "synonym or other")  + " " + taxonR.getURI() + " DONE");
489

    
490
        return tnrResponse;
491
    }
492

    
493
    @Override
494
    public EnumSet<SearchMode> getSearchModes() {
495
        return SEARCH_MODES;
496
    }
497

    
498
    @Override
499
    public boolean isSupportedIdentifier(String value) {
500
        // return IdentifierUtils.checkLSID(value) ||
501
        // IdentifierUtils.checkUUID(value);
502
        return value != null;
503
    }
504

    
505
}
(5-5/12)