Project

General

Profile

Download (13.5 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2015 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9
package org.bgbm.biovel.drf.query;
10

    
11
import java.io.File;
12
import java.io.FileInputStream;
13
import java.io.FileOutputStream;
14
import java.io.IOException;
15
import java.io.InputStream;
16
import java.net.URI;
17
import java.net.URISyntaxException;
18
import java.util.NoSuchElementException;
19

    
20
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
21
import org.apache.commons.compress.compressors.gzip.GzipUtils;
22
import org.apache.commons.compress.utils.IOUtils;
23
import org.apache.commons.io.FileUtils;
24
import org.apache.commons.io.FilenameUtils;
25
import org.apache.http.client.ClientProtocolException;
26
import org.apache.http.client.methods.CloseableHttpResponse;
27
import org.apache.http.client.methods.HttpGet;
28
import org.apache.http.impl.client.CloseableHttpClient;
29
import org.apache.http.impl.client.HttpClients;
30
import org.apache.jena.atlas.web.HttpException;
31
import org.apache.jena.query.Dataset;
32
import org.apache.jena.query.Query;
33
import org.apache.jena.query.QueryExecution;
34
import org.apache.jena.query.QueryExecutionFactory;
35
import org.apache.jena.query.QueryFactory;
36
import org.apache.jena.query.ReadWrite;
37
import org.apache.jena.query.ResultSet;
38
import org.apache.jena.query.ResultSetFormatter;
39
import org.apache.jena.rdf.model.Model;
40
import org.apache.jena.rdf.model.ModelFactory;
41
import org.apache.jena.rdf.model.RDFNode;
42
import org.apache.jena.rdf.model.ResIterator;
43
import org.apache.jena.rdf.model.Resource;
44
import org.apache.jena.rdf.model.StmtIterator;
45
import org.apache.jena.tdb.TDBFactory;
46
import org.apache.jena.tdb.base.file.Location;
47
import org.bgbm.biovel.drf.checklist.DRFChecklistException;
48
import org.bgbm.biovel.drf.checklist.EEA_BDC_Client.RdfSchema;
49
import org.slf4j.Logger;
50
import org.slf4j.LoggerFactory;
51

    
52
/**
53
 * @author a.kohlbecker
54
 * @date Sep 30, 2015
55
 *
56
 */
57
public class SparqlClient implements IQueryClient {
58

    
59
    protected Logger logger = LoggerFactory.getLogger(SparqlClient.class);
60

    
61
    private static final File tmpDir = new File(System.getProperty("java.io.tmpdir"));
62

    
63
    private Opmode opmode = null;
64

    
65
    private final String baseUri;
66

    
67
    private File rdfFile = null;
68

    
69
    private Dataset dataset = null;
70

    
71
    /**
     * Operation modes of the client.
     */
    public enum Opmode {
        /** Queries are sent to the remote SPARQL service located at the base URI. */
        SPARCLE_ENDPOINT,
        /** The base URI points to an RDF file which is downloaded and queried through a local TDB store. */
        RDF_ARCHIVE
    }
74

    
75
    /**
     * Creates a new client.
     * <p>
     * In {@link Opmode#RDF_ARCHIVE} mode the RDF file at {@code baseUri} is
     * downloaded and extracted (only if {@code baseUri} is not {@code null})
     * and the local TDB store is set up. In any other mode nothing but the
     * two fields is initialized.
     *
     * @param baseUri
     *            the URI of the SPARQL service or of the RDF file to download
     * @param opmode
     *            the operation mode, must not be {@code null}
     * @throws NullPointerException
     *             if {@code opmode} is {@code null}
     */
    public SparqlClient(String baseUri, Opmode opmode) {
        if (opmode == null) {
            // fail with a meaningful message instead of an anonymous NPE
            throw new NullPointerException("opmode must not be null");
        }
        this.baseUri = baseUri;
        this.opmode = opmode;
        if (opmode.equals(Opmode.RDF_ARCHIVE)) {
            if (baseUri != null) {
                this.rdfFile = downloadAndExtract();
            }
            try {
                createStore();
            } catch (IOException e) {
                // keep the best-effort behaviour, but report through the logger
                logger.error("Setting up the local TDB store failed", e);
            }
        }
    }
93

    
94
    /**
95
     *
96
     */
97
    private File downloadAndExtract() {
98
        File expandedFile = null;
99
        CloseableHttpClient httpClient = HttpClients.createDefault();
100
        CloseableHttpResponse response;
101
        try {
102
            // 1. download and store in local filesystem in TMP
103
            logger.debug("downloading rdf file from " + baseUri);
104
            HttpGet httpGet = new HttpGet(baseUri);
105
            response = httpClient.execute(httpGet);
106
            String archiveFileName = FilenameUtils.getName(httpGet.getURI().getRawPath());
107
            File archiveFile = new File(tmpDir, archiveFileName);
108
            FileOutputStream fout = new FileOutputStream(archiveFile);
109
            IOUtils.copy(response.getEntity().getContent(), new FileOutputStream(archiveFile));
110
            fout.close();
111
            logger.debug(archiveFile.length() + " bytes downloaded to " + archiveFile.getCanonicalPath());
112

    
113
            // 2. extract the archive
114
            FileInputStream fin = new FileInputStream(archiveFile);
115
            InputStream ain = null;
116

    
117
            if(GzipUtils.isCompressedFilename(archiveFileName)) {
118
                logger.debug("Extracting GZIP file " + archiveFile.getCanonicalPath());
119
                ain = new GzipCompressorInputStream(fin);
120
            } else {
121
                // TO UNZIP
122
                //ArchiveInputStream ain = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, fin);
123
            }
124

    
125
            expandedFile = new File(tmpDir, GzipUtils.getUncompressedFilename(archiveFileName));
126
            fout = new FileOutputStream(expandedFile);
127
            IOUtils.copy(ain, fout);
128
            fout.close();
129
            fin.close();
130
            logger.debug("Extracted to " + expandedFile.getCanonicalPath());
131
        } catch (ClientProtocolException e) {
132
            // TODO Auto-generated catch block
133
            e.printStackTrace();
134
        } catch (IOException e) {
135
            // TODO Auto-generated catch block
136
            e.printStackTrace();
137
        }
138
        return expandedFile;
139
    }
140

    
141
    /**
142
     * WARNING!!! This needs at least 1.5GB of heap space!!!
143
     * set -Xmx1500M
144
     *
145
     * NOTE: The bulkloader is a faster way to load data into an empty dataset than just using the Jena update operations.
146
     * the bulkloader also requires less memory
147
     * It is accessed through the command line utility tdbloader.
148
     *
149
     * rm /tmp/drf_tnb_store/*; bin/tdbloader2 -l /tmp/drf_tnb_store /tmp/species.rdf
150
     * @throws IOException
151
     */
152
    private void createStore() throws IOException {
153

    
154
        boolean doClearStoreLocation = rdfFile != null;
155
        boolean doLoadRdfFile = rdfFile != null;
156

    
157
        File tdbStoreFolder = new File(tmpDir, "drf_tnb_store" + File.separator);
158
        if(tdbStoreFolder.exists()) {
159
            if( doClearStoreLocation ) {
160
                FileUtils.cleanDirectory(tdbStoreFolder);
161
            }
162
        } else {
163
            tdbStoreFolder.mkdirs();
164
        }
165
        Location location = Location.create(tdbStoreFolder.toString());
166

    
167
        Dataset dataset = TDBFactory.createDataset(location);
168

    
169
        logger.info("Using TDB store at " + location);
170

    
171
        dataset.begin(ReadWrite.READ) ;
172
        // Get model inside the transaction
173
        Model model = dataset.getDefaultModel() ;
174
        logger.info("Dataset in TDB has " + dataset.asDatasetGraph().size() + " named graphs");
175
        logger.info("Model-size: " + model.size());
176
        dataset.end();
177

    
178
        if(doLoadRdfFile) {
179
            dataset.begin(ReadWrite.WRITE);
180
            model = dataset.getDefaultModel();
181
            // parse InputStream as RDF in Turtle format
182
            InputStream fin = new FileInputStream(rdfFile);
183
            logger.info("loading RDF/XML into TDB store");
184
            model.read(fin, null, "RDF/XML");
185
            logger.info("loading RDF/XML done");
186
            logger.info("Dataset in TDB has " + dataset.asDatasetGraph().size() + " named graphs");
187
            logger.info("Model-size: " + model.size());
188
            dataset.commit();
189
            dataset.end();
190
            logger.info("rdf loaded into TDB store at " + tdbStoreFolder);
191
        }
192

    
193
        this.dataset = dataset;
194
    }
195

    
196
    public String select(String queryString) throws DRFChecklistException {
197

    
198
        QueryExecution qe = executionFor(queryString);
199

    
200
        try {
201
            ResultSet results = qe.execSelect();
202
            System.err.println(ResultSetFormatter.asText(results));
203
        } catch (HttpException e) {
204
            switch(e.getResponseCode()) {
205
                // interpretation based on
206
                // http://image.slidesharecdn.com/swtss1006sparql-100614020655-phpapp02/95/semantic-web-technologies-ss-2010-06-sparql-46-728.jpg?cb=1276481316
207
                case 400:
208
                    throw new DRFChecklistException("Malformed Query ?", e);
209
                case 500:
210
                    throw new DRFChecklistException("Query Request Refused ?", e);
211
                default:
212
                    throw e;
213
            }
214
        } finally {
215
            // Important - free up resources used running the query
216
            qe.close();
217
        }
218

    
219
        return null;
220
    }
221

    
222
    public Model describe(String queryString) throws DRFChecklistException {
223

    
224
        QueryExecution qe = executionFor(queryString);
225
        Model result = null;
226
        try {
227
            if(dataset != null) {
228
                dataset.begin(ReadWrite.READ) ;
229
            }
230
            result = qe.execDescribe();
231
            if(logger.isDebugEnabled()) {
232
                result.write(System.err);
233
            }
234

    
235
        } catch (HttpException e) {
236
            switch(e.getResponseCode()) {
237
                // interpretation based on
238
                // http://image.slidesharecdn.com/swtss1006sparql-100614020655-phpapp02/95/semantic-web-technologies-ss-2010-06-sparql-46-728.jpg?cb=1276481316
239
                case 400:
240
                    throw new DRFChecklistException("Malformed Query ?", e);
241
                case 500:
242
                    throw new DRFChecklistException("Query Request Refused ?", e);
243
                default:
244
                    throw e;
245
            }
246
        } finally {
247
            // Important - free up resources used running the query
248
            qe.close();
249
        }
250

    
251
        if(result != null && logger.isDebugEnabled()) {
252
            StringBuilder msg = new StringBuilder();
253
            msg.append("subjects in response:\n");
254
            int i = 1;
255
            for(ResIterator it = result.listSubjects(); it.hasNext(); ++i) {
256
                Resource res = it.next();
257
                msg.append("    " + i + ": " + res.toString() + "\n");
258
            }
259
            logger.debug(msg.toString());
260
        }
261
        if(dataset != null) {
262
            dataset.end();
263
        }
264

    
265
        return result;
266
    }
267

    
268
    /**
269
     * @param queryString
270
     * @return
271
     */
272
    private QueryExecution executionFor(String queryString) {
273

    
274
        QueryExecution qe;
275
        if(opmode.equals(Opmode.SPARCLE_ENDPOINT)) {
276
            Query query = QueryFactory.create(queryString);
277
            qe = QueryExecutionFactory.sparqlService(baseUri, query);
278
        } else {
279
            // local TDB Store
280
            qe = QueryExecutionFactory.create(queryString, dataset);
281
        }
282
        return qe;
283
    }
284

    
285
    /**
286
     * @param subject
287
     * @param nameSpace
288
     * @param localName
289
     * @return
290
     */
291
    public RDFNode asSingleObject(Resource subject, RdfSchema nameSpace, String localName) {
292
        RDFNode node = null;
293
        Resource _subject = subject;
294
        try {
295
            boolean hasNoPropertiesInGraph = !_subject.listProperties().hasNext();
296
            if(_subject.isURIResource() && hasNoPropertiesInGraph ) {
297
                logger.debug("loading UriResource " + _subject.getURI());
298
                _subject = getFromUri(_subject.getURI());
299
            }
300
            Model _model = _subject.getModel();
301
            node = _subject.listProperties(_model.getProperty(nameSpace.schemaUri(), localName)).next().getObject();
302
        } catch (NoSuchElementException e) {
303
            if(logger.isTraceEnabled()) {
304
                logger.debug(_subject.getURI() + " " +  nameSpace + ":" + localName + " not found in current graph");
305
                printProperties(_subject);
306
            }
307
        }
308
        return node;
309
    }
310

    
311
    /**
312
     * @param subject
313
     * @param nameSpace
314
     * @param localName
315
     * @return
316
     */
317
    public String objectAsString(Resource subject, RdfSchema nameSpace, String localName) {
318
        String txt = null;
319
        RDFNode node = asSingleObject(subject, nameSpace, localName);
320
        if(node != null) {
321
            txt = node.toString();
322
        }
323
        return txt;
324
    }
325

    
326
    /**
327
     * @param subject
328
     * @param nameSpace
329
     * @param localName
330
     * @return
331
     */
332
    public Resource objectAsResource(Resource subject, RdfSchema nameSpace, String localName) {
333
        Resource resource = null;
334
        RDFNode node = asSingleObject(subject, nameSpace, localName);
335
        if(node != null) {
336
            node.isResource();
337
            resource  = node.asResource();
338
        }
339
        return resource;
340
    }
341

    
342
    /**
343
     * @param subject
344
     * @param nameSpace
345
     * @param localName
346
     * @return
347
     */
348
    public URI objectAsURI(Resource subject, RdfSchema nameSpace, String localName) {
349
        URI uri = null;
350
        RDFNode node = asSingleObject(subject, nameSpace, localName);
351
        if(node != null) {
352
            node.isURIResource();
353
            try {
354
                uri  = new URI(node.asResource().getURI());
355
            } catch (URISyntaxException e) {
356
                // this should actually never happen
357
                throw new RuntimeException(e);
358
            }
359
        }
360
        return uri;
361
    }
362

    
363
    /**
364
     * @param subject
365
     */
366
    private void printProperties(Resource subject) {
367
        for( StmtIterator it = subject.listProperties(); it.hasNext(); ) {
368
            System.err.println(it.next().toString());
369
        }
370
    }
371

    
372
    public Resource getFromUri(String uri) {
373

    
374
        Model model;
375
        if(dataset != null) {
376
            dataset.begin(ReadWrite.READ) ;
377
            model = dataset.getDefaultModel();
378
            dataset.end();
379
        } else {
380
            model = ModelFactory.createDefaultModel();
381
            model.read(uri);
382
            if(logger.isDebugEnabled()) {
383
                model.write(System.err);
384
            }
385
        }
386
        return model.getResource(uri);
387

    
388
    }
389

    
390
    /**
391
     * @param matchedResourceURI
392
     * @return
393
     * @throws DRFChecklistException
394
     */
395
    public Resource getFromUri(URI matchedResourceURI) {
396
        return getFromUri(matchedResourceURI.toString());
397
    }
398

    
399
    /**
400
     * @return the rdfFile
401
     */
402
    public File getRdfFile() {
403
        return rdfFile;
404
    }
405

    
406

    
407
}
(5-5/5)