Project

General

Profile

Download (13.2 KB) Statistics
| Branch: | Tag: | Revision:
/**
 * Copyright (C) 2015 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9
package org.bgbm.biovel.drf.query;
10

    
11
import java.io.File;
12
import java.io.FileInputStream;
13
import java.io.FileOutputStream;
14
import java.io.IOException;
15
import java.io.InputStream;
16
import java.net.URI;
17
import java.net.URISyntaxException;
18
import java.util.NoSuchElementException;
19

    
20
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
21
import org.apache.commons.compress.compressors.gzip.GzipUtils;
22
import org.apache.commons.compress.utils.IOUtils;
23
import org.apache.commons.io.FileUtils;
24
import org.apache.commons.io.FilenameUtils;
25
import org.apache.http.client.ClientProtocolException;
26
import org.apache.http.client.methods.CloseableHttpResponse;
27
import org.apache.http.client.methods.HttpGet;
28
import org.apache.http.impl.client.CloseableHttpClient;
29
import org.apache.http.impl.client.HttpClients;
30
import org.apache.jena.atlas.web.HttpException;
31
import org.apache.jena.query.Dataset;
32
import org.apache.jena.query.Query;
33
import org.apache.jena.query.QueryExecution;
34
import org.apache.jena.query.QueryExecutionFactory;
35
import org.apache.jena.query.QueryFactory;
36
import org.apache.jena.query.ReadWrite;
37
import org.apache.jena.query.ResultSet;
38
import org.apache.jena.query.ResultSetFormatter;
39
import org.apache.jena.rdf.model.Model;
40
import org.apache.jena.rdf.model.ModelFactory;
41
import org.apache.jena.rdf.model.RDFNode;
42
import org.apache.jena.rdf.model.ResIterator;
43
import org.apache.jena.rdf.model.Resource;
44
import org.apache.jena.rdf.model.StmtIterator;
45
import org.apache.jena.tdb.TDBFactory;
46
import org.apache.jena.tdb.base.file.Location;
47
import org.bgbm.biovel.drf.checklist.DRFChecklistException;
48
import org.bgbm.biovel.drf.checklist.EEA_BDC_Client.RdfSchema;
49
import org.slf4j.Logger;
50
import org.slf4j.LoggerFactory;
51

    
52
/**
53
 * @author a.kohlbecker
54
 * @date Sep 30, 2015
55
 *
56
 */
57
public class SparqlClient implements IQueryClient {
58

    
59
    protected Logger logger = LoggerFactory.getLogger(SparqlClient.class);
60

    
61
    private static final File tmpDir = new File(System.getProperty("java.io.tmpdir"));
62

    
63
    private Opmode opmode = null;
64

    
65
    private final String baseUri;
66

    
67
    private File rdfFile = null;
68

    
69
    private final Model model = null;
70

    
71
    private Dataset dataset = null;
72

    
73
    public enum Opmode{
74
        SPARCLE_ENDPOINT, RDF_ARCHIVE;
75
    }
76

    
77
    /**
78
     *
79
     */
80
    public SparqlClient(String baseUri, Opmode opmode) {
81
        this.baseUri = baseUri;
82
        this.opmode = opmode;
83
        if(opmode.equals(Opmode.RDF_ARCHIVE)) {
84
            if(baseUri != null) {
85
                this.rdfFile = downloadAndExtract();
86
            }
87
            try {
88
                createStore();
89
//                loadModel();
90
            } catch (IOException e) {
91
                // TODO Auto-generated catch block
92
                e.printStackTrace();
93
            }
94
        }
95
    }
96

    
97
    /**
98
     *
99
     */
100
    private File downloadAndExtract() {
101
        File expandedFile = null;
102
        CloseableHttpClient httpClient = HttpClients.createDefault();
103
        CloseableHttpResponse response;
104
        try {
105
            // 1. download and store in local filesystem in TMP
106
            logger.debug("downloading rdf file from " + baseUri);
107
            HttpGet httpGet = new HttpGet(baseUri);
108
            response = httpClient.execute(httpGet);
109
            String archiveFileName = FilenameUtils.getName(httpGet.getURI().getRawPath());
110
            File archiveFile = new File(tmpDir, archiveFileName);
111
            FileOutputStream fout = new FileOutputStream(archiveFile);
112
            IOUtils.copy(response.getEntity().getContent(), new FileOutputStream(archiveFile));
113
            fout.close();
114
            logger.debug(archiveFile.length() + " bytes downloaded to " + archiveFile.getCanonicalPath());
115

    
116
            // 2. extract the archive
117
            FileInputStream fin = new FileInputStream(archiveFile);
118
            InputStream ain = null;
119

    
120
            if(GzipUtils.isCompressedFilename(archiveFileName)) {
121
                logger.debug("Extracting GZIP file " + archiveFile.getCanonicalPath());
122
                ain = new GzipCompressorInputStream(fin);
123
            } else {
124
                // TO UNZIP
125
                //ArchiveInputStream ain = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, fin);
126
            }
127

    
128
            expandedFile = new File(tmpDir, GzipUtils.getUncompressedFilename(archiveFileName));
129
            fout = new FileOutputStream(expandedFile);
130
            IOUtils.copy(ain, fout);
131
            fout.close();
132
            fin.close();
133
            logger.debug("Extracted to " + expandedFile.getCanonicalPath());
134
        } catch (ClientProtocolException e) {
135
            // TODO Auto-generated catch block
136
            e.printStackTrace();
137
        } catch (IOException e) {
138
            // TODO Auto-generated catch block
139
            e.printStackTrace();
140
        }
141
        return expandedFile;
142
    }
143

    
144
    /**
145
     * WARNING!!! This needs at least 1.5GB of heap space!!!
146
     * set -Xmx1500M
147
     *
148
     * NOTE: The bulkloader is a faster way to load data into an empty dataset than just using the Jena update operations.
149
     * the bulkloader also requires less memory
150
     * It is accessed through the command line utility tdbloader.
151
     * @throws IOException
152
     */
153
    private void createStore() throws IOException {
154

    
155
        File tdbStoreFolder = new File(tmpDir, "drf_tnb_store" + File.separator);
156
        if(tdbStoreFolder.exists()) {
157
            if(rdfFile != null) {
158
                FileUtils.cleanDirectory(tdbStoreFolder);
159
            }
160
        } else {
161
            tdbStoreFolder.mkdirs();
162
        }
163
        Location location = Location.create(tdbStoreFolder.toString());
164

    
165
        Dataset dataset = TDBFactory.createDataset(location);
166

    
167
        dataset.begin(ReadWrite.READ) ;
168
        // Get model inside the transaction
169
        Model model = dataset.getDefaultModel() ;
170
        dataset.end();
171

    
172
        if(rdfFile != null) {
173
            dataset.begin(ReadWrite.WRITE);
174
            model = dataset.getDefaultModel();
175
            // parse InputStream as RDF in Turtle format
176
            InputStream fin = new FileInputStream(rdfFile);
177
            logger.info("loading DRF/XML into TDB store");
178
            model.read(fin, null, "RDF/XML");
179
            logger.info("loading DRF/XML done");
180
            dataset.end();
181
            logger.info("rdf loaded into TDB store at " + tdbStoreFolder);
182
        }
183

    
184
        this.dataset = dataset;
185
    }
186

    
187
    public String select(String queryString) throws DRFChecklistException {
188

    
189
        QueryExecution qe = executionFor(queryString);
190

    
191
        try {
192
            ResultSet results = qe.execSelect();
193
            System.err.println(ResultSetFormatter.asText(results));
194
        } catch (HttpException e) {
195
            switch(e.getResponseCode()) {
196
                // interpretation based on
197
                // http://image.slidesharecdn.com/swtss1006sparql-100614020655-phpapp02/95/semantic-web-technologies-ss-2010-06-sparql-46-728.jpg?cb=1276481316
198
                case 400:
199
                    throw new DRFChecklistException("Malformed Query ?", e);
200
                case 500:
201
                    throw new DRFChecklistException("Query Request Refused ?", e);
202
                default:
203
                    throw e;
204
            }
205
        } finally {
206
            // Important - free up resources used running the query
207
            qe.close();
208
        }
209

    
210
        return null;
211
    }
212

    
213
    public Model describe(String queryString) throws DRFChecklistException {
214

    
215
        QueryExecution qe = executionFor(queryString);
216
        Model model = null;
217
        try {
218
            model = qe.execDescribe();
219
            if(dataset != null) {
220
                dataset.begin(ReadWrite.READ) ;
221
            }
222
            if(dataset== null && logger.isDebugEnabled()) {
223
                model.write(System.err);
224
            }
225
            if(dataset != null) {
226
                dataset.end();
227
            }
228

    
229
        } catch (HttpException e) {
230
            switch(e.getResponseCode()) {
231
                // interpretation based on
232
                // http://image.slidesharecdn.com/swtss1006sparql-100614020655-phpapp02/95/semantic-web-technologies-ss-2010-06-sparql-46-728.jpg?cb=1276481316
233
                case 400:
234
                    throw new DRFChecklistException("Malformed Query ?", e);
235
                case 500:
236
                    throw new DRFChecklistException("Query Request Refused ?", e);
237
                default:
238
                    throw e;
239
            }
240
        } finally {
241
            // Important - free up resources used running the query
242
            qe.close();
243
        }
244

    
245
        if(model != null && logger.isDebugEnabled()) {
246
            StringBuilder msg = new StringBuilder();
247
            msg.append("subjects in response:\n");
248
            int i = 1;
249
            for(ResIterator it = model.listSubjects(); it.hasNext(); ++i) {
250
                Resource res = it.next();
251
                msg.append("    " + i + ": " + res.toString() + "\n");
252
            }
253
            logger.debug(msg.toString());
254
        }
255

    
256
        return model;
257
    }
258

    
259
    /**
260
     * @param queryString
261
     * @return
262
     */
263
    private QueryExecution executionFor(String queryString) {
264

    
265
        Query query = QueryFactory.create(queryString);
266

    
267
        QueryExecution qe;
268
        // Execute the query and obtain results
269
        if(opmode.equals(Opmode.SPARCLE_ENDPOINT)) {
270
            qe = QueryExecutionFactory.sparqlService(baseUri, query);
271
        } else {
272
            // RDF_ARCHIVE
273
            if(model != null) {
274
                // in-memory model
275
                qe = QueryExecutionFactory.create(queryString, model);
276
            } else if(dataset != null) {
277
                // TDB Store
278
                qe = QueryExecutionFactory.create(queryString, dataset);
279
            } else {
280
                throw new RuntimeException("Opmode is RDF_ARCHIVE but model was null");
281
            }
282
        }
283
        return qe;
284
    }
285

    
286
    /**
287
     * @param subject
288
     * @param nameSpace
289
     * @param localName
290
     * @return
291
     */
292
    public RDFNode asSingleObject(Resource subject, RdfSchema nameSpace, String localName) {
293
        RDFNode node = null;
294
        Resource _subject = subject;
295
        try {
296
            boolean hasNoPropertiesInGraph = !_subject.listProperties().hasNext();
297
            if(_subject.isURIResource() && hasNoPropertiesInGraph ) {
298
                logger.debug("loading RDF for UriResource " + _subject.getURI());
299
                _subject = getFromUri(_subject.getURI());
300
            }
301
            Model _model = _subject.getModel();
302
            node = _subject.listProperties(_model.getProperty(nameSpace.schemaUri(), localName)).next().getObject();
303
        } catch (NoSuchElementException e) {
304
            if(logger.isDebugEnabled()) {
305
                logger.debug(_subject.getURI() + " " +  nameSpace + ":" + localName + " not found in current graph");
306
                printProperties(_subject);
307
            }
308
        }
309
        return node;
310
    }
311

    
312
    /**
313
     * @param subject
314
     * @param nameSpace
315
     * @param localName
316
     * @return
317
     */
318
    public String objectAsString(Resource subject, RdfSchema nameSpace, String localName) {
319
        String txt = null;
320
        RDFNode node = asSingleObject(subject, nameSpace, localName);
321
        if(node != null) {
322
            txt = node.toString();
323
        }
324
        return txt;
325
    }
326

    
327
    /**
328
     * @param subject
329
     * @param nameSpace
330
     * @param localName
331
     * @return
332
     */
333
    public Resource objectAsResource(Resource subject, RdfSchema nameSpace, String localName) {
334
        Resource resource = null;
335
        RDFNode node = asSingleObject(subject, nameSpace, localName);
336
        if(node != null) {
337
            node.isResource();
338
            resource  = node.asResource();
339
        }
340
        return resource;
341
    }
342

    
343
    /**
344
     * @param subject
345
     * @param nameSpace
346
     * @param localName
347
     * @return
348
     */
349
    public URI objectAsURI(Resource subject, RdfSchema nameSpace, String localName) {
350
        URI uri = null;
351
        RDFNode node = asSingleObject(subject, nameSpace, localName);
352
        if(node != null) {
353
            node.isURIResource();
354
            try {
355
                uri  = new URI(node.asResource().getURI());
356
            } catch (URISyntaxException e) {
357
                // this should actually never happen
358
                throw new RuntimeException(e);
359
            }
360
        }
361
        return uri;
362
    }
363

    
364
    /**
365
     * @param subject
366
     */
367
    private void printProperties(Resource subject) {
368
        for( StmtIterator it = subject.listProperties(); it.hasNext(); ) {
369
            System.err.println(it.next().toString());
370
        }
371
    }
372

    
373
    public Resource getFromUri(String uri) {
374

    
375
        final Model model = ModelFactory.createDefaultModel();
376
        model.read(uri);
377
        if(logger.isDebugEnabled()) {
378
            model.write(System.err);
379
        }
380
        return model.getResource(uri);
381

    
382
    }
383

    
384
    /**
385
     * @param matchedResourceURI
386
     * @return
387
     * @throws DRFChecklistException
388
     */
389
    public Resource getFromUri(URI matchedResourceURI) {
390
        return getFromUri(matchedResourceURI.toString());
391
    }
392

    
393
    /**
394
     * @return the rdfFile
395
     */
396
    public File getRdfFile() {
397
        return rdfFile;
398
    }
399

    
400

    
401
}
(5-5/5)