
Revision fb736afc

Added by Andreas Kohlbecker over 8 years ago

externalizing TripleStore functionality from SparqlClient
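
The TripleStore class that takes over this functionality is not itself part of this diff; only its getDataset() method is visible in the changed code below. As a rough orientation, a minimal sketch of what such a class might encapsulate, reconstructed from the createStore() logic removed from SparqlClient, could look like the following. The constructor and loadIntoStore() are assumptions for illustration; only getDataset() actually appears in this revision.

package org.bgbm.biovel.drf.store;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.jena.query.Dataset;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.tdb.TDBFactory;
import org.apache.jena.tdb.base.file.Location;

// Hypothetical sketch of the externalized TripleStore, not the actual implementation.
public class TripleStore {

    private final Dataset dataset;

    // Assumed constructor: opens (or creates) a TDB store at the given folder,
    // mirroring the TDBFactory/Location usage removed from SparqlClient.createStore().
    public TripleStore(File tdbStoreFolder) {
        Location location = Location.create(tdbStoreFolder.toString());
        this.dataset = TDBFactory.createDataset(location);
    }

    // Assumed helper: loads an RDF/XML file into the default model inside a write transaction,
    // as the removed createStore() did for the downloaded archive.
    public void loadIntoStore(File rdfFile) throws IOException {
        dataset.begin(ReadWrite.WRITE);
        try (InputStream in = new FileInputStream(rdfFile)) {
            Model model = dataset.getDefaultModel();
            model.read(in, null, "RDF/XML");
            dataset.commit();
        } finally {
            dataset.end();
        }
    }

    // The only method visible in this diff: SparqlClient queries against this dataset.
    public Dataset getDataset() {
        return dataset;
    }
}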

View differences:

src/main/java/org/bgbm/biovel/drf/query/SparqlClient.java
@@ -9,26 +9,12 @@
 package org.bgbm.biovel.drf.query;
 
 import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NoSuchElementException;
 
-import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
-import org.apache.commons.compress.compressors.gzip.GzipUtils;
-import org.apache.commons.compress.utils.IOUtils;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.FilenameUtils;
-import org.apache.http.client.ClientProtocolException;
-import org.apache.http.client.methods.CloseableHttpResponse;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClients;
 import org.apache.jena.atlas.web.HttpException;
 import org.apache.jena.query.Dataset;
 import org.apache.jena.query.Query;
@@ -45,10 +31,9 @@
 import org.apache.jena.rdf.model.ResIterator;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.rdf.model.StmtIterator;
-import org.apache.jena.tdb.TDBFactory;
-import org.apache.jena.tdb.base.file.Location;
 import org.bgbm.biovel.drf.checklist.DRFChecklistException;
 import org.bgbm.biovel.drf.checklist.EEA_BDC_Client.RdfSchema;
+import org.bgbm.biovel.drf.store.TripleStore;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -65,138 +50,30 @@
     private static final File userHomeDir = new File(System.getProperty("user.home"));
     private static final File utisHome = new File(userHomeDir, ".utis");
 
-    private Opmode opmode = null;
+    private String baseUri = null;
 
-    private final String baseUri;
+    private TripleStore tripleStore = null;
 
-    private File rdfFile = null;
-
-    private Dataset dataset = null;
+    /**
+     * A model for caching
+     */
+    private final Model cache = null;
 
-    public enum Opmode{
-        SPARCLE_ENDPOINT, RDF_ARCHIVE;
-    }
 
     /**
      *
      */
-    public SparqlClient(String baseUri, Opmode opmode) {
+    public SparqlClient(String baseUri) {
         this.baseUri = baseUri;
-        this.opmode = opmode;
-        if(opmode.equals(Opmode.RDF_ARCHIVE)) {
-            if(baseUri != null) {
-                this.rdfFile = downloadAndExtract();
-            }
-            try {
-                createStore();
-            } catch (IOException e) {
-                // TODO Auto-generated catch block
-                e.printStackTrace();
-            }
-        }
     }
 
     /**
-     *
+     * @param tripleStore
      */
-    private File downloadAndExtract() {
-        File expandedFile = null;
-        CloseableHttpClient httpClient = HttpClients.createDefault();
-        CloseableHttpResponse response;
-        try {
-            // 1. download and store in local filesystem in TMP
-            logger.debug("downloading rdf file from " + baseUri);
-            HttpGet httpGet = new HttpGet(baseUri);
-            response = httpClient.execute(httpGet);
-            String archiveFileName = FilenameUtils.getName(httpGet.getURI().getRawPath());
-            File archiveFile = new File(tmpDir, archiveFileName);
-            FileOutputStream fout = new FileOutputStream(archiveFile);
-            IOUtils.copy(response.getEntity().getContent(), new FileOutputStream(archiveFile));
-            fout.close();
-            logger.debug(archiveFile.length() + " bytes downloaded to " + archiveFile.getCanonicalPath());
-
-            // 2. extract the archive
-            FileInputStream fin = new FileInputStream(archiveFile);
-            InputStream ain = null;
-
-            if(GzipUtils.isCompressedFilename(archiveFileName)) {
-                logger.debug("Extracting GZIP file " + archiveFile.getCanonicalPath());
-                ain = new GzipCompressorInputStream(fin);
-            } else {
-                // TO UNZIP
-                //ArchiveInputStream ain = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, fin);
-            }
-
-            expandedFile = new File(tmpDir, GzipUtils.getUncompressedFilename(archiveFileName));
-            fout = new FileOutputStream(expandedFile);
-            IOUtils.copy(ain, fout);
-            fout.close();
-            fin.close();
-            logger.debug("Extracted to " + expandedFile.getCanonicalPath());
-        } catch (ClientProtocolException e) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
-        } catch (IOException e) {
-            // TODO Auto-generated catch block
-            e.printStackTrace();
-        }
-        return expandedFile;
+    public SparqlClient(TripleStore tripleStore) {
+        this.tripleStore = tripleStore;
     }
 
-    /**
-     * WARNING!!! This needs at least 1.5GB of heap space!!!
-     * set -Xmx1500M
-     *
-     * NOTE: The bulkloader is a faster way to load data into an empty dataset than just using the Jena update operations.
-     * the bulkloader also requires less memory
-     * It is accessed through the command line utility tdbloader.
-     *
-     * rm /tmp/drf_tnb_store/*; bin/tdbloader2 -l /tmp/drf_tnb_store /tmp/species.rdf
-     * @throws IOException
-     */
-    private void createStore() throws IOException {
-
-        boolean doClearStoreLocation = rdfFile != null;
-        boolean doLoadRdfFile = rdfFile != null;
-
-        File tdbStoreFolder = new File(utisHome, "tdb" + File.separator);
-        if(tdbStoreFolder.exists()) {
-            if( doClearStoreLocation ) {
-                FileUtils.cleanDirectory(tdbStoreFolder);
-            }
-        } else {
-            tdbStoreFolder.mkdirs();
-        }
-        Location location = Location.create(tdbStoreFolder.toString());
-
-        Dataset dataset = TDBFactory.createDataset(location);
-
-        logger.info("Using TDB store at " + location);
-
-        dataset.begin(ReadWrite.READ) ;
-        // Get model inside the transaction
-        Model model = dataset.getDefaultModel() ;
-        logger.info("Dataset in TDB has " + dataset.asDatasetGraph().size() + " named graphs");
-        logger.info("Model-size: " + model.size());
-        dataset.end();
-
-        if(doLoadRdfFile) {
-            dataset.begin(ReadWrite.WRITE);
-            model = dataset.getDefaultModel();
-            // parse InputStream as RDF in Turtle format
-            InputStream fin = new FileInputStream(rdfFile);
-            logger.info("loading RDF/XML into TDB store");
-            model.read(fin, null, "RDF/XML");
-            logger.info("loading RDF/XML done");
-            logger.info("Dataset in TDB has " + dataset.asDatasetGraph().size() + " named graphs");
-            logger.info("Model-size: " + model.size());
-            dataset.commit();
-            dataset.end();
-            logger.info("rdf loaded into TDB store at " + tdbStoreFolder);
-        }
-
-        this.dataset = dataset;
-    }
 
     public String select(String queryString) throws DRFChecklistException {
 
@@ -229,8 +106,8 @@
         QueryExecution qe = executionFor(queryString);
         Model result = null;
         try {
-            if(dataset != null) {
-                dataset.begin(ReadWrite.READ) ;
+            if(tripleStore != null) {
+                tripleStore.getDataset().begin(ReadWrite.READ);
             }
             result = qe.execDescribe();
             if(logger.isDebugEnabled()) {
@@ -263,8 +140,8 @@
             }
             logger.debug(msg.toString());
         }
-        if(dataset != null) {
-            dataset.end();
+        if(tripleStore != null) {
+            tripleStore.getDataset().end();
         }
 
         return result;
@@ -276,15 +153,17 @@
      */
     private QueryExecution executionFor(String queryString) {
 
-        QueryExecution qe;
-        if(opmode.equals(Opmode.SPARCLE_ENDPOINT)) {
+
+        if(baseUri != null) {
             Query query = QueryFactory.create(queryString);
-            qe = QueryExecutionFactory.sparqlService(baseUri, query);
-        } else {
+            return QueryExecutionFactory.sparqlService(baseUri, query);
+        }
+        if(tripleStore != null) {
             // local TDB Store
-            qe = QueryExecutionFactory.create(queryString, dataset);
+            return QueryExecutionFactory.create(queryString, tripleStore.getDataset());
         }
-        return qe;
+
+        return null;
     }
 
     /**
@@ -454,7 +333,8 @@
     public Resource getFromUri(String uri) {
 
         Model model;
-        if(dataset != null) {
+        if(tripleStore != null) {
+            Dataset dataset = tripleStore.getDataset();
             dataset.begin(ReadWrite.READ) ;
             model = dataset.getDefaultModel();
             dataset.end();
@@ -484,12 +364,5 @@
         return getFromUri(matchedResourceURI.toString());
     }
 
-    /**
-     * @return the rdfFile
-     */
-    public File getRdfFile() {
-        return rdfFile;
-    }
-
 
 }
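
With this change the query backend is selected at construction time rather than via the removed Opmode enum: a baseUri routes executionFor() to QueryExecutionFactory.sparqlService(), a TripleStore routes it to the local TDB dataset. A rough usage sketch follows; the endpoint URL is a placeholder and the TripleStore constructor is the assumed one from the sketch above, while the SparqlClient constructors and select() are taken from this revision.

import java.io.File;

import org.bgbm.biovel.drf.query.SparqlClient;
import org.bgbm.biovel.drf.store.TripleStore;

public class SparqlClientUsageExample {

    public static void main(String[] args) throws Exception {

        // Variant 1 - remote SPARQL endpoint: executionFor() calls
        // QueryExecutionFactory.sparqlService(baseUri, query). The URL is a placeholder.
        SparqlClient endpointClient = new SparqlClient("http://example.org/sparql");

        // Variant 2 - local TDB store: executionFor() queries tripleStore.getDataset() directly.
        // The TripleStore constructor used here is the assumed one from the sketch above.
        File tdbFolder = new File(new File(System.getProperty("user.home"), ".utis"), "tdb");
        SparqlClient storeClient = new SparqlClient(new TripleStore(tdbFolder));

        // Both variants are queried the same way; select() throws DRFChecklistException on failure.
        String result = endpointClient.select("SELECT ?s WHERE { ?s ?p ?o } LIMIT 10");
        System.out.println(result);
    }
}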
