Revision fb736afc
Added by Andreas Kohlbecker over 8 years ago
src/main/java/org/bgbm/biovel/drf/query/SparqlClient.java | ||
---|---|---|
9 | 9 |
package org.bgbm.biovel.drf.query; |
10 | 10 |
|
11 | 11 |
import java.io.File; |
12 |
import java.io.FileInputStream; |
|
13 |
import java.io.FileOutputStream; |
|
14 |
import java.io.IOException; |
|
15 |
import java.io.InputStream; |
|
16 | 12 |
import java.net.URI; |
17 | 13 |
import java.net.URISyntaxException; |
18 | 14 |
import java.util.ArrayList; |
19 | 15 |
import java.util.List; |
20 | 16 |
import java.util.NoSuchElementException; |
21 | 17 |
|
22 |
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; |
|
23 |
import org.apache.commons.compress.compressors.gzip.GzipUtils; |
|
24 |
import org.apache.commons.compress.utils.IOUtils; |
|
25 |
import org.apache.commons.io.FileUtils; |
|
26 |
import org.apache.commons.io.FilenameUtils; |
|
27 |
import org.apache.http.client.ClientProtocolException; |
|
28 |
import org.apache.http.client.methods.CloseableHttpResponse; |
|
29 |
import org.apache.http.client.methods.HttpGet; |
|
30 |
import org.apache.http.impl.client.CloseableHttpClient; |
|
31 |
import org.apache.http.impl.client.HttpClients; |
|
32 | 18 |
import org.apache.jena.atlas.web.HttpException; |
33 | 19 |
import org.apache.jena.query.Dataset; |
34 | 20 |
import org.apache.jena.query.Query; |
... | ... | |
45 | 31 |
import org.apache.jena.rdf.model.ResIterator; |
46 | 32 |
import org.apache.jena.rdf.model.Resource; |
47 | 33 |
import org.apache.jena.rdf.model.StmtIterator; |
48 |
import org.apache.jena.tdb.TDBFactory; |
|
49 |
import org.apache.jena.tdb.base.file.Location; |
|
50 | 34 |
import org.bgbm.biovel.drf.checklist.DRFChecklistException; |
51 | 35 |
import org.bgbm.biovel.drf.checklist.EEA_BDC_Client.RdfSchema; |
36 |
import org.bgbm.biovel.drf.store.TripleStore; |
|
52 | 37 |
import org.slf4j.Logger; |
53 | 38 |
import org.slf4j.LoggerFactory; |
54 | 39 |
|
... | ... | |
65 | 50 |
private static final File userHomeDir = new File(System.getProperty("user.home")); |
66 | 51 |
private static final File utisHome = new File(userHomeDir, ".utis"); |
67 | 52 |
|
68 |
private Opmode opmode = null;
|
|
53 |
private String baseUri = null;
|
|
69 | 54 |
|
70 |
private final String baseUri;
|
|
55 |
private TripleStore tripleStore = null;
|
|
71 | 56 |
|
72 |
private File rdfFile = null; |
|
73 |
|
|
74 |
private Dataset dataset = null; |
|
57 |
/** |
|
58 |
* A model for caching |
|
59 |
*/ |
|
60 |
private final Model cache = null; |
|
75 | 61 |
|
76 |
public enum Opmode{ |
|
77 |
SPARCLE_ENDPOINT, RDF_ARCHIVE; |
|
78 |
} |
|
79 | 62 |
|
80 | 63 |
/** |
81 | 64 |
* |
82 | 65 |
*/ |
83 |
public SparqlClient(String baseUri, Opmode opmode) {
|
|
66 |
public SparqlClient(String baseUri) { |
|
84 | 67 |
this.baseUri = baseUri; |
85 |
this.opmode = opmode; |
|
86 |
if(opmode.equals(Opmode.RDF_ARCHIVE)) { |
|
87 |
if(baseUri != null) { |
|
88 |
this.rdfFile = downloadAndExtract(); |
|
89 |
} |
|
90 |
try { |
|
91 |
createStore(); |
|
92 |
} catch (IOException e) { |
|
93 |
// TODO Auto-generated catch block |
|
94 |
e.printStackTrace(); |
|
95 |
} |
|
96 |
} |
|
97 | 68 |
} |
98 | 69 |
|
99 | 70 |
/** |
100 |
* |
|
71 |
* @param tripleStore
|
|
101 | 72 |
*/ |
102 |
private File downloadAndExtract() { |
|
103 |
File expandedFile = null; |
|
104 |
CloseableHttpClient httpClient = HttpClients.createDefault(); |
|
105 |
CloseableHttpResponse response; |
|
106 |
try { |
|
107 |
// 1. download and store in local filesystem in TMP |
|
108 |
logger.debug("downloading rdf file from " + baseUri); |
|
109 |
HttpGet httpGet = new HttpGet(baseUri); |
|
110 |
response = httpClient.execute(httpGet); |
|
111 |
String archiveFileName = FilenameUtils.getName(httpGet.getURI().getRawPath()); |
|
112 |
File archiveFile = new File(tmpDir, archiveFileName); |
|
113 |
FileOutputStream fout = new FileOutputStream(archiveFile); |
|
114 |
IOUtils.copy(response.getEntity().getContent(), new FileOutputStream(archiveFile)); |
|
115 |
fout.close(); |
|
116 |
logger.debug(archiveFile.length() + " bytes downloaded to " + archiveFile.getCanonicalPath()); |
|
117 |
|
|
118 |
// 2. extract the archive |
|
119 |
FileInputStream fin = new FileInputStream(archiveFile); |
|
120 |
InputStream ain = null; |
|
121 |
|
|
122 |
if(GzipUtils.isCompressedFilename(archiveFileName)) { |
|
123 |
logger.debug("Extracting GZIP file " + archiveFile.getCanonicalPath()); |
|
124 |
ain = new GzipCompressorInputStream(fin); |
|
125 |
} else { |
|
126 |
// TO UNZIP |
|
127 |
//ArchiveInputStream ain = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, fin); |
|
128 |
} |
|
129 |
|
|
130 |
expandedFile = new File(tmpDir, GzipUtils.getUncompressedFilename(archiveFileName)); |
|
131 |
fout = new FileOutputStream(expandedFile); |
|
132 |
IOUtils.copy(ain, fout); |
|
133 |
fout.close(); |
|
134 |
fin.close(); |
|
135 |
logger.debug("Extracted to " + expandedFile.getCanonicalPath()); |
|
136 |
} catch (ClientProtocolException e) { |
|
137 |
// TODO Auto-generated catch block |
|
138 |
e.printStackTrace(); |
|
139 |
} catch (IOException e) { |
|
140 |
// TODO Auto-generated catch block |
|
141 |
e.printStackTrace(); |
|
142 |
} |
|
143 |
return expandedFile; |
|
73 |
public SparqlClient(TripleStore tripleStore) { |
|
74 |
this.tripleStore = tripleStore; |
|
144 | 75 |
} |
145 | 76 |
|
146 |
/** |
|
147 |
* WARNING!!! This needs at least 1.5GB of heap space!!! |
|
148 |
* set -Xmx1500M |
|
149 |
* |
|
150 |
* NOTE: The bulkloader is a faster way to load data into an empty dataset than just using the Jena update operations. |
|
151 |
* The bulkloader also requires less memory. |
|
152 |
* It is accessed through the command line utility tdbloader. |
|
153 |
* |
|
154 |
* rm /tmp/drf_tnb_store/*; bin/tdbloader2 -l /tmp/drf_tnb_store /tmp/species.rdf |
|
155 |
* @throws IOException |
|
156 |
*/ |
|
157 |
private void createStore() throws IOException { |
|
158 |
|
|
159 |
boolean doClearStoreLocation = rdfFile != null; |
|
160 |
boolean doLoadRdfFile = rdfFile != null; |
|
161 |
|
|
162 |
File tdbStoreFolder = new File(utisHome, "tdb" + File.separator); |
|
163 |
if(tdbStoreFolder.exists()) { |
|
164 |
if( doClearStoreLocation ) { |
|
165 |
FileUtils.cleanDirectory(tdbStoreFolder); |
|
166 |
} |
|
167 |
} else { |
|
168 |
tdbStoreFolder.mkdirs(); |
|
169 |
} |
|
170 |
Location location = Location.create(tdbStoreFolder.toString()); |
|
171 |
|
|
172 |
Dataset dataset = TDBFactory.createDataset(location); |
|
173 |
|
|
174 |
logger.info("Using TDB store at " + location); |
|
175 |
|
|
176 |
dataset.begin(ReadWrite.READ) ; |
|
177 |
// Get model inside the transaction |
|
178 |
Model model = dataset.getDefaultModel() ; |
|
179 |
logger.info("Dataset in TDB has " + dataset.asDatasetGraph().size() + " named graphs"); |
|
180 |
logger.info("Model-size: " + model.size()); |
|
181 |
dataset.end(); |
|
182 |
|
|
183 |
if(doLoadRdfFile) { |
|
184 |
dataset.begin(ReadWrite.WRITE); |
|
185 |
model = dataset.getDefaultModel(); |
|
186 |
// parse InputStream as RDF/XML |
|
187 |
InputStream fin = new FileInputStream(rdfFile); |
|
188 |
logger.info("loading RDF/XML into TDB store"); |
|
189 |
model.read(fin, null, "RDF/XML"); |
|
190 |
logger.info("loading RDF/XML done"); |
|
191 |
logger.info("Dataset in TDB has " + dataset.asDatasetGraph().size() + " named graphs"); |
|
192 |
logger.info("Model-size: " + model.size()); |
|
193 |
dataset.commit(); |
|
194 |
dataset.end(); |
|
195 |
logger.info("rdf loaded into TDB store at " + tdbStoreFolder); |
|
196 |
} |
|
197 |
|
|
198 |
this.dataset = dataset; |
|
199 |
} |
|
200 | 77 |
|
201 | 78 |
public String select(String queryString) throws DRFChecklistException { |
202 | 79 |
|
... | ... | |
229 | 106 |
QueryExecution qe = executionFor(queryString); |
230 | 107 |
Model result = null; |
231 | 108 |
try { |
232 |
if(dataset != null) {
|
|
233 |
dataset.begin(ReadWrite.READ) ;
|
|
109 |
if(tripleStore != null) {
|
|
110 |
tripleStore.getDataset().begin(ReadWrite.READ);
|
|
234 | 111 |
} |
235 | 112 |
result = qe.execDescribe(); |
236 | 113 |
if(logger.isDebugEnabled()) { |
... | ... | |
263 | 140 |
} |
264 | 141 |
logger.debug(msg.toString()); |
265 | 142 |
} |
266 |
if(dataset != null) {
|
|
267 |
dataset.end();
|
|
143 |
if(tripleStore != null) {
|
|
144 |
tripleStore.getDataset().end();
|
|
268 | 145 |
} |
269 | 146 |
|
270 | 147 |
return result; |
... | ... | |
276 | 153 |
*/ |
277 | 154 |
private QueryExecution executionFor(String queryString) { |
278 | 155 |
|
279 |
QueryExecution qe; |
|
280 |
if(opmode.equals(Opmode.SPARCLE_ENDPOINT)) {
|
|
156 |
|
|
157 |
if(baseUri != null) {
|
|
281 | 158 |
Query query = QueryFactory.create(queryString); |
282 |
qe = QueryExecutionFactory.sparqlService(baseUri, query); |
|
283 |
} else { |
|
159 |
return QueryExecutionFactory.sparqlService(baseUri, query); |
|
160 |
} |
|
161 |
if(tripleStore != null) { |
|
284 | 162 |
// local TDB Store |
285 |
qe = QueryExecutionFactory.create(queryString, dataset);
|
|
163 |
return QueryExecutionFactory.create(queryString, tripleStore.getDataset());
|
|
286 | 164 |
} |
287 |
return qe; |
|
165 |
|
|
166 |
return null; |
|
288 | 167 |
} |
289 | 168 |
|
290 | 169 |
/** |
... | ... | |
454 | 333 |
public Resource getFromUri(String uri) { |
455 | 334 |
|
456 | 335 |
Model model; |
457 |
if(dataset != null) { |
|
336 |
if(tripleStore != null) { |
|
337 |
Dataset dataset = tripleStore.getDataset(); |
|
458 | 338 |
dataset.begin(ReadWrite.READ) ; |
459 | 339 |
model = dataset.getDefaultModel(); |
460 | 340 |
dataset.end(); |
... | ... | |
484 | 364 |
return getFromUri(matchedResourceURI.toString()); |
485 | 365 |
} |
486 | 366 |
|
487 |
/** |
|
488 |
* @return the rdfFile |
|
489 |
*/ |
|
490 |
public File getRdfFile() { |
|
491 |
return rdfFile; |
|
492 |
} |
|
493 |
|
|
494 | 367 |
|
495 | 368 |
} |
Also available in: Unified diff
externalizing TripleStore functionality from SparqlClient