// $Id$
/**
 * Copyright (C) 2015 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
package org.bgbm.biovel.drf.store;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.tdb.TDBFactory;
import org.apache.jena.tdb.base.file.Location;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author a.kohlbecker
 * @date Oct 19, 2015
 */
public class TripleStore {

    protected Logger logger = LoggerFactory.getLogger(TripleStore.class);

    private static final File tmpDir = new File(System.getProperty("java.io.tmpdir"));
    private static final File userHomeDir = new File(System.getProperty("user.home"));
    private static final File utisHome = new File(userHomeDir, ".utis");

    private File rdfFile = null;

    private Dataset dataset = null;

    public TripleStore() {
    }

    /**
     * @param rdfFileUri
     *            the location of the file to load the RDF triples from
     * @throws IOException
     */
    public void loadIntoStore(String rdfFileUri) throws IOException {
        this.rdfFile = downloadAndExtract(rdfFileUri);
        createStore();
    }

    /**
     * WARNING: loading a large RDF file this way needs at least 1.5 GB of heap
     * space, so set -Xmx1500M.
     *
     * NOTE: The bulk loader is a faster way to load data into an empty dataset
     * than the Jena update operations used here, and it also requires less
     * memory. It is accessed through the command line utility tdbloader2:
     *
     * rm /tmp/drf_tnb_store/*; bin/tdbloader2 -l /tmp/drf_tnb_store /tmp/species.rdf
     *
     * @throws IOException
     */
    private void createStore() throws IOException {

        boolean doClearStoreLocation = rdfFile != null;
        boolean doLoadRdfFile = rdfFile != null;

        File tdbStoreFolder = new File(utisHome, "tdb" + File.separator);
        if (tdbStoreFolder.exists()) {
            if (doClearStoreLocation) {
                FileUtils.cleanDirectory(tdbStoreFolder);
            }
        } else {
            tdbStoreFolder.mkdirs();
        }
        Location location = Location.create(tdbStoreFolder.toString());

        Dataset dataset = TDBFactory.createDataset(location);

        logger.info("Using TDB store at " + location);

        // report the current state of the store inside a read transaction
        dataset.begin(ReadWrite.READ);
        // get the model inside the transaction
        Model model = dataset.getDefaultModel();
        logger.info("Dataset in TDB has " + dataset.asDatasetGraph().size() + " named graphs");
        logger.info("Model-size: " + model.size());
        dataset.end();

        if (doLoadRdfFile) {
            dataset.begin(ReadWrite.WRITE);
            model = dataset.getDefaultModel();
            // parse the InputStream as RDF/XML
            InputStream fin = new FileInputStream(rdfFile);
            logger.info("loading RDF/XML into TDB store");
            model.read(fin, null, "RDF/XML");
            logger.info("loading RDF/XML done");
            logger.info("Dataset in TDB has " + dataset.asDatasetGraph().size() + " named graphs");
            logger.info("Model-size: " + model.size());
            dataset.commit();
            dataset.end();
            fin.close();
            logger.info("rdf loaded into TDB store at " + tdbStoreFolder);
        }

        this.setDataset(dataset);
    }

    /**
     * Downloads the file at the given URI into the temporary folder and, if it
     * is gzip compressed, extracts it there.
     *
     * @param rdfFileUri
     *            the URI to download the RDF file from
     * @return the downloaded and, if applicable, extracted file, or null if
     *         the download failed
     */
    private File downloadAndExtract(String rdfFileUri) {
        File expandedFile = null;
        CloseableHttpClient httpClient = HttpClients.createDefault();
        CloseableHttpResponse response;
        try {
            // 1. download and store in the local filesystem in TMP
            logger.debug("downloading rdf file from " + rdfFileUri);
            HttpGet httpGet = new HttpGet(rdfFileUri);
            response = httpClient.execute(httpGet);
            String archiveFileName = FilenameUtils.getName(httpGet.getURI().getRawPath());
            File archiveFile = new File(tmpDir, archiveFileName);
            FileOutputStream fout = new FileOutputStream(archiveFile);
            IOUtils.copy(response.getEntity().getContent(), fout);
            fout.close();
            response.close();
            logger.debug(archiveFile.length() + " bytes downloaded to " + archiveFile.getCanonicalPath());

            // 2. extract the archive
            FileInputStream fin = new FileInputStream(archiveFile);
            InputStream ain;

            if (GzipUtils.isCompressedFilename(archiveFileName)) {
                logger.debug("Extracting GZIP file " + archiveFile.getCanonicalPath());
                ain = new GzipCompressorInputStream(fin);
            } else {
                // not gzip compressed, so there is nothing to extract
                // TO UNZIP:
                // ArchiveInputStream ain = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, fin);
                fin.close();
                return archiveFile;
            }

            expandedFile = new File(tmpDir, GzipUtils.getUncompressedFilename(archiveFileName));
            fout = new FileOutputStream(expandedFile);
            IOUtils.copy(ain, fout);
            fout.close();
            fin.close();
            logger.debug("Extracted to " + expandedFile.getCanonicalPath());
        } catch (ClientProtocolException e) {
            logger.error("HTTP protocol error while downloading " + rdfFileUri, e);
        } catch (IOException e) {
            logger.error("IO error while downloading or extracting " + rdfFileUri, e);
        }
        return expandedFile;
    }

    /**
     * @return the dataset
     */
    public Dataset getDataset() {
        return dataset;
    }

    /**
     * @param dataset the dataset to set
     */
    public void setDataset(Dataset dataset) {
        this.dataset = dataset;
    }

}
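
For reference, a minimal usage sketch follows. It is not part of the original file: the class name TripleStoreExample, the download URL, and the SPARQL query are illustrative placeholders, and it assumes the loaded dataset is queried through the standard Jena ARQ API (QueryExecutionFactory), which is available alongside the jena-tdb dependency used above.

package org.bgbm.biovel.drf.store;

import java.io.IOException;

import org.apache.jena.query.Dataset;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.query.ResultSet;

public class TripleStoreExample {

    public static void main(String[] args) throws IOException {
        TripleStore store = new TripleStore();
        // hypothetical URL; any gzipped RDF/XML dump should work
        store.loadIntoStore("http://example.org/dumps/species.rdf.gz");

        // query the TDB-backed dataset inside a read transaction,
        // mirroring the READ/end() pattern used in createStore()
        Dataset dataset = store.getDataset();
        dataset.begin(ReadWrite.READ);
        QueryExecution qe = QueryExecutionFactory.create(
                "SELECT * WHERE { ?s ?p ?o } LIMIT 10", dataset);
        try {
            ResultSet results = qe.execSelect();
            while (results.hasNext()) {
                System.out.println(results.next());
            }
        } finally {
            qe.close();
            dataset.end();
        }
    }
}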