Project

General

Profile

Download (24.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
* Copyright (C) 2014 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
package eu.etaxonomy.cdm.ext.occurrence.gbif;
10

    
11
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpException;
import org.apache.log4j.Logger;

import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
import eu.etaxonomy.cdm.api.service.media.MediaInfoFileReader;
import eu.etaxonomy.cdm.common.URI;
import eu.etaxonomy.cdm.common.UriUtils;
import eu.etaxonomy.cdm.common.media.CdmImageInfo;
import eu.etaxonomy.cdm.model.agent.Institution;
import eu.etaxonomy.cdm.model.agent.Person;
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
import eu.etaxonomy.cdm.model.common.TimePeriod;
import eu.etaxonomy.cdm.model.location.Country;
import eu.etaxonomy.cdm.model.location.Point;
import eu.etaxonomy.cdm.model.location.ReferenceSystem;
import eu.etaxonomy.cdm.model.media.ImageFile;
import eu.etaxonomy.cdm.model.media.Media;
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
import eu.etaxonomy.cdm.model.name.Rank;
import eu.etaxonomy.cdm.model.name.TaxonName;
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
50

    
51
/**
 * Utility class which provides the functionality to convert a JSON response
 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
 * @author pplitzner
 * @since 22.05.2014
 */
public class GbifJsonOccurrenceParser {
58

    
59
    private static final Logger logger = Logger.getLogger(GbifJsonOccurrenceParser.class);
60

    
61
    private static final String DATASET_KEY = "datasetKey";
62
    private static final String DATASET_PROTOCOL = "protocol";
63

    
64
    private static final String KEY = "key";
65
    private static final String URL = "url";
66
    private static final String TYPE = "type";
67

    
68
    private static final String COUNTRY_CODE = "countryCode";
69
    private static final String LOCALITY = "locality";
70
    private static final String LONGITUDE = "decimalLongitude";
71
    private static final String LATITUDE = "decimalLatitude";
72
    private static final String GEOREFERENCE_PROTOCOL = "georeferenceProtocol";//reference system
73
    private static final String VERBATIM_ELEVATION = "verbatimElevation";
74
    private static final String YEAR = "year";
75
    private static final String MONTH = "month";
76
    private static final String DAY = "day";
77
    private static final String EVENT_DATE= "eventDate";
78
    private static final String RECORDED_BY= "recordedBy";//collector
79
    private static final String RECORD_NUMBER = "recordNumber";//collector number
80
    private static final String FIELD_NUMBER = "fieldNumber";//collector number
81
    private static final String EVENT_REMARKS = "eventRemarks";//gathering event description
82
    private static final String OCCURRENCE_REMARKS = "occurrenceRemarks";//ecology
83
    private static final String COLLECTION_CODE = "collectionCode";
84
    private static final String CATALOG_NUMBER = "catalogNumber";//accession number
85
    private static final String INSTITUTION_CODE = "institutionCode";
86

    
87
    protected static final String PUBLISHING_ORG_KEY = "publishingOrgKey";
88
    protected static final String PUBLISHING_COUNTRY = "publishingCountry";
89

    
90
    protected static final String EXTENSIONS = "extensions";
91
    protected static final String BASIS_OF_RECORD = "basisOfRecord";
92
    protected static final String INDIVIDUAL_COUNT = "individualCount";
93
    protected static final String TAXONKEY = "taxonKey";
94
    protected static final String KINGDOM_KEY = "kingdomKey";
95
    protected static final String PHYLUM_KEY = "phylumKey";
96
    protected static final String CLASS_KEY = "classKey";
97
    protected static final String ORDER_KEY = "orderKey";
98
    protected static final String FAMILY_KEY = "familyKey";
99
    protected static final String GENUS_KEY = "genusKey";
100
    protected static final String SPECIES_KEY = "speciesKey";
101
    protected static final String SCIENTIFIC_NAME = "scientificName";
102
    protected static final String KINGDOM =  "kingdom";
103
    protected static final String PHYLUM = "phylum";
104
    protected static final String ORDER = "order";
105
    protected static final String FAMILY  = "family";
106
    protected static final String GENUS = "genus";
107
    protected static final String SPECIES = "species";
108
    protected static final String GENERIC_NAME = "genericName";
109
    protected static final String SPECIFIC_EPITHET = "specificEpithet";
110
    protected static final String INFRASPECIFIC_EPITHET = "infraspecificEpithet";
111
    protected static final String TAXON_RANK = "taxonRank";
112
    protected static final String DATE_IDENTIFIED = "dateIdentified";
113
    protected static final String SCIENTIFIC_NAME_AUTHORSHIP = "scientificNameAuthorship";
114

    
115
    protected static final String ELEVATION = "elevation";
116
    protected static final String CONITNENT = "continent";
117
    protected static final String STATE_PROVINCE = "stateProvince";
118

    
119
    protected static final String ISSUES = "issues";
120
    protected static final String LAST_INTERPRETED = "lastInterpreted";
121
    protected static final String IDENTIFIERS = "identifiers";
122
    protected static final String FACTS = "facts";
123
    protected static final String RELATIONS = "relations";
124
    protected static final String GEODETICDATUM = "geodeticDatum";
125
    protected static final String CLASS = "class";
126

    
127
    protected static final String COUNTRY = "country";
128
    protected static final String NOMENCLATURAL_STATUS = "nomenclaturalStatus";
129
    protected static final String RIGHTSHOLDER = "rightsHolder";
130
    protected static final String IDEMTIFIER = "identifier";
131

    
132
    protected static final String NOMENCLATURALCODE = "nomenclaturalCode";
133
    protected static final String COUNTY = "county";
134

    
135
    protected static final String DATASET_NAME = "datasetName";
136
    protected static final String GBIF_ID = "gbifID";
137

    
138
    protected static final String OCCURENCE_ID = "occurrenceID";
139

    
140
    protected static final String TAXON_ID = "taxonID";
141
    protected static final String LICENCE = "license";
142

    
143
    protected static final String OWNER_INSTITUTION_CODE = "ownerInstitutionCode";
144
    protected static final String BIBLIOGRAPHIC_CITATION = "bibliographicCitation";
145
    protected static final String IDENTIFIED_BY = "identifiedBy";
146
    protected static final String COLLECTION_ID = "collectionID";
147

    
148
    private static final String PLANTAE = "Plantae";
149
    private static final String ANIMALIA = "Animalia";
150
    private static final String FUNGI = "Fungi";
151
    private static final String BACTERIA = "Bacteria";
152
    private static final String MULTIMEDIA = "media";
153

    
154
    /**
155
     * Parses the given {@link String} for occurrences.<br>
156
     * Note: The data structure of the GBIF response should not be changed.
157
     * @param jsonString JSON data as a String
158
     * @return the found occurrences as a collection of {@link GbifResponse}
159
     */
160
    public static Collection<GbifResponse> parseJsonRecords(String jsonString) {
161
        return parseJsonRecords(JSONObject.fromObject(jsonString));
162
    }
163

    
164
    /**
165
     * Parses the given {@link InputStream} for occurrences.
166
     * @param jsonString JSON data as an InputStream
167
     * @return the found occurrences as a collection of {@link GbifResponse}
168
     */
169
    public static Collection<GbifResponse> parseJsonRecords(InputStream inputStream) throws IOException{
170
        StringWriter stringWriter = new StringWriter();
171
        IOUtils.copy(inputStream, stringWriter, Charset.defaultCharset());
172
        return parseJsonRecords(stringWriter.toString());
173
    }
174

    
175
    /**
176
     * Parses the given {@link JSONObject} for occurrences.<br>
177
     * Note: The data structure of the GBIF response should not be changed.
178
     * @param jsonString JSON data as an JSONObject
179
     * @return the found occurrences as a collection of {@link GbifResponse}
180
     */
181
    public static Collection<GbifResponse> parseJsonRecords(JSONObject jsonObject){
182
        return parseJsonRecords(jsonObject.getJSONArray("results"));
183
    }
184

    
185
    /**
186
     * Parses the given {@link JSONArray} for occurrences.
187
     * @param jsonString JSON data as an {@link JSONArray}
188
     * @return the found occurrences as a collection of {@link GbifResponse}
189
     */
190
    private static Collection<GbifResponse> parseJsonRecords(JSONArray jsonArray) {
191
        Collection<GbifResponse> results = new ArrayList<>();
192
        String[] tripleId = new String[3];
193
        String string;
194
        for(Object o:jsonArray){
195
            //parse every record
196
            tripleId = new String[3];
197
            if(o instanceof JSONObject){
198
                String dataSetKey = null;
199
                GbifDataSetProtocol dataSetProtocol = null;
200
                DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen);
201
                TaxonName name = null;
202
                JSONObject record = (JSONObject)o;
203

    
204
                if(record.has(DATASET_PROTOCOL)){
205
                    dataSetProtocol = GbifDataSetProtocol.parseProtocol(record.getString(DATASET_PROTOCOL));
206
                }
207
                if(record.has(DATASET_KEY)){
208
                    dataSetKey = record.getString(DATASET_KEY);
209
                }
210
                if(record.has(COUNTRY_CODE)){
211
                    string = record.getString(COUNTRY_CODE);
212
                    Country country = Country.getCountryByIso3166A2(string);
213
                    if(country!=null){
214
                        derivedUnitFacade.setCountry(country);
215
                    }
216
                }
217
                if(record.has(LOCALITY)){
218
                    string = record.getString(LOCALITY);
219
                    derivedUnitFacade.setLocality(string);
220
                }
221

    
222
                if (record.has("species")){
223
                    Rank rank = null;
224

    
225
                    if (record.has(TAXON_RANK)){
226
                        string= record.getString(TAXON_RANK);
227
                        try {
228
                            rank = Rank.getRankByLatinName(string);
229
                        } catch (UnknownCdmTypeException e) {
230
                            // TODO Auto-generated catch block
231
                            e.printStackTrace();
232
                        }
233
                    }
234
                    if (rank != null){
235
                        if (record.has(NOMENCLATURALCODE)){
236
                            string = record.getString(NOMENCLATURALCODE);
237

    
238
                            if (string.equals(NomenclaturalCode.ICZN.getTitleCache())){
239
                                name = TaxonNameFactory.NewZoologicalInstance(rank);
240
                            } else if (string.equals(NomenclaturalCode.ICNAFP.getTitleCache())) {
241
                                name = TaxonNameFactory.NewBotanicalInstance(rank);
242
                            } else if (string.equals(NomenclaturalCode.ICNP.getTitleCache())){
243
                                name = TaxonNameFactory.NewBacterialInstance(rank);
244
                            } else if (string.equals(NomenclaturalCode.ICNCP.getTitleCache())){
245
                                name = TaxonNameFactory.NewCultivarInstance(rank);
246
                            } else if (string.equals(NomenclaturalCode.ICVCN.getTitleCache())){
247
                                name = TaxonNameFactory.NewViralInstance(rank);
248
                            } else if (string.equals("ICN")){
249
                                name = TaxonNameFactory.NewBotanicalInstance(rank);
250
                            }
251
                        }else {
252
                            if (record.has(KINGDOM)){
253
                                if (record.getString(KINGDOM).equals(PLANTAE)){
254
                                    name = TaxonNameFactory.NewBotanicalInstance(rank);
255
                                } else if (record.getString(KINGDOM).equals(ANIMALIA)){
256
                                    name = TaxonNameFactory.NewZoologicalInstance(rank);
257
                                } else if (record.getString(KINGDOM).equals(FUNGI)){
258
                                    name = TaxonNameFactory.NewBotanicalInstance(rank);
259
                                } else if (record.getString(KINGDOM).equals(BACTERIA)){
260
                                    name = TaxonNameFactory.NewBacterialInstance(rank);
261
                                } else{
262
                                    name = TaxonNameFactory.NewNonViralInstance(rank);
263
                                }
264
                            } else{
265
                                name = TaxonNameFactory.NewNonViralInstance(rank);
266
                            }
267
                        }
268
                        if (name == null){
269
                            name = TaxonNameFactory.NewNonViralInstance(rank);
270
                        }
271
                        if (record.has(GENUS)){
272
                            name.setGenusOrUninomial(record.getString(GENUS));
273
                        }
274
                        if (record.has(SPECIFIC_EPITHET)){
275
                            name.setSpecificEpithet(record.getString(SPECIFIC_EPITHET));
276
                        }
277
                        if (record.has(INFRASPECIFIC_EPITHET)){
278
                            name.setInfraSpecificEpithet(record.getString(INFRASPECIFIC_EPITHET));
279
                        }
280
                        if (record.has(SCIENTIFIC_NAME)){
281
                            name.setTitleCache(record.getString(SCIENTIFIC_NAME), true);
282
                        }
283
                    }
284
                    DeterminationEvent detEvent = DeterminationEvent.NewInstance();
285

    
286
                    if (record.has(IDENTIFIED_BY)){
287
                        Person determiner = Person.NewTitledInstance(record.getString(IDENTIFIED_BY));
288
                        detEvent.setDeterminer(determiner);
289
                    }
290
                    detEvent.setTaxonName(name);
291
                    detEvent.setPreferredFlag(true);
292
                    derivedUnitFacade.addDetermination(detEvent);
293
                }
294

    
295
                // GPS location
296
                Point location = Point.NewInstance();
297
                derivedUnitFacade.setExactLocation(location);
298
                try {
299
                    if(record.has(LATITUDE)){
300
                        String lat = record.getString(LATITUDE);
301
                        location.setLatitudeByParsing(lat);
302
                    }
303
                    if(record.has(LONGITUDE)){
304
                        String lon = record.getString(LONGITUDE);
305
                        location.setLongitudeByParsing(lon);
306
                    }
307
                } catch (ParseException e) {
308
                    logger.error("Could not parse GPS coordinates", e);
309
                }
310
                if(record.has(GEOREFERENCE_PROTOCOL)){
311
                    String geo = record.getString(GEOREFERENCE_PROTOCOL);
312
                    ReferenceSystem referenceSystem = null;
313
                    //TODO: Is there another way than string comparison
314
                    //to check which reference system is used?
315
                    if(ReferenceSystem.WGS84().getLabel().contains(geo)){
316
                        referenceSystem = ReferenceSystem.WGS84();
317
                    }
318
                    else if(ReferenceSystem.GOOGLE_EARTH().getLabel().contains(geo)){
319
                        referenceSystem = ReferenceSystem.GOOGLE_EARTH();
320
                    }
321
                    else if(ReferenceSystem.GAZETTEER().getLabel().contains(geo)){
322
                        referenceSystem = ReferenceSystem.GAZETTEER();
323
                    }
324
                    location.setReferenceSystem(referenceSystem);
325
                }
326

    
327
                if(record.has(ELEVATION)){
328
                    try {
329
                        //parse integer and strip of unit
330
                        string = record.getString(ELEVATION);
331
                        int length = string.length();
332
                        StringBuilder builder = new StringBuilder();
333
                        for(int i=0;i<length;i++){
334
                            if(Character.isDigit(string.charAt(i))){
335
                                builder.append(string.charAt(i));
336
                            }
337
                            else{
338
                                break;
339
                            }
340
                        }
341
                        derivedUnitFacade.setAbsoluteElevation(Integer.parseInt(builder.toString()));
342
                    } catch (NumberFormatException e) {
343
                        logger.warn("Could not parse elevation", e);
344
                    }
345
                }
346

    
347
                //Date (Gathering Period)
348
                TimePeriod timePeriod = TimePeriod.NewInstance();
349
                derivedUnitFacade.setGatheringPeriod(timePeriod);
350
                //TODO what happens with eventDate??
351
                if(record.has(YEAR)){
352
                    timePeriod.setStartYear(record.getInt(YEAR));
353
                }
354
                if(record.has(MONTH)){
355
                    timePeriod.setStartMonth(record.getInt(MONTH));
356
                }
357
                if(record.has(DAY)){
358
                    timePeriod.setStartDay(record.getInt(DAY));
359
                }
360
                if(record.has(RECORDED_BY)){
361
                    Person person = Person.NewTitledInstance(record.getString(RECORDED_BY));
362
                    //FIXME check data base if collector already present
363
                    derivedUnitFacade.setCollector(person);
364
                }
365

    
366
                //collector number (fieldNumber OR recordNumber)
367
                if(record.has(FIELD_NUMBER)){
368
                    derivedUnitFacade.setFieldNumber(record.getString(FIELD_NUMBER));
369
                }
370
                //collector number (fieldNumber OR recordNumber)
371
                if(record.has(RECORD_NUMBER)){
372
                    derivedUnitFacade.setFieldNumber(record.getString(RECORD_NUMBER));
373
                }
374

    
375
                if(record.has(EVENT_REMARKS)){
376
                    derivedUnitFacade.setGatheringEventDescription(record.getString(EVENT_REMARKS));
377
                }
378
                if(record.has(OCCURRENCE_REMARKS)){
379
                    derivedUnitFacade.setEcology(record.getString(OCCURRENCE_REMARKS));
380
                }
381
                if(record.has(COLLECTION_CODE)){
382
                    String collectionCode = record.getString(COLLECTION_CODE);
383
                    tripleId[2] = collectionCode;
384
                    //FIXME: check data base for existing collections
385
                    eu.etaxonomy.cdm.model.occurrence.Collection collection = eu.etaxonomy.cdm.model.occurrence.Collection.NewInstance();
386
                    collection.setCode(collectionCode);
387
                    if(record.has(INSTITUTION_CODE)){
388
                        Institution institution = Institution.NewNamedInstance(record.getString(INSTITUTION_CODE));
389
                        institution.setCode(record.getString(INSTITUTION_CODE));
390
                        collection.setInstitute(institution);
391
                    }
392
                    derivedUnitFacade.setCollection(collection);
393
                }
394
                if(record.has(CATALOG_NUMBER)){
395
                    derivedUnitFacade.setCatalogNumber(record.getString(CATALOG_NUMBER));
396
                    derivedUnitFacade.setAccessionNumber(record.getString(CATALOG_NUMBER));
397
                    tripleId[0]= record.getString(CATALOG_NUMBER);
398
                }
399
                if(record.has(INSTITUTION_CODE)){
400
                    derivedUnitFacade.setAccessionNumber(record.getString(INSTITUTION_CODE));
401
                    tripleId[1]= record.getString(INSTITUTION_CODE);
402
                }
403

    
404
                if (record.has(OCCURENCE_ID)){
405
                    IdentifiableSource source = IdentifiableSource.NewDataImportInstance((record.getString(OCCURENCE_ID)));
406
                    derivedUnitFacade.addSource(source);
407
                }
408

    
409
                if (record.has(MULTIMEDIA)){
410
                    //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
411
                    JSONArray multimediaArray = record.getJSONArray(MULTIMEDIA);
412
                    JSONObject mediaRecord;
413
                    SpecimenOrObservationType type = null;
414
                    for(Object object:multimediaArray){
415
                        //parse every record
416
                        Media media = Media.NewInstance();
417
                        URI uri = null;
418
                        CdmImageInfo imageInf = null;
419

    
420
                        if(object instanceof JSONObject){
421
                            mediaRecord = (JSONObject) object;
422

    
423
                            if (mediaRecord.has("identifier")){
424
                                try {
425
                                    uri = new URI(mediaRecord.getString("identifier"));
426
                                    imageInf = MediaInfoFileReader.legacyFactoryMethod(uri)
427
                                        .readBaseInfo()
428
                                        .getCdmImageInfo();
429
                                } catch (URISyntaxException |IOException | HttpException e) {
430
                                    e.printStackTrace();
431
                                }
432
                               // media.addIdentifier(mediaRecord.getString("identifier"), null);
433
                            }
434
                            if (mediaRecord.has("references")){
435

    
436

    
437
                            }
438
                            if (mediaRecord.has("format")){
439

    
440
                            }
441
                            if (mediaRecord.has("type")){
442
                                if (mediaRecord.get("type").equals("StillImage")){
443
                                    type = SpecimenOrObservationType.StillImage;
444
                                }
445
                            }
446
                        }
447
                        ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInf);
448
                        MediaRepresentation representation = MediaRepresentation.NewInstance();
449

    
450
                        representation.addRepresentationPart(imageFile);
451
                        media.addRepresentation(representation);
452

    
453
                        derivedUnitFacade.addDerivedUnitMedia(media);
454
                    }
455
                    //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
456
                    //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
457
                    //format=image/jpeg
458
                    //type=StillImage
459
                }
460

    
461
                // create dataset URL
462
                URI uri = null;
463
                try {
464
                    uri = UriUtils.createUri(new URL(GbifQueryServiceWrapper.BASE_URL), "/v1/dataset/"+dataSetKey+"/endpoint", null, null);
465
                } catch (MalformedURLException e) {
466
                    logger.error("Endpoint URI could not be created!", e);
467
                } catch (URISyntaxException e) {
468
                    logger.error("Endpoint URI could not be created!", e);
469
                }
470
                results.add(new GbifResponse(derivedUnitFacade, uri, dataSetProtocol, tripleId, name));
471
            }
472
        }
473
        return results;
474
    }
475

    
476
    public static DataSetResponse parseOriginalDataSetUri(InputStream inputStream) throws IOException {
477
        StringWriter stringWriter = new StringWriter();
478
        IOUtils.copy(inputStream, stringWriter, Charset.defaultCharset());
479
        return parseOriginalDataSetUri(stringWriter.toString());
480
    }
481

    
482
    public static DataSetResponse parseOriginalDataSetUri(String jsonString) {
483
        DataSetResponse response = new DataSetResponse();
484
        JSONArray jsonArray = JSONArray.fromObject(jsonString);
485
        Object next = jsonArray.iterator().next();
486
        if(next instanceof JSONObject){
487
            JSONObject jsonObject = (JSONObject)next;
488
            if(jsonObject.has(URL)){
489
                response.setEndpoint(URI.create(jsonObject.getString(URL)));
490
            }
491
            if(jsonObject.has(TYPE)){
492
                response.setProtocol(GbifDataSetProtocol.parseProtocol(jsonObject.getString(TYPE)));
493
            }
494
        }
495
        return response;
496
    }
497
}
(3-3/6)