Project

General

Profile

Download (24.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2014 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.ext.occurrence.gbif;
10

    
11
import java.io.IOException;
12
import java.io.InputStream;
13
import java.io.StringWriter;
14
import java.net.MalformedURLException;
15
import java.net.URISyntaxException;
16
import java.net.URL;
17
import java.nio.charset.Charset;
18
import java.text.ParseException;
19
import java.util.ArrayList;
20
import java.util.Collection;
21

    
22
import org.apache.commons.io.IOUtils;
23
import org.apache.http.HttpException;
24
import org.apache.log4j.Logger;
25

    
26
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
27
import eu.etaxonomy.cdm.common.URI;
28
import eu.etaxonomy.cdm.common.UriUtils;
29
import eu.etaxonomy.cdm.common.media.CdmImageInfo;
30
import eu.etaxonomy.cdm.model.agent.Institution;
31
import eu.etaxonomy.cdm.model.agent.Person;
32
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
33
import eu.etaxonomy.cdm.model.common.TimePeriod;
34
import eu.etaxonomy.cdm.model.location.Country;
35
import eu.etaxonomy.cdm.model.location.Point;
36
import eu.etaxonomy.cdm.model.location.ReferenceSystem;
37
import eu.etaxonomy.cdm.model.media.ImageFile;
38
import eu.etaxonomy.cdm.model.media.Media;
39
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
40
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
41
import eu.etaxonomy.cdm.model.name.Rank;
42
import eu.etaxonomy.cdm.model.name.TaxonName;
43
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
44
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
45
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
46
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
47
import net.sf.json.JSONArray;
48
import net.sf.json.JSONObject;
49

    
50
/**
51
 * Utility class which provides the functionality to convert a JSON response
52
 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
53
 * @author pplitzner
54
 * @since 22.05.2014
55
 */
56
public class GbifJsonOccurrenceParser {
57

    
58
    private static final Logger logger = Logger.getLogger(GbifJsonOccurrenceParser.class);
59

    
60
    private static final String DATASET_KEY = "datasetKey";
61
    private static final String DATASET_PROTOCOL = "protocol";
62

    
63
    private static final String KEY = "key";
64
    private static final String URL = "url";
65
    private static final String TYPE = "type";
66

    
67
    private static final String COUNTRY_CODE = "countryCode";
68
    private static final String LOCALITY = "locality";
69
    private static final String LONGITUDE = "decimalLongitude";
70
    private static final String LATITUDE = "decimalLatitude";
71
    private static final String GEOREFERENCE_PROTOCOL = "georeferenceProtocol";//reference system
72
    private static final String VERBATIM_ELEVATION = "verbatimElevation";
73
    private static final String YEAR = "year";
74
    private static final String MONTH = "month";
75
    private static final String DAY = "day";
76
    private static final String EVENT_DATE= "eventDate";
77
    private static final String RECORDED_BY= "recordedBy";//collector
78
    private static final String RECORD_NUMBER = "recordNumber";//collector number
79
    private static final String FIELD_NUMBER = "fieldNumber";//collector number
80
    private static final String EVENT_REMARKS = "eventRemarks";//gathering event description
81
    private static final String OCCURRENCE_REMARKS = "occurrenceRemarks";//ecology
82
    private static final String COLLECTION_CODE = "collectionCode";
83
    private static final String CATALOG_NUMBER = "catalogNumber";//accession number
84
    private static final String INSTITUTION_CODE = "institutionCode";
85

    
86
    protected static final String PUBLISHING_ORG_KEY = "publishingOrgKey";
87
    protected static final String PUBLISHING_COUNTRY = "publishingCountry";
88

    
89
    protected static final String EXTENSIONS = "extensions";
90
    protected static final String BASIS_OF_RECORD = "basisOfRecord";
91
    protected static final String INDIVIDUAL_COUNT = "individualCount";
92
    protected static final String TAXONKEY = "taxonKey";
93
    protected static final String KINGDOM_KEY = "kingdomKey";
94
    protected static final String PHYLUM_KEY = "phylumKey";
95
    protected static final String CLASS_KEY = "classKey";
96
    protected static final String ORDER_KEY = "orderKey";
97
    protected static final String FAMILY_KEY = "familyKey";
98
    protected static final String GENUS_KEY = "genusKey";
99
    protected static final String SPECIES_KEY = "speciesKey";
100
    protected static final String SCIENTIFIC_NAME = "scientificName";
101
    protected static final String KINGDOM =  "kingdom";
102
    protected static final String PHYLUM = "phylum";
103
    protected static final String ORDER = "order";
104
    protected static final String FAMILY  = "family";
105
    protected static final String GENUS = "genus";
106
    protected static final String SPECIES = "species";
107
    protected static final String GENERIC_NAME = "genericName";
108
    protected static final String SPECIFIC_EPITHET = "specificEpithet";
109
    protected static final String INFRASPECIFIC_EPITHET = "infraspecificEpithet";
110
    protected static final String TAXON_RANK = "taxonRank";
111
    protected static final String DATE_IDENTIFIED = "dateIdentified";
112
    protected static final String SCIENTIFIC_NAME_AUTHORSHIP = "scientificNameAuthorship";
113

    
114
    protected static final String ELEVATION = "elevation";
115
    protected static final String CONITNENT = "continent";
116
    protected static final String STATE_PROVINCE = "stateProvince";
117

    
118
    protected static final String ISSUES = "issues";
119
    protected static final String LAST_INTERPRETED = "lastInterpreted";
120
    protected static final String IDENTIFIERS = "identifiers";
121
    protected static final String FACTS = "facts";
122
    protected static final String RELATIONS = "relations";
123
    protected static final String GEODETICDATUM = "geodeticDatum";
124
    protected static final String CLASS = "class";
125

    
126
    protected static final String COUNTRY = "country";
127
    protected static final String NOMENCLATURAL_STATUS = "nomenclaturalStatus";
128
    protected static final String RIGHTSHOLDER = "rightsHolder";
129
    protected static final String IDEMTIFIER = "identifier";
130

    
131
    protected static final String NOMENCLATURALCODE = "nomenclaturalCode";
132
    protected static final String COUNTY = "county";
133

    
134
    protected static final String DATASET_NAME = "datasetName";
135
    protected static final String GBIF_ID = "gbifID";
136

    
137
    protected static final String OCCURENCE_ID = "occurrenceID";
138

    
139
    protected static final String TAXON_ID = "taxonID";
140
    protected static final String LICENCE = "license";
141

    
142
    protected static final String OWNER_INSTITUTION_CODE = "ownerInstitutionCode";
143
    protected static final String BIBLIOGRAPHIC_CITATION = "bibliographicCitation";
144
    protected static final String IDENTIFIED_BY = "identifiedBy";
145
    protected static final String COLLECTION_ID = "collectionID";
146

    
147
    private static final String PLANTAE = "Plantae";
148
    private static final String ANIMALIA = "Animalia";
149
    private static final String FUNGI = "Fungi";
150
    private static final String BACTERIA = "Bacteria";
151
    private static final String MULTIMEDIA = "media";
152

    
153
    /**
154
     * Parses the given {@link String} for occurrences.<br>
155
     * Note: The data structure of the GBIF response should not be changed.
156
     * @param jsonString JSON data as a String
157
     * @return the found occurrences as a collection of {@link GbifResponse}
158
     */
159
    public static Collection<GbifResponse> parseJsonRecords(String jsonString) {
160
        return parseJsonRecords(JSONObject.fromObject(jsonString));
161
    }
162

    
163
    /**
164
     * Parses the given {@link InputStream} for occurrences.
165
     * @param jsonString JSON data as an InputStream
166
     * @return the found occurrences as a collection of {@link GbifResponse}
167
     */
168
    public static Collection<GbifResponse> parseJsonRecords(InputStream inputStream) throws IOException{
169
        StringWriter stringWriter = new StringWriter();
170
        IOUtils.copy(inputStream, stringWriter, Charset.defaultCharset());
171
        return parseJsonRecords(stringWriter.toString());
172
    }
173

    
174
    /**
175
     * Parses the given {@link JSONObject} for occurrences.<br>
176
     * Note: The data structure of the GBIF response should not be changed.
177
     * @param jsonString JSON data as an JSONObject
178
     * @return the found occurrences as a collection of {@link GbifResponse}
179
     */
180
    public static Collection<GbifResponse> parseJsonRecords(JSONObject jsonObject){
181
        return parseJsonRecords(jsonObject.getJSONArray("results"));
182
    }
183

    
184
    /**
185
     * Parses the given {@link JSONArray} for occurrences.
186
     * @param jsonString JSON data as an {@link JSONArray}
187
     * @return the found occurrences as a collection of {@link GbifResponse}
188
     */
189
    private static Collection<GbifResponse> parseJsonRecords(JSONArray jsonArray) {
190
        Collection<GbifResponse> results = new ArrayList<>();
191
        String[] tripleId = new String[3];
192
        String string;
193
        for(Object o:jsonArray){
194
            //parse every record
195
            tripleId = new String[3];
196
            if(o instanceof JSONObject){
197
                String dataSetKey = null;
198
                GbifDataSetProtocol dataSetProtocol = null;
199
                DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen);
200
                TaxonName name = null;
201
                JSONObject record = (JSONObject)o;
202

    
203
                if(record.has(DATASET_PROTOCOL)){
204
                    dataSetProtocol = GbifDataSetProtocol.parseProtocol(record.getString(DATASET_PROTOCOL));
205
                }
206
                if(record.has(DATASET_KEY)){
207
                    dataSetKey = record.getString(DATASET_KEY);
208
                }
209
                if(record.has(COUNTRY_CODE)){
210
                    string = record.getString(COUNTRY_CODE);
211
                    Country country = Country.getCountryByIso3166A2(string);
212
                    if(country!=null){
213
                        derivedUnitFacade.setCountry(country);
214
                    }
215
                }
216
                if(record.has(LOCALITY)){
217
                    string = record.getString(LOCALITY);
218
                    derivedUnitFacade.setLocality(string);
219
                }
220

    
221
                if (record.has("species")){
222
                    Rank rank = null;
223

    
224
                    if (record.has(TAXON_RANK)){
225
                        string= record.getString(TAXON_RANK);
226
                        try {
227
                            rank = Rank.getRankByName(string);
228
                        } catch (UnknownCdmTypeException e) {
229
                            // TODO Auto-generated catch block
230
                            e.printStackTrace();
231
                        }
232
                    }
233
                    if (rank != null){
234
                        if (record.has(NOMENCLATURALCODE)){
235
                            string = record.getString(NOMENCLATURALCODE);
236

    
237
                            if (string.equals(NomenclaturalCode.ICZN.getTitleCache())){
238
                                name = TaxonNameFactory.NewZoologicalInstance(rank);
239
                            } else if (string.equals(NomenclaturalCode.ICNAFP.getTitleCache())) {
240
                                name = TaxonNameFactory.NewBotanicalInstance(rank);
241
                            } else if (string.equals(NomenclaturalCode.ICNB.getTitleCache())){
242
                                name = TaxonNameFactory.NewBacterialInstance(rank);
243
                            } else if (string.equals(NomenclaturalCode.ICNCP.getTitleCache())){
244
                                name = TaxonNameFactory.NewCultivarInstance(rank);
245
                            } else if (string.equals(NomenclaturalCode.ICVCN.getTitleCache())){
246
                                name = TaxonNameFactory.NewViralInstance(rank);
247
                            } else if (string.equals("ICN")){
248
                                name = TaxonNameFactory.NewBotanicalInstance(rank);
249
                            }
250
                        }else {
251
                            if (record.has(KINGDOM)){
252
                                if (record.getString(KINGDOM).equals(PLANTAE)){
253
                                    name = TaxonNameFactory.NewBotanicalInstance(rank);
254
                                } else if (record.getString(KINGDOM).equals(ANIMALIA)){
255
                                    name = TaxonNameFactory.NewZoologicalInstance(rank);
256
                                } else if (record.getString(KINGDOM).equals(FUNGI)){
257
                                    name = TaxonNameFactory.NewBotanicalInstance(rank);
258
                                } else if (record.getString(KINGDOM).equals(BACTERIA)){
259
                                    name = TaxonNameFactory.NewBacterialInstance(rank);
260
                                } else{
261
                                    name = TaxonNameFactory.NewNonViralInstance(rank);
262
                                }
263
                            } else{
264
                                name = TaxonNameFactory.NewNonViralInstance(rank);
265
                            }
266
                        }
267
                        if (name == null){
268
                            name = TaxonNameFactory.NewNonViralInstance(rank);
269
                        }
270
                        if (record.has(GENUS)){
271
                            name.setGenusOrUninomial(record.getString(GENUS));
272
                        }
273
                        if (record.has(SPECIFIC_EPITHET)){
274
                            name.setSpecificEpithet(record.getString(SPECIFIC_EPITHET));
275
                        }
276
                        if (record.has(INFRASPECIFIC_EPITHET)){
277
                            name.setInfraSpecificEpithet(record.getString(INFRASPECIFIC_EPITHET));
278
                        }
279
                        if (record.has(SCIENTIFIC_NAME)){
280
                            name.setTitleCache(record.getString(SCIENTIFIC_NAME), true);
281
                        }
282
                    }
283
                    DeterminationEvent detEvent = DeterminationEvent.NewInstance();
284

    
285
                    if (record.has(IDENTIFIED_BY)){
286
                        Person determiner = Person.NewTitledInstance(record.getString(IDENTIFIED_BY));
287
                        detEvent.setDeterminer(determiner);
288
                    }
289
                    detEvent.setTaxonName(name);
290
                    detEvent.setPreferredFlag(true);
291
                    derivedUnitFacade.addDetermination(detEvent);
292
                }
293

    
294
                // GPS location
295
                Point location = Point.NewInstance();
296
                derivedUnitFacade.setExactLocation(location);
297
                try {
298
                    if(record.has(LATITUDE)){
299
                        String lat = record.getString(LATITUDE);
300
                        location.setLatitudeByParsing(lat);
301
                    }
302
                    if(record.has(LONGITUDE)){
303
                        String lon = record.getString(LONGITUDE);
304
                        location.setLongitudeByParsing(lon);
305
                    }
306
                } catch (ParseException e) {
307
                    logger.error("Could not parse GPS coordinates", e);
308
                }
309
                if(record.has(GEOREFERENCE_PROTOCOL)){
310
                    String geo = record.getString(GEOREFERENCE_PROTOCOL);
311
                    ReferenceSystem referenceSystem = null;
312
                    //TODO: Is there another way than string comparison
313
                    //to check which reference system is used?
314
                    if(ReferenceSystem.WGS84().getLabel().contains(geo)){
315
                        referenceSystem = ReferenceSystem.WGS84();
316
                    }
317
                    else if(ReferenceSystem.GOOGLE_EARTH().getLabel().contains(geo)){
318
                        referenceSystem = ReferenceSystem.GOOGLE_EARTH();
319
                    }
320
                    else if(ReferenceSystem.GAZETTEER().getLabel().contains(geo)){
321
                        referenceSystem = ReferenceSystem.GAZETTEER();
322
                    }
323
                    location.setReferenceSystem(referenceSystem);
324
                }
325

    
326
                if(record.has(ELEVATION)){
327
                    try {
328
                        //parse integer and strip of unit
329
                        string = record.getString(ELEVATION);
330
                        int length = string.length();
331
                        StringBuilder builder = new StringBuilder();
332
                        for(int i=0;i<length;i++){
333
                            if(Character.isDigit(string.charAt(i))){
334
                                builder.append(string.charAt(i));
335
                            }
336
                            else{
337
                                break;
338
                            }
339
                        }
340
                        derivedUnitFacade.setAbsoluteElevation(Integer.parseInt(builder.toString()));
341
                    } catch (NumberFormatException e) {
342
                        logger.warn("Could not parse elevation", e);
343
                    }
344
                }
345

    
346
                //Date (Gathering Period)
347
                TimePeriod timePeriod = TimePeriod.NewInstance();
348
                derivedUnitFacade.setGatheringPeriod(timePeriod);
349
                //TODO what happens with eventDate??
350
                if(record.has(YEAR)){
351
                    timePeriod.setStartYear(record.getInt(YEAR));
352
                }
353
                if(record.has(MONTH)){
354
                    timePeriod.setStartMonth(record.getInt(MONTH));
355
                }
356
                if(record.has(DAY)){
357
                    timePeriod.setStartDay(record.getInt(DAY));
358
                }
359
                if(record.has(RECORDED_BY)){
360
                    Person person = Person.NewTitledInstance(record.getString(RECORDED_BY));
361
                    //FIXME check data base if collector already present
362
                    derivedUnitFacade.setCollector(person);
363
                }
364

    
365
                //collector number (fieldNumber OR recordNumber)
366
                if(record.has(FIELD_NUMBER)){
367
                    derivedUnitFacade.setFieldNumber(record.getString(FIELD_NUMBER));
368
                }
369
                //collector number (fieldNumber OR recordNumber)
370
                if(record.has(RECORD_NUMBER)){
371
                    derivedUnitFacade.setFieldNumber(record.getString(RECORD_NUMBER));
372
                }
373

    
374
                if(record.has(EVENT_REMARKS)){
375
                    derivedUnitFacade.setGatheringEventDescription(record.getString(EVENT_REMARKS));
376
                }
377
                if(record.has(OCCURRENCE_REMARKS)){
378
                    derivedUnitFacade.setEcology(record.getString(OCCURRENCE_REMARKS));
379
                }
380
                if(record.has(COLLECTION_CODE)){
381
                    String collectionCode = record.getString(COLLECTION_CODE);
382
                    tripleId[2] = collectionCode;
383
                    //FIXME: check data base for existing collections
384
                    eu.etaxonomy.cdm.model.occurrence.Collection collection = eu.etaxonomy.cdm.model.occurrence.Collection.NewInstance();
385
                    collection.setCode(collectionCode);
386
                    if(record.has(INSTITUTION_CODE)){
387
                        Institution institution = Institution.NewNamedInstance(record.getString(INSTITUTION_CODE));
388
                        institution.setCode(record.getString(INSTITUTION_CODE));
389
                        collection.setInstitute(institution);
390
                    }
391
                    derivedUnitFacade.setCollection(collection);
392
                }
393
                if(record.has(CATALOG_NUMBER)){
394
                    derivedUnitFacade.setCatalogNumber(record.getString(CATALOG_NUMBER));
395
                    derivedUnitFacade.setAccessionNumber(record.getString(CATALOG_NUMBER));
396
                    tripleId[0]= record.getString(CATALOG_NUMBER);
397
                }
398
                if(record.has(INSTITUTION_CODE)){
399
                    derivedUnitFacade.setAccessionNumber(record.getString(INSTITUTION_CODE));
400
                    tripleId[1]= record.getString(INSTITUTION_CODE);
401
                }
402

    
403
                if (record.has(OCCURENCE_ID)){
404
                    IdentifiableSource source = IdentifiableSource.NewDataImportInstance((record.getString(OCCURENCE_ID)));
405
                    derivedUnitFacade.addSource(source);
406
                }
407

    
408
                if (record.has(MULTIMEDIA)){
409
                    //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
410
                    JSONArray multimediaArray = record.getJSONArray(MULTIMEDIA);
411
                    JSONObject mediaRecord;
412
                    Media media;
413
                    URI uri = null;
414
                    CdmImageInfo imageInf = null;
415
                    MediaRepresentation representation = null;
416
                    SpecimenOrObservationType type = null;
417
                    for(Object object:multimediaArray){
418
                        //parse every record
419
                       media = Media.NewInstance();
420
                       uri = null;
421
                       imageInf = null;
422

    
423
                        if(object instanceof JSONObject){
424
                            mediaRecord = (JSONObject) object;
425

    
426
                            if (mediaRecord.has("identifier")){
427
                                try {
428
                                    uri = new URI(mediaRecord.getString("identifier"));
429
                                    imageInf = CdmImageInfo.NewInstance(uri, 0);
430
                                } catch (URISyntaxException |IOException | HttpException e) {
431
                                    e.printStackTrace();
432
                                }
433
                               // media.addIdentifier(mediaRecord.getString("identifier"), null);
434
                            }
435
                            if (mediaRecord.has("references")){
436

    
437

    
438
                            }
439
                            if (mediaRecord.has("format")){
440

    
441
                            }
442
                            if (mediaRecord.has("type")){
443
                                if (mediaRecord.get("type").equals("StillImage")){
444
                                    type = SpecimenOrObservationType.StillImage;
445
                                }
446
                            }
447
                        }
448
                        ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInf);
449
                        representation = MediaRepresentation.NewInstance();
450

    
451
                        representation.addRepresentationPart(imageFile);
452
                        media.addRepresentation(representation);
453

    
454
                        derivedUnitFacade.addDerivedUnitMedia(media);
455
                    }
456
                    //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
457
                   //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
458
                    //format=image/jpeg
459
                    //type=StillImage
460
                }
461

    
462
                // create dataset URL
463
                URI uri = null;
464
                try {
465
                    uri = UriUtils.createUri(new URL(GbifQueryServiceWrapper.BASE_URL), "/v1/dataset/"+dataSetKey+"/endpoint", null, null);
466
                } catch (MalformedURLException e) {
467
                    logger.error("Endpoint URI could not be created!", e);
468
                } catch (URISyntaxException e) {
469
                    logger.error("Endpoint URI could not be created!", e);
470
                }
471
                results.add(new GbifResponse(derivedUnitFacade, uri, dataSetProtocol, tripleId, name));
472
            }
473
        }
474
        return results;
475
    }
476

    
477
    public static DataSetResponse parseOriginalDataSetUri(InputStream inputStream) throws IOException {
478
        StringWriter stringWriter = new StringWriter();
479
        IOUtils.copy(inputStream, stringWriter, Charset.defaultCharset());
480
        return parseOriginalDataSetUri(stringWriter.toString());
481
    }
482

    
483
    public static DataSetResponse parseOriginalDataSetUri(String jsonString) {
484
        DataSetResponse response = new DataSetResponse();
485
        JSONArray jsonArray = JSONArray.fromObject(jsonString);
486
        Object next = jsonArray.iterator().next();
487
        if(next instanceof JSONObject){
488
            JSONObject jsonObject = (JSONObject)next;
489
            if(jsonObject.has(URL)){
490
                response.setEndpoint(URI.create(jsonObject.getString(URL)));
491
            }
492
            if(jsonObject.has(TYPE)){
493
                response.setProtocol(GbifDataSetProtocol.parseProtocol(jsonObject.getString(TYPE)));
494
            }
495
        }
496
        return response;
497
    }
498
}
(3-3/6)