Project

General

Profile

Download (24 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2014 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.ext.occurrence.gbif;
10

    
11
import java.io.IOException;
12
import java.io.InputStream;
13
import java.io.StringWriter;
14
import java.net.MalformedURLException;
15
import java.net.URI;
16
import java.net.URISyntaxException;
17
import java.net.URL;
18
import java.text.ParseException;
19
import java.util.ArrayList;
20
import java.util.Collection;
21

    
22
import org.apache.commons.io.IOUtils;
23
import org.apache.http.HttpException;
24
import org.apache.log4j.Logger;
25

    
26
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
27
import eu.etaxonomy.cdm.common.UriUtils;
28
import eu.etaxonomy.cdm.common.media.ImageInfo;
29
import eu.etaxonomy.cdm.model.agent.Institution;
30
import eu.etaxonomy.cdm.model.agent.Person;
31
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
32
import eu.etaxonomy.cdm.model.common.TimePeriod;
33
import eu.etaxonomy.cdm.model.location.Country;
34
import eu.etaxonomy.cdm.model.location.Point;
35
import eu.etaxonomy.cdm.model.location.ReferenceSystem;
36
import eu.etaxonomy.cdm.model.media.ImageFile;
37
import eu.etaxonomy.cdm.model.media.Media;
38
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
39
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
40
import eu.etaxonomy.cdm.model.name.Rank;
41
import eu.etaxonomy.cdm.model.name.TaxonName;
42
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
43
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
44
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
45
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
46
import net.sf.json.JSONArray;
47
import net.sf.json.JSONObject;
48

    
49
/**
50
 * Utility class which provides the functionality to convert a JSON response
51
 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
52
 * @author pplitzner
53
 * @date 22.05.2014
54
 *
55
 */
56
public class GbifJsonOccurrenceParser {
57

    
58
    private static final Logger logger = Logger.getLogger(GbifJsonOccurrenceParser.class);
59

    
60
    private static final String DATASET_KEY = "datasetKey";
61
    private static final String DATASET_PROTOCOL = "protocol";
62

    
63
    private static final String KEY = "key";
64
    private static final String URL = "url";
65
    private static final String TYPE = "type";
66

    
67
    private static final String COUNTRY_CODE = "countryCode";
68
    private static final String LOCALITY = "locality";
69
    private static final String LONGITUDE = "decimalLongitude";
70
    private static final String LATITUDE = "decimalLatitude";
71
    private static final String GEOREFERENCE_PROTOCOL = "georeferenceProtocol";//reference system
72
    private static final String VERBATIM_ELEVATION = "verbatimElevation";
73
    private static final String YEAR = "year";
74
    private static final String MONTH = "month";
75
    private static final String DAY = "day";
76
    private static final String EVENT_DATE= "eventDate";
77
    private static final String RECORDED_BY= "recordedBy";//collector
78
    private static final String RECORD_NUMBER = "recordNumber";//collector number
79
    private static final String FIELD_NUMBER = "fieldNumber";//collector number
80
    private static final String EVENT_REMARKS = "eventRemarks";//gathering event description
81
    private static final String OCCURRENCE_REMARKS = "occurrenceRemarks";//ecology
82
    private static final String COLLECTION_CODE = "collectionCode";
83
    private static final String CATALOG_NUMBER = "catalogNumber";//accession number
84
    private static final String INSTITUTION_CODE = "institutionCode";
85

    
86

    
87
    protected static final String PUBLISHING_ORG_KEY = "publishingOrgKey";
88
    protected static final String PUBLISHING_COUNTRY = "publishingCountry";
89

    
90
    protected static final String EXTENSIONS = "extensions";
91
    protected static final String BASIS_OF_RECORD = "basisOfRecord";
92
    protected static final String INDIVIDUAL_COUNT = "individualCount";
93
    protected static final String TAXONKEY = "taxonKey";
94
    protected static final String KINGDOM_KEY = "kingdomKey";
95
    protected static final String PHYLUM_KEY = "phylumKey";
96
    protected static final String CLASS_KEY = "classKey";
97
    protected static final String ORDER_KEY = "orderKey";
98
    protected static final String FAMILY_KEY = "familyKey";
99
    protected static final String GENUS_KEY = "genusKey";
100
    protected static final String SPECIES_KEY = "speciesKey";
101
    protected static final String SCIENTIFIC_NAME = "scientificName";
102
    protected static final String KINGDOM =  "kingdom";
103
    protected static final String PHYLUM = "phylum";
104
    protected static final String ORDER = "order";
105
    protected static final String FAMILY  = "family";
106
    protected static final String GENUS = "genus";
107
    protected static final String SPECIES = "species";
108
    protected static final String GENERIC_NAME = "genericName";
109
    protected static final String SPECIFIC_EPITHET = "specificEpithet";
110
    protected static final String INFRASPECIFIC_EPITHET = "infraspecificEpithet";
111
    protected static final String TAXON_RANK = "taxonRank";
112
    protected static final String DATE_IDENTIFIED = "dateIdentified";
113
    protected static final String SCIENTIFIC_NAME_AUTHORSHIP = "scientificNameAuthorship";
114

    
115
    protected static final String ELEVATION = "elevation";
116
    protected static final String CONITNENT = "continent";
117
    protected static final String STATE_PROVINCE = "stateProvince";
118

    
119

    
120

    
121

    
122
    protected static final String ISSUES = "issues";
123
    protected static final String LAST_INTERPRETED = "lastInterpreted";
124
    protected static final String IDENTIFIERS = "identifiers";
125
    protected static final String FACTS = "facts";
126
    protected static final String RELATIONS = "relations";
127
    protected static final String GEODETICDATUM = "geodeticDatum";
128
    protected static final String CLASS = "class";
129

    
130
    protected static final String COUNTRY = "country";
131
    protected static final String NOMENCLATURAL_STATUS = "nomenclaturalStatus";
132
    protected static final String RIGHTSHOLDER = "rightsHolder";
133
    protected static final String IDEMTIFIER = "identifier";
134

    
135
    protected static final String NOMENCLATURALCODE = "nomenclaturalCode";
136
    protected static final String COUNTY = "county";
137

    
138
    protected static final String DATASET_NAME = "datasetName";
139
    protected static final String GBIF_ID = "gbifID";
140

    
141
    protected static final String OCCURENCE_ID = "occurrenceID";
142

    
143
    protected static final String TAXON_ID = "taxonID";
144
    protected static final String LICENCE = "license";
145

    
146
    protected static final String OWNER_INSTITUTION_CODE = "ownerInstitutionCode";
147
    protected static final String BIBLIOGRAPHIC_CITATION = "bibliographicCitation";
148
    protected static final String IDENTIFIED_BY = "identifiedBy";
149
    protected static final String COLLECTION_ID = "collectionID";
150

    
151
    private static final String PLANTAE = "Plantae";
152

    
153
    private static final String ANIMALIA = "Animalia";
154

    
155
    private static final String FUNGI = "Fungi";
156

    
157
    private static final String BACTERIA = "Bacteria";
158

    
159
    private static final String MULTIMEDIA = "media";
160

    
161

    
162

    
163

    
164

    
165

    
166
    /**
167
     * Parses the given {@link String} for occurrences.<br>
168
     * Note: The data structure of the GBIF response should not be changed.
169
     * @param jsonString JSON data as a String
170
     * @return the found occurrences as a collection of {@link GbifResponse}
171
     */
172
    public static Collection<GbifResponse> parseJsonRecords(String jsonString) {
173
        return parseJsonRecords(JSONObject.fromObject(jsonString));
174
    }
175

    
176
    /**
177
     * Parses the given {@link InputStream} for occurrences.
178
     * @param jsonString JSON data as an InputStream
179
     * @return the found occurrences as a collection of {@link GbifResponse}
180
     */
181
    public static Collection<GbifResponse> parseJsonRecords(InputStream inputStream) throws IOException{
182
        StringWriter stringWriter = new StringWriter();
183
        IOUtils.copy(inputStream, stringWriter);
184
        return parseJsonRecords(stringWriter.toString());
185
    }
186

    
187
    /**
188
     * Parses the given {@link JSONObject} for occurrences.<br>
189
     * Note: The data structure of the GBIF response should not be changed.
190
     * @param jsonString JSON data as an JSONObject
191
     * @return the found occurrences as a collection of {@link GbifResponse}
192
     */
193
    public static Collection<GbifResponse> parseJsonRecords(JSONObject jsonObject){
194
        return parseJsonRecords(jsonObject.getJSONArray("results"));
195
    }
196

    
197
    /**
198
     * Parses the given {@link JSONArray} for occurrences.
199
     * @param jsonString JSON data as an {@link JSONArray}
200
     * @return the found occurrences as a collection of {@link GbifResponse}
201
     */
202
    private static Collection<GbifResponse> parseJsonRecords(JSONArray jsonArray) {
203
        Collection<GbifResponse> results = new ArrayList<GbifResponse>();
204
        String[] tripleId = new String[3];
205
        String string;
206
        for(Object o:jsonArray){
207
            //parse every record
208
            tripleId = new String[3];
209
            if(o instanceof JSONObject){
210
                String dataSetKey = null;
211
                GbifDataSetProtocol dataSetProtocol = null;
212
                DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen);
213
                TaxonName name = null;
214
                JSONObject record = (JSONObject)o;
215

    
216
                if(record.has(DATASET_PROTOCOL)){
217
                    dataSetProtocol = GbifDataSetProtocol.parseProtocol(record.getString(DATASET_PROTOCOL));
218
                }
219
                if(record.has(DATASET_KEY)){
220
                    dataSetKey = record.getString(DATASET_KEY);
221
                }
222
                if(record.has(COUNTRY_CODE)){
223
                    string = record.getString(COUNTRY_CODE);
224
                    Country country = Country.getCountryByIso3166A2(string);
225
                    if(country!=null){
226
                        derivedUnitFacade.setCountry(country);
227
                    }
228
                }
229
                if(record.has(LOCALITY)){
230
                    string = record.getString(LOCALITY);
231
                    derivedUnitFacade.setLocality(string);
232
                }
233

    
234
                if (record.has("species")){
235
                    Rank rank = null;
236

    
237
                    if (record.has(TAXON_RANK)){
238
                        string= record.getString(TAXON_RANK);
239
                        try {
240
                            rank = Rank.getRankByName(string);
241
                        } catch (UnknownCdmTypeException e) {
242
                            // TODO Auto-generated catch block
243
                            e.printStackTrace();
244
                        }
245
                    }
246
                    if (rank != null){
247
                        if (record.has(NOMENCLATURALCODE)){
248
                            string = record.getString(NOMENCLATURALCODE);
249

    
250
                            if (string.equals(NomenclaturalCode.ICZN.getTitleCache())){
251
                                name = TaxonNameFactory.NewZoologicalInstance(rank);
252
                            } else if (string.equals(NomenclaturalCode.ICNAFP.getTitleCache())) {
253
                                name = TaxonNameFactory.NewBotanicalInstance(rank);
254
                            } else if (string.equals(NomenclaturalCode.ICNB.getTitleCache())){
255
                                name = TaxonNameFactory.NewBacterialInstance(rank);
256
                            } else if (string.equals(NomenclaturalCode.ICNCP.getTitleCache())){
257
                                name = TaxonNameFactory.NewCultivarInstance(rank);
258
                            } else if (string.equals(NomenclaturalCode.ICVCN.getTitleCache())){
259
                                name = TaxonNameFactory.NewViralInstance(rank);
260
                            } else if (string.equals("ICN")){
261
                                name = TaxonNameFactory.NewBotanicalInstance(rank);
262
                            }
263
                        }else {
264
                            if (record.has(KINGDOM)){
265
                                if (record.getString(KINGDOM).equals(PLANTAE)){
266
                                    name = TaxonNameFactory.NewBotanicalInstance(rank);
267
                                } else if (record.getString(KINGDOM).equals(ANIMALIA)){
268
                                    name = TaxonNameFactory.NewZoologicalInstance(rank);
269
                                } else if (record.getString(KINGDOM).equals(FUNGI)){
270
                                    name = TaxonNameFactory.NewBotanicalInstance(rank);
271
                                } else if (record.getString(KINGDOM).equals(BACTERIA)){
272
                                    name = TaxonNameFactory.NewBacterialInstance(rank);
273
                                } else{
274
                                    name = TaxonNameFactory.NewNonViralInstance(rank);
275
                                }
276
                            } else{
277
                                name = TaxonNameFactory.NewNonViralInstance(rank);
278
                            }
279
                        }
280
                        if (name == null){
281
                            name = TaxonNameFactory.NewNonViralInstance(rank);
282
                        }
283
                        if (record.has(GENUS)){
284
                            name.setGenusOrUninomial(record.getString(GENUS));
285
                        }
286
                        if (record.has(SPECIFIC_EPITHET)){
287
                            name.setSpecificEpithet(record.getString(SPECIFIC_EPITHET));
288
                        }
289
                        if (record.has(INFRASPECIFIC_EPITHET)){
290
                            name.setInfraSpecificEpithet(record.getString(INFRASPECIFIC_EPITHET));
291
                        }
292
                        if (record.has(SCIENTIFIC_NAME)){
293
                            name.setTitleCache(record.getString(SCIENTIFIC_NAME), true);
294
                        }
295

    
296
                    }
297
                    DeterminationEvent detEvent = DeterminationEvent.NewInstance();
298

    
299
                    if (record.has(IDENTIFIED_BY)){
300
                        Person determiner = Person.NewTitledInstance(record.getString(IDENTIFIED_BY));
301
                        detEvent.setDeterminer(determiner);
302

    
303
                    }
304
                    detEvent.setTaxonName(name);
305
                    detEvent.setPreferredFlag(true);
306
                    derivedUnitFacade.addDetermination(detEvent);
307

    
308
                }
309

    
310

    
311

    
312
                // GPS location
313
                Point location = Point.NewInstance();
314
                derivedUnitFacade.setExactLocation(location);
315
                try {
316
                    if(record.has(LATITUDE)){
317
                        String lat = record.getString(LATITUDE);
318
                        location.setLatitudeByParsing(lat);
319
                    }
320
                    if(record.has(LONGITUDE)){
321
                        String lon = record.getString(LONGITUDE);
322
                        location.setLongitudeByParsing(lon);
323
                    }
324
                } catch (ParseException e) {
325
                    logger.error("Could not parse GPS coordinates", e);
326
                }
327
                if(record.has(GEOREFERENCE_PROTOCOL)){
328
                    String geo = record.getString(GEOREFERENCE_PROTOCOL);
329
                    ReferenceSystem referenceSystem = null;
330
                    //TODO: Is there another way than string comparison
331
                    //to check which reference system is used?
332
                    if(ReferenceSystem.WGS84().getLabel().contains(geo)){
333
                        referenceSystem = ReferenceSystem.WGS84();
334
                    }
335
                    else if(ReferenceSystem.GOOGLE_EARTH().getLabel().contains(geo)){
336
                        referenceSystem = ReferenceSystem.GOOGLE_EARTH();
337
                    }
338
                    else if(ReferenceSystem.GAZETTEER().getLabel().contains(geo)){
339
                        referenceSystem = ReferenceSystem.GAZETTEER();
340
                    }
341
                    location.setReferenceSystem(referenceSystem);
342
                }
343

    
344
                if(record.has(ELEVATION)){
345
                    try {
346
                        //parse integer and strip of unit
347
                        string = record.getString(ELEVATION);
348
                        int length = string.length();
349
                        StringBuilder builder = new StringBuilder();
350
                        for(int i=0;i<length;i++){
351
                            if(Character.isDigit(string.charAt(i))){
352
                                builder.append(string.charAt(i));
353
                            }
354
                            else{
355
                                break;
356
                            }
357
                        }
358
                        derivedUnitFacade.setAbsoluteElevation(Integer.parseInt(builder.toString()));
359
                    } catch (NumberFormatException e) {
360
                        logger.warn("Could not parse elevation", e);
361
                    }
362
                }
363

    
364
                //Date (Gathering Period)
365
                TimePeriod timePeriod = TimePeriod.NewInstance();
366
                derivedUnitFacade.setGatheringPeriod(timePeriod);
367
                //TODO what happens with eventDate??
368
                if(record.has(YEAR)){
369
                    timePeriod.setStartYear(record.getInt(YEAR));
370
                }
371
                if(record.has(MONTH)){
372
                    timePeriod.setStartMonth(record.getInt(MONTH));
373
                }
374
                if(record.has(DAY)){
375
                    timePeriod.setStartDay(record.getInt(DAY));
376
                }
377
                if(record.has(RECORDED_BY)){
378
                    Person person = Person.NewTitledInstance(record.getString(RECORDED_BY));
379
                    //FIXME check data base if collector already present
380
                    derivedUnitFacade.setCollector(person);
381
                }
382

    
383
                //collector number (fieldNumber OR recordNumber)
384
                if(record.has(FIELD_NUMBER)){
385
                    derivedUnitFacade.setFieldNumber(record.getString(FIELD_NUMBER));
386
                }
387
                //collector number (fieldNumber OR recordNumber)
388
                if(record.has(RECORD_NUMBER)){
389
                    derivedUnitFacade.setFieldNumber(record.getString(RECORD_NUMBER));
390
                }
391

    
392
                if(record.has(EVENT_REMARKS)){
393
                    derivedUnitFacade.setGatheringEventDescription(record.getString(EVENT_REMARKS));
394
                }
395
                if(record.has(OCCURRENCE_REMARKS)){
396
                    derivedUnitFacade.setEcology(record.getString(OCCURRENCE_REMARKS));
397
                }
398
                if(record.has(COLLECTION_CODE)){
399
                    String collectionCode = record.getString(COLLECTION_CODE);
400
                    tripleId[2] = collectionCode;
401
                    //FIXME: check data base for existing collections
402
                    eu.etaxonomy.cdm.model.occurrence.Collection collection = eu.etaxonomy.cdm.model.occurrence.Collection.NewInstance();
403
                    collection.setCode(collectionCode);
404
                    if(record.has(INSTITUTION_CODE)){
405
                        Institution institution = Institution.NewNamedInstance(record.getString(INSTITUTION_CODE));
406
                        institution.setCode(record.getString(INSTITUTION_CODE));
407
                        collection.setInstitute(institution);
408
                    }
409
                    derivedUnitFacade.setCollection(collection);
410
                }
411
                if(record.has(CATALOG_NUMBER)){
412
                    derivedUnitFacade.setCatalogNumber(record.getString(CATALOG_NUMBER));
413
                    derivedUnitFacade.setAccessionNumber(record.getString(CATALOG_NUMBER));
414
                    tripleId[0]= record.getString(CATALOG_NUMBER);
415
                }
416
                if(record.has(INSTITUTION_CODE)){
417
                    derivedUnitFacade.setAccessionNumber(record.getString(INSTITUTION_CODE));
418
                    tripleId[1]= record.getString(INSTITUTION_CODE);
419
                }
420

    
421
                if (record.has(OCCURENCE_ID)){
422
                    IdentifiableSource source = IdentifiableSource.NewDataImportInstance((record.getString(OCCURENCE_ID)));
423
                    derivedUnitFacade.addSource(source);
424
                }
425

    
426
                if (record.has(MULTIMEDIA)){
427
                    //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
428
                    JSONArray multimediaArray = record.getJSONArray(MULTIMEDIA);
429
                    JSONObject mediaRecord;
430
                    Media media;
431
                    URI uri = null;
432
                    ImageInfo imageInf = null;
433
                    MediaRepresentation representation = null;
434
                    SpecimenOrObservationType type = null;
435
                    for(Object object:multimediaArray){
436
                        //parse every record
437
                       media = Media.NewInstance();
438
                       uri = null;
439
                       imageInf = null;
440

    
441
                        if(object instanceof JSONObject){
442
                            mediaRecord = (JSONObject) object;
443

    
444
                            if (mediaRecord.has("identifier")){
445
                                try {
446
                                    uri = new URI(mediaRecord.getString("identifier"));
447
                                    imageInf = ImageInfo.NewInstance(uri, 0);
448
                                } catch (URISyntaxException |IOException | HttpException e) {
449
                                    e.printStackTrace();
450
                                }
451
                               // media.addIdentifier(mediaRecord.getString("identifier"), null);
452
                            }
453
                            if (mediaRecord.has("references")){
454

    
455

    
456
                            }
457
                            if (mediaRecord.has("format")){
458

    
459
                            }
460
                            if (mediaRecord.has("type")){
461
                                if (mediaRecord.get("type").equals("StillImage")){
462
                                    type = SpecimenOrObservationType.StillImage;
463
                                }
464
                            }
465

    
466
                            }
467
                            ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInf);
468
                            representation = MediaRepresentation.NewInstance();
469

    
470
                            representation.addRepresentationPart(imageFile);
471
                            media.addRepresentation(representation);
472

    
473
                            derivedUnitFacade.addDerivedUnitMedia(media);
474
                        }
475
                    //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
476
                   //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
477
                    //format=image/jpeg
478
                    //type=StillImage
479

    
480
                }
481

    
482
                // create dataset URL
483
                URI uri = null;
484
                try {
485
                    uri = UriUtils.createUri(new URL(GbifQueryServiceWrapper.BASE_URL), "/v1/dataset/"+dataSetKey+"/endpoint", null, null);
486
                } catch (MalformedURLException e) {
487
                    logger.error("Endpoint URI could not be created!", e);
488
                } catch (URISyntaxException e) {
489
                    logger.error("Endpoint URI could not be created!", e);
490
                }
491
                results.add(new GbifResponse(derivedUnitFacade, uri, dataSetProtocol, tripleId, name));
492
            }
493
        }
494
        return results;
495
    }
496

    
497
    public static DataSetResponse parseOriginalDataSetUri(InputStream inputStream) throws IOException {
498
        StringWriter stringWriter = new StringWriter();
499
        IOUtils.copy(inputStream, stringWriter);
500
        return parseOriginalDataSetUri(stringWriter.toString());
501
    }
502

    
503
    public static DataSetResponse parseOriginalDataSetUri(String jsonString) {
504
        DataSetResponse response = new DataSetResponse();
505
        JSONArray jsonArray = JSONArray.fromObject(jsonString);
506
        Object next = jsonArray.iterator().next();
507
        if(next instanceof JSONObject){
508
            JSONObject jsonObject = (JSONObject)next;
509
            if(jsonObject.has(URL)){
510
                response.setEndpoint(URI.create(jsonObject.getString(URL)));
511
            }
512
            if(jsonObject.has(TYPE)){
513
                response.setProtocol(GbifDataSetProtocol.parseProtocol(jsonObject.getString(TYPE)));
514
            }
515
        }
516
        return response;
517
    }
518

    
519
}
(3-3/6)