Project

General

Profile

Download (24.1 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2014 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.ext.occurrence.gbif;
10

    
11
import java.io.IOException;
12
import java.io.InputStream;
13
import java.io.StringWriter;
14
import java.net.MalformedURLException;
15
import java.net.URI;
16
import java.net.URISyntaxException;
17
import java.net.URL;
18
import java.text.ParseException;
19
import java.util.ArrayList;
20
import java.util.Collection;
21

    
22
import net.sf.json.JSONArray;
23
import net.sf.json.JSONObject;
24

    
25
import org.apache.commons.io.IOUtils;
26
import org.apache.http.HttpException;
27
import org.apache.log4j.Logger;
28

    
29
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
30
import eu.etaxonomy.cdm.common.UriUtils;
31
import eu.etaxonomy.cdm.common.media.ImageInfo;
32
import eu.etaxonomy.cdm.model.agent.Institution;
33
import eu.etaxonomy.cdm.model.agent.Person;
34
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
35
import eu.etaxonomy.cdm.model.common.TimePeriod;
36
import eu.etaxonomy.cdm.model.location.Country;
37
import eu.etaxonomy.cdm.model.location.Point;
38
import eu.etaxonomy.cdm.model.location.ReferenceSystem;
39
import eu.etaxonomy.cdm.model.media.ImageFile;
40
import eu.etaxonomy.cdm.model.media.Media;
41
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
42
import eu.etaxonomy.cdm.model.name.BacterialName;
43
import eu.etaxonomy.cdm.model.name.BotanicalName;
44
import eu.etaxonomy.cdm.model.name.CultivarPlantName;
45
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
46
import eu.etaxonomy.cdm.model.name.NonViralName;
47
import eu.etaxonomy.cdm.model.name.Rank;
48
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
49
import eu.etaxonomy.cdm.model.name.ViralName;
50
import eu.etaxonomy.cdm.model.name.ZoologicalName;
51
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
52
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
53
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
54

    
55
/**
56
 * Utility class which provides the functionality to convert a JSON response
57
 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
58
 * @author pplitzner
59
 * @date 22.05.2014
60
 *
61
 */
62
public class GbifJsonOccurrenceParser {
63

    
64
    private static final Logger logger = Logger.getLogger(GbifJsonOccurrenceParser.class);
65

    
66
    private static final String DATASET_KEY = "datasetKey";
67
    private static final String DATASET_PROTOCOL = "protocol";
68

    
69
    private static final String KEY = "key";
70
    private static final String URL = "url";
71
    private static final String TYPE = "type";
72

    
73
    private static final String COUNTRY_CODE = "countryCode";
74
    private static final String LOCALITY = "locality";
75
    private static final String LONGITUDE = "decimalLongitude";
76
    private static final String LATITUDE = "decimalLatitude";
77
    private static final String GEOREFERENCE_PROTOCOL = "georeferenceProtocol";//reference system
78
    private static final String VERBATIM_ELEVATION = "verbatimElevation";
79
    private static final String YEAR = "year";
80
    private static final String MONTH = "month";
81
    private static final String DAY = "day";
82
    private static final String EVENT_DATE= "eventDate";
83
    private static final String RECORDED_BY= "recordedBy";//collector
84
    private static final String RECORD_NUMBER = "recordNumber";//collector number
85
    private static final String FIELD_NUMBER = "fieldNumber";//collector number
86
    private static final String EVENT_REMARKS = "eventRemarks";//gathering event description
87
    private static final String OCCURRENCE_REMARKS = "occurrenceRemarks";//ecology
88
    private static final String COLLECTION_CODE = "collectionCode";
89
    private static final String CATALOG_NUMBER = "catalogNumber";//accession number
90
    private static final String INSTITUTION_CODE = "institutionCode";
91

    
92

    
93
    protected static final String PUBLISHING_ORG_KEY = "publishingOrgKey";
94
    protected static final String PUBLISHING_COUNTRY = "publishingCountry";
95

    
96
    protected static final String EXTENSIONS = "extensions";
97
    protected static final String BASIS_OF_RECORD = "basisOfRecord";
98
    protected static final String INDIVIDUAL_COUNT = "individualCount";
99
    protected static final String TAXONKEY = "taxonKey";
100
    protected static final String KINGDOM_KEY = "kingdomKey";
101
    protected static final String PHYLUM_KEY = "phylumKey";
102
    protected static final String CLASS_KEY = "classKey";
103
    protected static final String ORDER_KEY = "orderKey";
104
    protected static final String FAMILY_KEY = "familyKey";
105
    protected static final String GENUS_KEY = "genusKey";
106
    protected static final String SPECIES_KEY = "speciesKey";
107
    protected static final String SCIENTIFIC_NAME = "scientificName";
108
    protected static final String KINGDOM =  "kingdom";
109
    protected static final String PHYLUM = "phylum";
110
    protected static final String ORDER = "order";
111
    protected static final String FAMILY  = "family";
112
    protected static final String GENUS = "genus";
113
    protected static final String SPECIES = "species";
114
    protected static final String GENERIC_NAME = "genericName";
115
    protected static final String SPECIFIC_EPITHET = "specificEpithet";
116
    protected static final String INFRASPECIFIC_EPITHET = "infraspecificEpithet";
117
    protected static final String TAXON_RANK = "taxonRank";
118
    protected static final String DATE_IDENTIFIED = "dateIdentified";
119
    protected static final String SCIENTIFIC_NAME_AUTHORSHIP = "scientificNameAuthorship";
120

    
121
    protected static final String ELEVATION = "elevation";
122
    protected static final String CONITNENT = "continent";
123
    protected static final String STATE_PROVINCE = "stateProvince";
124

    
125

    
126

    
127

    
128
    protected static final String ISSUES = "issues";
129
    protected static final String LAST_INTERPRETED = "lastInterpreted";
130
    protected static final String IDENTIFIERS = "identifiers";
131
    protected static final String FACTS = "facts";
132
    protected static final String RELATIONS = "relations";
133
    protected static final String GEODETICDATUM = "geodeticDatum";
134
    protected static final String CLASS = "class";
135

    
136
    protected static final String COUNTRY = "country";
137
    protected static final String NOMENCLATURAL_STATUS = "nomenclaturalStatus";
138
    protected static final String RIGHTSHOLDER = "rightsHolder";
139
    protected static final String IDEMTIFIER = "identifier";
140

    
141
    protected static final String NOMENCLATURALCODE = "nomenclaturalCode";
142
    protected static final String COUNTY = "county";
143

    
144
    protected static final String DATASET_NAME = "datasetName";
145
    protected static final String GBIF_ID = "gbifID";
146

    
147
    protected static final String OCCURENCE_ID = "occurrenceID";
148

    
149
    protected static final String TAXON_ID = "taxonID";
150
    protected static final String LICENCE = "license";
151

    
152
    protected static final String OWNER_INSTITUTION_CODE = "ownerInstitutionCode";
153
    protected static final String BIBLIOGRAPHIC_CITATION = "bibliographicCitation";
154
    protected static final String IDENTIFIED_BY = "identifiedBy";
155
    protected static final String COLLECTION_ID = "collectionID";
156

    
157
    private static final String PLANTAE = "Plantae";
158

    
159
    private static final String ANIMALIA = "Animalia";
160

    
161
    private static final String FUNGI = "Fungi";
162

    
163
    private static final String BACTERIA = "Bacteria";
164

    
165
    private static final String MULTIMEDIA = "media";
166

    
167

    
168

    
169

    
170

    
171

    
172
    /**
173
     * Parses the given {@link String} for occurrences.<br>
174
     * Note: The data structure of the GBIF response should not be changed.
175
     * @param jsonString JSON data as a String
176
     * @return the found occurrences as a collection of {@link GbifResponse}
177
     */
178
    public static Collection<GbifResponse> parseJsonRecords(String jsonString) {
179
        return parseJsonRecords(JSONObject.fromObject(jsonString));
180
    }
181

    
182
    /**
183
     * Parses the given {@link InputStream} for occurrences.
184
     * @param jsonString JSON data as an InputStream
185
     * @return the found occurrences as a collection of {@link GbifResponse}
186
     */
187
    public static Collection<GbifResponse> parseJsonRecords(InputStream inputStream) throws IOException{
188
        StringWriter stringWriter = new StringWriter();
189
        IOUtils.copy(inputStream, stringWriter);
190
        return parseJsonRecords(stringWriter.toString());
191
    }
192

    
193
    /**
194
     * Parses the given {@link JSONObject} for occurrences.<br>
195
     * Note: The data structure of the GBIF response should not be changed.
196
     * @param jsonString JSON data as an JSONObject
197
     * @return the found occurrences as a collection of {@link GbifResponse}
198
     */
199
    public static Collection<GbifResponse> parseJsonRecords(JSONObject jsonObject){
200
        return parseJsonRecords(jsonObject.getJSONArray("results"));
201
    }
202

    
203
    /**
204
     * Parses the given {@link JSONArray} for occurrences.
205
     * @param jsonString JSON data as an {@link JSONArray}
206
     * @return the found occurrences as a collection of {@link GbifResponse}
207
     */
208
    private static Collection<GbifResponse> parseJsonRecords(JSONArray jsonArray) {
209
        Collection<GbifResponse> results = new ArrayList<GbifResponse>();
210
        String[] tripleId = new String[3];
211
        String string;
212
        for(Object o:jsonArray){
213
            //parse every record
214
            tripleId = new String[3];
215
            if(o instanceof JSONObject){
216
                String dataSetKey = null;
217
                GbifDataSetProtocol dataSetProtocol = null;
218
                DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen);
219
                TaxonNameBase name = null;
220
                JSONObject record = (JSONObject)o;
221

    
222
                if(record.has(DATASET_PROTOCOL)){
223
                    dataSetProtocol = GbifDataSetProtocol.parseProtocol(record.getString(DATASET_PROTOCOL));
224
                }
225
                if(record.has(DATASET_KEY)){
226
                    dataSetKey = record.getString(DATASET_KEY);
227
                }
228
                if(record.has(COUNTRY_CODE)){
229
                    string = record.getString(COUNTRY_CODE);
230
                    Country country = Country.getCountryByIso3166A2(string);
231
                    if(country!=null){
232
                        derivedUnitFacade.setCountry(country);
233
                    }
234
                }
235
                if(record.has(LOCALITY)){
236
                    string = record.getString(LOCALITY);
237
                    derivedUnitFacade.setLocality(string);
238
                }
239

    
240
                if (record.has("species")){
241
                    Rank rank = null;
242

    
243
                    if (record.has(TAXON_RANK)){
244
                        string= record.getString(TAXON_RANK);
245
                        try {
246
                            rank = Rank.getRankByName(string);
247
                        } catch (UnknownCdmTypeException e) {
248
                            // TODO Auto-generated catch block
249
                            e.printStackTrace();
250
                        }
251
                    }
252
                    if (rank != null){
253
                        if (record.has(NOMENCLATURALCODE)){
254
                            string = record.getString(NOMENCLATURALCODE);
255

    
256
                            if (string.equals(NomenclaturalCode.ICZN.getTitleCache())){
257
                                name = ZoologicalName.NewInstance(rank);
258
                            } else if (string.equals(NomenclaturalCode.ICNAFP.getTitleCache())) {
259
                                name = BotanicalName.NewInstance(rank);
260
                            } else if (string.equals(NomenclaturalCode.ICNB.getTitleCache())){
261
                                name = BacterialName.NewInstance(rank);
262
                            } else if (string.equals(NomenclaturalCode.ICNCP.getTitleCache())){
263
                                name = CultivarPlantName.NewInstance(rank);
264
                            } else if (string.equals(NomenclaturalCode.ICVCN.getTitleCache())){
265
                                name = ViralName.NewInstance(rank);
266
                            } else {
267
                                name = NonViralName.NewInstance(rank);
268
                            }
269
                        }else {
270
                            if (record.has(KINGDOM)){
271
                                if (record.getString(KINGDOM).equals(PLANTAE)){
272
                                    name = BotanicalName.NewInstance(rank);
273
                                } else if (record.getString(KINGDOM).equals(ANIMALIA)){
274
                                    name = ZoologicalName.NewInstance(rank);
275
                                } else if (record.getString(KINGDOM).equals(FUNGI)){
276
                                    name = NonViralName.NewInstance(rank);
277
                                } else if (record.getString(KINGDOM).equals(BACTERIA)){
278
                                    name = BacterialName.NewInstance(rank);
279
                                } else{
280
                                    name = NonViralName.NewInstance(rank);
281
                                }
282
                            } else{
283
                                name = NonViralName.NewInstance(rank);
284
                            }
285
                        }
286
                        if (record.has(GENUS)){
287
                            ((NonViralName)name).setGenusOrUninomial(record.getString(GENUS));
288
                        }
289
                        if (record.has(SPECIFIC_EPITHET)){
290
                            ((NonViralName)name).setSpecificEpithet(record.getString(SPECIFIC_EPITHET));
291
                        }
292
                        if (record.has(INFRASPECIFIC_EPITHET)){
293
                            ((NonViralName)name).setInfraSpecificEpithet(record.getString(INFRASPECIFIC_EPITHET));
294
                        }
295
                        if (record.has(SCIENTIFIC_NAME)){
296
                            name.setTitleCache(record.getString(SCIENTIFIC_NAME), true);
297
                        }
298

    
299
                    }
300
                    DeterminationEvent detEvent = DeterminationEvent.NewInstance();
301

    
302
                    if (record.has(IDENTIFIED_BY)){
303
                        Person determiner = Person.NewTitledInstance(record.getString(IDENTIFIED_BY));
304
                        detEvent.setDeterminer(determiner);
305

    
306
                    }
307
                    detEvent.setTaxonName(name);
308
                    detEvent.setPreferredFlag(true);
309
                    derivedUnitFacade.addDetermination(detEvent);
310

    
311
                }
312

    
313

    
314

    
315
                // GPS location
316
                Point location = Point.NewInstance();
317
                derivedUnitFacade.setExactLocation(location);
318
                try {
319
                    if(record.has(LATITUDE)){
320
                        String lat = record.getString(LATITUDE);
321
                        location.setLatitudeByParsing(lat);
322
                    }
323
                    if(record.has(LONGITUDE)){
324
                        String lon = record.getString(LONGITUDE);
325
                        location.setLongitudeByParsing(lon);
326
                    }
327
                } catch (ParseException e) {
328
                    logger.error("Could not parse GPS coordinates", e);
329
                }
330
                if(record.has(GEOREFERENCE_PROTOCOL)){
331
                    String geo = record.getString(GEOREFERENCE_PROTOCOL);
332
                    ReferenceSystem referenceSystem = null;
333
                    //TODO: Is there another way than string comparison
334
                    //to check which reference system is used?
335
                    if(ReferenceSystem.WGS84().getLabel().contains(geo)){
336
                        referenceSystem = ReferenceSystem.WGS84();
337
                    }
338
                    else if(ReferenceSystem.GOOGLE_EARTH().getLabel().contains(geo)){
339
                        referenceSystem = ReferenceSystem.GOOGLE_EARTH();
340
                    }
341
                    else if(ReferenceSystem.GAZETTEER().getLabel().contains(geo)){
342
                        referenceSystem = ReferenceSystem.GAZETTEER();
343
                    }
344
                    location.setReferenceSystem(referenceSystem);
345
                }
346

    
347
                if(record.has(ELEVATION)){
348
                    try {
349
                        //parse integer and strip of unit
350
                        string = record.getString(ELEVATION);
351
                        int length = string.length();
352
                        StringBuilder builder = new StringBuilder();
353
                        for(int i=0;i<length;i++){
354
                            if(Character.isDigit(string.charAt(i))){
355
                                builder.append(string.charAt(i));
356
                            }
357
                            else{
358
                                break;
359
                            }
360
                        }
361
                        derivedUnitFacade.setAbsoluteElevation(Integer.parseInt(builder.toString()));
362
                    } catch (NumberFormatException e) {
363
                        logger.warn("Could not parse elevation", e);
364
                    }
365
                }
366

    
367
                //Date (Gathering Period)
368
                TimePeriod timePeriod = TimePeriod.NewInstance();
369
                derivedUnitFacade.setGatheringPeriod(timePeriod);
370
                //TODO what happens with eventDate??
371
                if(record.has(YEAR)){
372
                    timePeriod.setStartYear(record.getInt(YEAR));
373
                }
374
                if(record.has(MONTH)){
375
                    timePeriod.setStartMonth(record.getInt(MONTH));
376
                }
377
                if(record.has(DAY)){
378
                    timePeriod.setStartDay(record.getInt(DAY));
379
                }
380
                if(record.has(RECORDED_BY)){
381
                    Person person = Person.NewTitledInstance(record.getString(RECORDED_BY));
382
                    //FIXME check data base if collector already present
383
                    derivedUnitFacade.setCollector(person);
384
                }
385

    
386
                //collector number (fieldNumber OR recordNumber)
387
                if(record.has(FIELD_NUMBER)){
388
                    derivedUnitFacade.setFieldNumber(record.getString(FIELD_NUMBER));
389
                }
390
                //collector number (fieldNumber OR recordNumber)
391
                if(record.has(RECORD_NUMBER)){
392
                    derivedUnitFacade.setFieldNumber(record.getString(RECORD_NUMBER));
393
                }
394

    
395
                if(record.has(EVENT_REMARKS)){
396
                    derivedUnitFacade.setGatheringEventDescription(record.getString(EVENT_REMARKS));
397
                }
398
                if(record.has(OCCURRENCE_REMARKS)){
399
                    derivedUnitFacade.setEcology(record.getString(OCCURRENCE_REMARKS));
400
                }
401
                if(record.has(COLLECTION_CODE)){
402
                    String collectionCode = record.getString(COLLECTION_CODE);
403
                    tripleId[2] = collectionCode;
404
                    //FIXME: check data base for existing collections
405
                    eu.etaxonomy.cdm.model.occurrence.Collection collection = eu.etaxonomy.cdm.model.occurrence.Collection.NewInstance();
406
                    collection.setCode(collectionCode);
407
                    if(record.has(INSTITUTION_CODE)){
408
                        Institution institution = Institution.NewNamedInstance(record.getString(INSTITUTION_CODE));
409
                        institution.setCode(record.getString(INSTITUTION_CODE));
410
                        collection.setInstitute(institution);
411
                    }
412
                    derivedUnitFacade.setCollection(collection);
413
                }
414
                if(record.has(CATALOG_NUMBER)){
415
                    derivedUnitFacade.setCatalogNumber(record.getString(CATALOG_NUMBER));
416
                    derivedUnitFacade.setAccessionNumber(record.getString(CATALOG_NUMBER));
417
                    tripleId[0]= record.getString(CATALOG_NUMBER);
418
                }
419
                if(record.has(INSTITUTION_CODE)){
420
                    derivedUnitFacade.setAccessionNumber(record.getString(INSTITUTION_CODE));
421
                    tripleId[1]= record.getString(INSTITUTION_CODE);
422
                }
423

    
424
                if (record.has(OCCURENCE_ID)){
425
                    IdentifiableSource source = IdentifiableSource.NewDataImportInstance((record.getString(OCCURENCE_ID)));
426
                    derivedUnitFacade.addSource(source);
427
                }
428

    
429
                if (record.has(MULTIMEDIA)){
430
                    //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
431
                    JSONArray multimediaArray = record.getJSONArray(MULTIMEDIA);
432
                    JSONObject mediaRecord;
433
                    Media media;
434
                    URI uri = null;
435
                    ImageInfo imageInf = null;
436
                    MediaRepresentation representation = null;
437
                    SpecimenOrObservationType type = null;
438
                    for(Object object:multimediaArray){
439
                        //parse every record
440
                       media = Media.NewInstance();
441
                       uri = null;
442
                       imageInf = null;
443

    
444
                        if(object instanceof JSONObject){
445
                            mediaRecord = (JSONObject) object;
446

    
447
                            if (mediaRecord.has("identifier")){
448
                                try {
449
                                    uri = new URI(mediaRecord.getString("identifier"));
450
                                    imageInf = ImageInfo.NewInstance(uri, 0);
451

    
452
                                } catch (URISyntaxException |IOException | HttpException e) {
453
                                    // TODO Auto-generated catch block
454
                                    e.printStackTrace();
455
                                }
456
                               // media.addIdentifier(mediaRecord.getString("identifier"), null);
457
                            }
458
                            if (mediaRecord.has("references")){
459

    
460

    
461
                            }
462
                            if (mediaRecord.has("format")){
463

    
464
                            }
465
                            if (mediaRecord.has("type")){
466
                                if (mediaRecord.get("type").equals("StillImage")){
467
                                    type = SpecimenOrObservationType.StillImage;
468
                                }
469
                            }
470

    
471
                            }
472
                            ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInf);
473
                            representation = MediaRepresentation.NewInstance();
474

    
475
                            representation.addRepresentationPart(imageFile);
476
                            media.addRepresentation(representation);
477

    
478
                            derivedUnitFacade.addDerivedUnitMedia(media);
479
                        }
480
                    //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
481
                   //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
482
                    //format=image/jpeg
483
                    //type=StillImage
484

    
485
                }
486

    
487
                // create dataset URL
488
                URI uri = null;
489
                try {
490
                    uri = UriUtils.createUri(new URL(GbifQueryServiceWrapper.BASE_URL), "/v1/dataset/"+dataSetKey+"/endpoint", null, null);
491
                } catch (MalformedURLException e) {
492
                    logger.error("Endpoint URI could not be created!", e);
493
                } catch (URISyntaxException e) {
494
                    logger.error("Endpoint URI could not be created!", e);
495
                }
496
                results.add(new GbifResponse(derivedUnitFacade, uri, dataSetProtocol, tripleId, name));
497
            }
498
        }
499
        return results;
500
    }
501

    
502
    public static DataSetResponse parseOriginalDataSetUri(InputStream inputStream) throws IOException {
503
        StringWriter stringWriter = new StringWriter();
504
        IOUtils.copy(inputStream, stringWriter);
505
        return parseOriginalDataSetUri(stringWriter.toString());
506
    }
507

    
508
    public static DataSetResponse parseOriginalDataSetUri(String jsonString) {
509
        DataSetResponse response = new DataSetResponse();
510
        JSONArray jsonArray = JSONArray.fromObject(jsonString);
511
        Object next = jsonArray.iterator().next();
512
        if(next instanceof JSONObject){
513
            JSONObject jsonObject = (JSONObject)next;
514
            if(jsonObject.has(URL)){
515
                response.setEndpoint(URI.create(jsonObject.getString(URL)));
516
            }
517
            if(jsonObject.has(TYPE)){
518
                response.setProtocol(GbifDataSetProtocol.parseProtocol(jsonObject.getString(TYPE)));
519
            }
520
        }
521
        return response;
522
    }
523

    
524
}
(3-3/6)