Project

General

Profile

Download (23.9 KB) Statistics
| Branch: | Tag: | Revision:
/**
* Copyright (C) 2014 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
package eu.etaxonomy.cdm.ext.occurrence.gbif;
10

    
11
import java.io.IOException;
12
import java.io.InputStream;
13
import java.io.StringWriter;
14
import java.net.MalformedURLException;
15
import java.net.URI;
16
import java.net.URISyntaxException;
17
import java.net.URL;
18
import java.text.ParseException;
19
import java.util.ArrayList;
20
import java.util.Collection;
21

    
22
import org.apache.commons.io.IOUtils;
23
import org.apache.http.HttpException;
24
import org.apache.log4j.Logger;
25

    
26
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
27
import eu.etaxonomy.cdm.common.UriUtils;
28
import eu.etaxonomy.cdm.common.media.ImageInfo;
29
import eu.etaxonomy.cdm.model.agent.Institution;
30
import eu.etaxonomy.cdm.model.agent.Person;
31
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
32
import eu.etaxonomy.cdm.model.common.TimePeriod;
33
import eu.etaxonomy.cdm.model.location.Country;
34
import eu.etaxonomy.cdm.model.location.Point;
35
import eu.etaxonomy.cdm.model.location.ReferenceSystem;
36
import eu.etaxonomy.cdm.model.media.ImageFile;
37
import eu.etaxonomy.cdm.model.media.Media;
38
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
39
import eu.etaxonomy.cdm.model.name.BotanicalName;
40
import eu.etaxonomy.cdm.model.name.CultivarPlantName;
41
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
42
import eu.etaxonomy.cdm.model.name.Rank;
43
import eu.etaxonomy.cdm.model.name.TaxonNameBase;
44
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
45
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
46
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
47
import net.sf.json.JSONArray;
48
import net.sf.json.JSONObject;
49

    
50
/**
51
 * Utility class which provides the functionality to convert a JSON response
52
 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
53
 * @author pplitzner
54
 * @date 22.05.2014
55
 *
56
 */
57
public class GbifJsonOccurrenceParser {
58

    
59
    private static final Logger logger = Logger.getLogger(GbifJsonOccurrenceParser.class);
60

    
61
    private static final String DATASET_KEY = "datasetKey";
62
    private static final String DATASET_PROTOCOL = "protocol";
63

    
64
    private static final String KEY = "key";
65
    private static final String URL = "url";
66
    private static final String TYPE = "type";
67

    
68
    private static final String COUNTRY_CODE = "countryCode";
69
    private static final String LOCALITY = "locality";
70
    private static final String LONGITUDE = "decimalLongitude";
71
    private static final String LATITUDE = "decimalLatitude";
72
    private static final String GEOREFERENCE_PROTOCOL = "georeferenceProtocol";//reference system
73
    private static final String VERBATIM_ELEVATION = "verbatimElevation";
74
    private static final String YEAR = "year";
75
    private static final String MONTH = "month";
76
    private static final String DAY = "day";
77
    private static final String EVENT_DATE= "eventDate";
78
    private static final String RECORDED_BY= "recordedBy";//collector
79
    private static final String RECORD_NUMBER = "recordNumber";//collector number
80
    private static final String FIELD_NUMBER = "fieldNumber";//collector number
81
    private static final String EVENT_REMARKS = "eventRemarks";//gathering event description
82
    private static final String OCCURRENCE_REMARKS = "occurrenceRemarks";//ecology
83
    private static final String COLLECTION_CODE = "collectionCode";
84
    private static final String CATALOG_NUMBER = "catalogNumber";//accession number
85
    private static final String INSTITUTION_CODE = "institutionCode";
86

    
87

    
88
    protected static final String PUBLISHING_ORG_KEY = "publishingOrgKey";
89
    protected static final String PUBLISHING_COUNTRY = "publishingCountry";
90

    
91
    protected static final String EXTENSIONS = "extensions";
92
    protected static final String BASIS_OF_RECORD = "basisOfRecord";
93
    protected static final String INDIVIDUAL_COUNT = "individualCount";
94
    protected static final String TAXONKEY = "taxonKey";
95
    protected static final String KINGDOM_KEY = "kingdomKey";
96
    protected static final String PHYLUM_KEY = "phylumKey";
97
    protected static final String CLASS_KEY = "classKey";
98
    protected static final String ORDER_KEY = "orderKey";
99
    protected static final String FAMILY_KEY = "familyKey";
100
    protected static final String GENUS_KEY = "genusKey";
101
    protected static final String SPECIES_KEY = "speciesKey";
102
    protected static final String SCIENTIFIC_NAME = "scientificName";
103
    protected static final String KINGDOM =  "kingdom";
104
    protected static final String PHYLUM = "phylum";
105
    protected static final String ORDER = "order";
106
    protected static final String FAMILY  = "family";
107
    protected static final String GENUS = "genus";
108
    protected static final String SPECIES = "species";
109
    protected static final String GENERIC_NAME = "genericName";
110
    protected static final String SPECIFIC_EPITHET = "specificEpithet";
111
    protected static final String INFRASPECIFIC_EPITHET = "infraspecificEpithet";
112
    protected static final String TAXON_RANK = "taxonRank";
113
    protected static final String DATE_IDENTIFIED = "dateIdentified";
114
    protected static final String SCIENTIFIC_NAME_AUTHORSHIP = "scientificNameAuthorship";
115

    
116
    protected static final String ELEVATION = "elevation";
117
    protected static final String CONITNENT = "continent";
118
    protected static final String STATE_PROVINCE = "stateProvince";
119

    
120

    
121

    
122

    
123
    protected static final String ISSUES = "issues";
124
    protected static final String LAST_INTERPRETED = "lastInterpreted";
125
    protected static final String IDENTIFIERS = "identifiers";
126
    protected static final String FACTS = "facts";
127
    protected static final String RELATIONS = "relations";
128
    protected static final String GEODETICDATUM = "geodeticDatum";
129
    protected static final String CLASS = "class";
130

    
131
    protected static final String COUNTRY = "country";
132
    protected static final String NOMENCLATURAL_STATUS = "nomenclaturalStatus";
133
    protected static final String RIGHTSHOLDER = "rightsHolder";
134
    protected static final String IDEMTIFIER = "identifier";
135

    
136
    protected static final String NOMENCLATURALCODE = "nomenclaturalCode";
137
    protected static final String COUNTY = "county";
138

    
139
    protected static final String DATASET_NAME = "datasetName";
140
    protected static final String GBIF_ID = "gbifID";
141

    
142
    protected static final String OCCURENCE_ID = "occurrenceID";
143

    
144
    protected static final String TAXON_ID = "taxonID";
145
    protected static final String LICENCE = "license";
146

    
147
    protected static final String OWNER_INSTITUTION_CODE = "ownerInstitutionCode";
148
    protected static final String BIBLIOGRAPHIC_CITATION = "bibliographicCitation";
149
    protected static final String IDENTIFIED_BY = "identifiedBy";
150
    protected static final String COLLECTION_ID = "collectionID";
151

    
152
    private static final String PLANTAE = "Plantae";
153

    
154
    private static final String ANIMALIA = "Animalia";
155

    
156
    private static final String FUNGI = "Fungi";
157

    
158
    private static final String BACTERIA = "Bacteria";
159

    
160
    private static final String MULTIMEDIA = "media";
161

    
162

    
163

    
164

    
165

    
166

    
167
    /**
168
     * Parses the given {@link String} for occurrences.<br>
169
     * Note: The data structure of the GBIF response should not be changed.
170
     * @param jsonString JSON data as a String
171
     * @return the found occurrences as a collection of {@link GbifResponse}
172
     */
173
    public static Collection<GbifResponse> parseJsonRecords(String jsonString) {
174
        return parseJsonRecords(JSONObject.fromObject(jsonString));
175
    }
176

    
177
    /**
178
     * Parses the given {@link InputStream} for occurrences.
179
     * @param jsonString JSON data as an InputStream
180
     * @return the found occurrences as a collection of {@link GbifResponse}
181
     */
182
    public static Collection<GbifResponse> parseJsonRecords(InputStream inputStream) throws IOException{
183
        StringWriter stringWriter = new StringWriter();
184
        IOUtils.copy(inputStream, stringWriter);
185
        return parseJsonRecords(stringWriter.toString());
186
    }
187

    
188
    /**
189
     * Parses the given {@link JSONObject} for occurrences.<br>
190
     * Note: The data structure of the GBIF response should not be changed.
191
     * @param jsonString JSON data as an JSONObject
192
     * @return the found occurrences as a collection of {@link GbifResponse}
193
     */
194
    public static Collection<GbifResponse> parseJsonRecords(JSONObject jsonObject){
195
        return parseJsonRecords(jsonObject.getJSONArray("results"));
196
    }
197

    
198
    /**
199
     * Parses the given {@link JSONArray} for occurrences.
200
     * @param jsonString JSON data as an {@link JSONArray}
201
     * @return the found occurrences as a collection of {@link GbifResponse}
202
     */
203
    private static Collection<GbifResponse> parseJsonRecords(JSONArray jsonArray) {
204
        Collection<GbifResponse> results = new ArrayList<GbifResponse>();
205
        String[] tripleId = new String[3];
206
        String string;
207
        for(Object o:jsonArray){
208
            //parse every record
209
            tripleId = new String[3];
210
            if(o instanceof JSONObject){
211
                String dataSetKey = null;
212
                GbifDataSetProtocol dataSetProtocol = null;
213
                DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen);
214
                TaxonNameBase name = null;
215
                JSONObject record = (JSONObject)o;
216

    
217
                if(record.has(DATASET_PROTOCOL)){
218
                    dataSetProtocol = GbifDataSetProtocol.parseProtocol(record.getString(DATASET_PROTOCOL));
219
                }
220
                if(record.has(DATASET_KEY)){
221
                    dataSetKey = record.getString(DATASET_KEY);
222
                }
223
                if(record.has(COUNTRY_CODE)){
224
                    string = record.getString(COUNTRY_CODE);
225
                    Country country = Country.getCountryByIso3166A2(string);
226
                    if(country!=null){
227
                        derivedUnitFacade.setCountry(country);
228
                    }
229
                }
230
                if(record.has(LOCALITY)){
231
                    string = record.getString(LOCALITY);
232
                    derivedUnitFacade.setLocality(string);
233
                }
234

    
235
                if (record.has("species")){
236
                    Rank rank = null;
237

    
238
                    if (record.has(TAXON_RANK)){
239
                        string= record.getString(TAXON_RANK);
240
                        try {
241
                            rank = Rank.getRankByName(string);
242
                        } catch (UnknownCdmTypeException e) {
243
                            // TODO Auto-generated catch block
244
                            e.printStackTrace();
245
                        }
246
                    }
247
                    if (rank != null){
248
                        if (record.has(NOMENCLATURALCODE)){
249
                            string = record.getString(NOMENCLATURALCODE);
250

    
251
                            if (string.equals(NomenclaturalCode.ICZN.getTitleCache())){
252
                                name = TaxonNameBase.NewZoologicalInstance(rank);
253
                            } else if (string.equals(NomenclaturalCode.ICNAFP.getTitleCache())) {
254
                                name = TaxonNameBase.NewBotanicalInstance(rank);
255
                            } else if (string.equals(NomenclaturalCode.ICNB.getTitleCache())){
256
                                name = TaxonNameBase.NewBacterialInstance(rank);
257
                            } else if (string.equals(NomenclaturalCode.ICNCP.getTitleCache())){
258
                                name = CultivarPlantName.NewInstance(rank);
259
                            } else if (string.equals(NomenclaturalCode.ICVCN.getTitleCache())){
260
                                name = TaxonNameBase.NewViralInstance(rank);
261
                            } else {
262
                            }
263
                        }else {
264
                            if (record.has(KINGDOM)){
265
                                if (record.getString(KINGDOM).equals(PLANTAE)){
266
                                    name = TaxonNameBase.NewBotanicalInstance(rank);
267
                                } else if (record.getString(KINGDOM).equals(ANIMALIA)){
268
                                    name = TaxonNameBase.NewZoologicalInstance(rank);
269
                                } else if (record.getString(KINGDOM).equals(FUNGI)){
270
                                    name = TaxonNameBase.NewBotanicalInstance(rank);
271
                                } else if (record.getString(KINGDOM).equals(BACTERIA)){
272
                                    name = TaxonNameBase.NewBacterialInstance(rank);
273
                                } else{
274
                                    name = TaxonNameBase.NewNonViralInstance(rank);
275
                                }
276
                            } else{
277
                                name = TaxonNameBase.NewNonViralInstance(rank);
278
                            }
279
                        }
280
                        if (record.has(GENUS)){
281
                            name.setGenusOrUninomial(record.getString(GENUS));
282
                        }
283
                        if (record.has(SPECIFIC_EPITHET)){
284
                            name.setSpecificEpithet(record.getString(SPECIFIC_EPITHET));
285
                        }
286
                        if (record.has(INFRASPECIFIC_EPITHET)){
287
                            name.setInfraSpecificEpithet(record.getString(INFRASPECIFIC_EPITHET));
288
                        }
289
                        if (record.has(SCIENTIFIC_NAME)){
290
                            name.setTitleCache(record.getString(SCIENTIFIC_NAME), true);
291
                        }
292

    
293
                    }
294
                    DeterminationEvent detEvent = DeterminationEvent.NewInstance();
295

    
296
                    if (record.has(IDENTIFIED_BY)){
297
                        Person determiner = Person.NewTitledInstance(record.getString(IDENTIFIED_BY));
298
                        detEvent.setDeterminer(determiner);
299

    
300
                    }
301
                    detEvent.setTaxonName(name);
302
                    detEvent.setPreferredFlag(true);
303
                    derivedUnitFacade.addDetermination(detEvent);
304

    
305
                }
306

    
307

    
308

    
309
                // GPS location
310
                Point location = Point.NewInstance();
311
                derivedUnitFacade.setExactLocation(location);
312
                try {
313
                    if(record.has(LATITUDE)){
314
                        String lat = record.getString(LATITUDE);
315
                        location.setLatitudeByParsing(lat);
316
                    }
317
                    if(record.has(LONGITUDE)){
318
                        String lon = record.getString(LONGITUDE);
319
                        location.setLongitudeByParsing(lon);
320
                    }
321
                } catch (ParseException e) {
322
                    logger.error("Could not parse GPS coordinates", e);
323
                }
324
                if(record.has(GEOREFERENCE_PROTOCOL)){
325
                    String geo = record.getString(GEOREFERENCE_PROTOCOL);
326
                    ReferenceSystem referenceSystem = null;
327
                    //TODO: Is there another way than string comparison
328
                    //to check which reference system is used?
329
                    if(ReferenceSystem.WGS84().getLabel().contains(geo)){
330
                        referenceSystem = ReferenceSystem.WGS84();
331
                    }
332
                    else if(ReferenceSystem.GOOGLE_EARTH().getLabel().contains(geo)){
333
                        referenceSystem = ReferenceSystem.GOOGLE_EARTH();
334
                    }
335
                    else if(ReferenceSystem.GAZETTEER().getLabel().contains(geo)){
336
                        referenceSystem = ReferenceSystem.GAZETTEER();
337
                    }
338
                    location.setReferenceSystem(referenceSystem);
339
                }
340

    
341
                if(record.has(ELEVATION)){
342
                    try {
343
                        //parse integer and strip of unit
344
                        string = record.getString(ELEVATION);
345
                        int length = string.length();
346
                        StringBuilder builder = new StringBuilder();
347
                        for(int i=0;i<length;i++){
348
                            if(Character.isDigit(string.charAt(i))){
349
                                builder.append(string.charAt(i));
350
                            }
351
                            else{
352
                                break;
353
                            }
354
                        }
355
                        derivedUnitFacade.setAbsoluteElevation(Integer.parseInt(builder.toString()));
356
                    } catch (NumberFormatException e) {
357
                        logger.warn("Could not parse elevation", e);
358
                    }
359
                }
360

    
361
                //Date (Gathering Period)
362
                TimePeriod timePeriod = TimePeriod.NewInstance();
363
                derivedUnitFacade.setGatheringPeriod(timePeriod);
364
                //TODO what happens with eventDate??
365
                if(record.has(YEAR)){
366
                    timePeriod.setStartYear(record.getInt(YEAR));
367
                }
368
                if(record.has(MONTH)){
369
                    timePeriod.setStartMonth(record.getInt(MONTH));
370
                }
371
                if(record.has(DAY)){
372
                    timePeriod.setStartDay(record.getInt(DAY));
373
                }
374
                if(record.has(RECORDED_BY)){
375
                    Person person = Person.NewTitledInstance(record.getString(RECORDED_BY));
376
                    //FIXME check data base if collector already present
377
                    derivedUnitFacade.setCollector(person);
378
                }
379

    
380
                //collector number (fieldNumber OR recordNumber)
381
                if(record.has(FIELD_NUMBER)){
382
                    derivedUnitFacade.setFieldNumber(record.getString(FIELD_NUMBER));
383
                }
384
                //collector number (fieldNumber OR recordNumber)
385
                if(record.has(RECORD_NUMBER)){
386
                    derivedUnitFacade.setFieldNumber(record.getString(RECORD_NUMBER));
387
                }
388

    
389
                if(record.has(EVENT_REMARKS)){
390
                    derivedUnitFacade.setGatheringEventDescription(record.getString(EVENT_REMARKS));
391
                }
392
                if(record.has(OCCURRENCE_REMARKS)){
393
                    derivedUnitFacade.setEcology(record.getString(OCCURRENCE_REMARKS));
394
                }
395
                if(record.has(COLLECTION_CODE)){
396
                    String collectionCode = record.getString(COLLECTION_CODE);
397
                    tripleId[2] = collectionCode;
398
                    //FIXME: check data base for existing collections
399
                    eu.etaxonomy.cdm.model.occurrence.Collection collection = eu.etaxonomy.cdm.model.occurrence.Collection.NewInstance();
400
                    collection.setCode(collectionCode);
401
                    if(record.has(INSTITUTION_CODE)){
402
                        Institution institution = Institution.NewNamedInstance(record.getString(INSTITUTION_CODE));
403
                        institution.setCode(record.getString(INSTITUTION_CODE));
404
                        collection.setInstitute(institution);
405
                    }
406
                    derivedUnitFacade.setCollection(collection);
407
                }
408
                if(record.has(CATALOG_NUMBER)){
409
                    derivedUnitFacade.setCatalogNumber(record.getString(CATALOG_NUMBER));
410
                    derivedUnitFacade.setAccessionNumber(record.getString(CATALOG_NUMBER));
411
                    tripleId[0]= record.getString(CATALOG_NUMBER);
412
                }
413
                if(record.has(INSTITUTION_CODE)){
414
                    derivedUnitFacade.setAccessionNumber(record.getString(INSTITUTION_CODE));
415
                    tripleId[1]= record.getString(INSTITUTION_CODE);
416
                }
417

    
418
                if (record.has(OCCURENCE_ID)){
419
                    IdentifiableSource source = IdentifiableSource.NewDataImportInstance((record.getString(OCCURENCE_ID)));
420
                    derivedUnitFacade.addSource(source);
421
                }
422

    
423
                if (record.has(MULTIMEDIA)){
424
                    //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
425
                    JSONArray multimediaArray = record.getJSONArray(MULTIMEDIA);
426
                    JSONObject mediaRecord;
427
                    Media media;
428
                    URI uri = null;
429
                    ImageInfo imageInf = null;
430
                    MediaRepresentation representation = null;
431
                    SpecimenOrObservationType type = null;
432
                    for(Object object:multimediaArray){
433
                        //parse every record
434
                       media = Media.NewInstance();
435
                       uri = null;
436
                       imageInf = null;
437

    
438
                        if(object instanceof JSONObject){
439
                            mediaRecord = (JSONObject) object;
440

    
441
                            if (mediaRecord.has("identifier")){
442
                                try {
443
                                    uri = new URI(mediaRecord.getString("identifier"));
444
                                    imageInf = ImageInfo.NewInstance(uri, 0);
445

    
446
                                } catch (URISyntaxException |IOException | HttpException e) {
447
                                    // TODO Auto-generated catch block
448
                                    e.printStackTrace();
449
                                }
450
                               // media.addIdentifier(mediaRecord.getString("identifier"), null);
451
                            }
452
                            if (mediaRecord.has("references")){
453

    
454

    
455
                            }
456
                            if (mediaRecord.has("format")){
457

    
458
                            }
459
                            if (mediaRecord.has("type")){
460
                                if (mediaRecord.get("type").equals("StillImage")){
461
                                    type = SpecimenOrObservationType.StillImage;
462
                                }
463
                            }
464

    
465
                            }
466
                            ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInf);
467
                            representation = MediaRepresentation.NewInstance();
468

    
469
                            representation.addRepresentationPart(imageFile);
470
                            media.addRepresentation(representation);
471

    
472
                            derivedUnitFacade.addDerivedUnitMedia(media);
473
                        }
474
                    //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
475
                   //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
476
                    //format=image/jpeg
477
                    //type=StillImage
478

    
479
                }
480

    
481
                // create dataset URL
482
                URI uri = null;
483
                try {
484
                    uri = UriUtils.createUri(new URL(GbifQueryServiceWrapper.BASE_URL), "/v1/dataset/"+dataSetKey+"/endpoint", null, null);
485
                } catch (MalformedURLException e) {
486
                    logger.error("Endpoint URI could not be created!", e);
487
                } catch (URISyntaxException e) {
488
                    logger.error("Endpoint URI could not be created!", e);
489
                }
490
                results.add(new GbifResponse(derivedUnitFacade, uri, dataSetProtocol, tripleId, name));
491
            }
492
        }
493
        return results;
494
    }
495

    
496
    public static DataSetResponse parseOriginalDataSetUri(InputStream inputStream) throws IOException {
497
        StringWriter stringWriter = new StringWriter();
498
        IOUtils.copy(inputStream, stringWriter);
499
        return parseOriginalDataSetUri(stringWriter.toString());
500
    }
501

    
502
    public static DataSetResponse parseOriginalDataSetUri(String jsonString) {
503
        DataSetResponse response = new DataSetResponse();
504
        JSONArray jsonArray = JSONArray.fromObject(jsonString);
505
        Object next = jsonArray.iterator().next();
506
        if(next instanceof JSONObject){
507
            JSONObject jsonObject = (JSONObject)next;
508
            if(jsonObject.has(URL)){
509
                response.setEndpoint(URI.create(jsonObject.getString(URL)));
510
            }
511
            if(jsonObject.has(TYPE)){
512
                response.setProtocol(GbifDataSetProtocol.parseProtocol(jsonObject.getString(TYPE)));
513
            }
514
        }
515
        return response;
516
    }
517

    
518
}
(3-3/6)