/**
 * Copyright (C) 2014 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9 package eu
.etaxonomy
.cdm
.ext
.occurrence
.gbif
;
11 import java
.io
.IOException
;
12 import java
.io
.InputStream
;
13 import java
.io
.StringWriter
;
14 import java
.net
.MalformedURLException
;
16 import java
.net
.URISyntaxException
;
18 import java
.text
.ParseException
;
19 import java
.util
.ArrayList
;
20 import java
.util
.Collection
;
22 import org
.apache
.commons
.io
.IOUtils
;
23 import org
.apache
.http
.HttpException
;
24 import org
.apache
.log4j
.Logger
;
26 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
27 import eu
.etaxonomy
.cdm
.common
.UriUtils
;
28 import eu
.etaxonomy
.cdm
.common
.media
.CdmImageInfo
;
29 import eu
.etaxonomy
.cdm
.model
.agent
.Institution
;
30 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
31 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
32 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
33 import eu
.etaxonomy
.cdm
.model
.location
.Country
;
34 import eu
.etaxonomy
.cdm
.model
.location
.Point
;
35 import eu
.etaxonomy
.cdm
.model
.location
.ReferenceSystem
;
36 import eu
.etaxonomy
.cdm
.model
.media
.ImageFile
;
37 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
38 import eu
.etaxonomy
.cdm
.model
.media
.MediaRepresentation
;
39 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
40 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
41 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
42 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameFactory
;
43 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
44 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
45 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
46 import net
.sf
.json
.JSONArray
;
47 import net
.sf
.json
.JSONObject
;
50 * Utility class which provides the functionality to convert a JSON response
51 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
56 public class GbifJsonOccurrenceParser
{
58 private static final Logger logger
= Logger
.getLogger(GbifJsonOccurrenceParser
.class);
60 private static final String DATASET_KEY
= "datasetKey";
61 private static final String DATASET_PROTOCOL
= "protocol";
63 private static final String KEY
= "key";
64 private static final String URL
= "url";
65 private static final String TYPE
= "type";
67 private static final String COUNTRY_CODE
= "countryCode";
68 private static final String LOCALITY
= "locality";
69 private static final String LONGITUDE
= "decimalLongitude";
70 private static final String LATITUDE
= "decimalLatitude";
71 private static final String GEOREFERENCE_PROTOCOL
= "georeferenceProtocol";//reference system
72 private static final String VERBATIM_ELEVATION
= "verbatimElevation";
73 private static final String YEAR
= "year";
74 private static final String MONTH
= "month";
75 private static final String DAY
= "day";
76 private static final String EVENT_DATE
= "eventDate";
77 private static final String RECORDED_BY
= "recordedBy";//collector
78 private static final String RECORD_NUMBER
= "recordNumber";//collector number
79 private static final String FIELD_NUMBER
= "fieldNumber";//collector number
80 private static final String EVENT_REMARKS
= "eventRemarks";//gathering event description
81 private static final String OCCURRENCE_REMARKS
= "occurrenceRemarks";//ecology
82 private static final String COLLECTION_CODE
= "collectionCode";
83 private static final String CATALOG_NUMBER
= "catalogNumber";//accession number
84 private static final String INSTITUTION_CODE
= "institutionCode";
87 protected static final String PUBLISHING_ORG_KEY
= "publishingOrgKey";
88 protected static final String PUBLISHING_COUNTRY
= "publishingCountry";
90 protected static final String EXTENSIONS
= "extensions";
91 protected static final String BASIS_OF_RECORD
= "basisOfRecord";
92 protected static final String INDIVIDUAL_COUNT
= "individualCount";
93 protected static final String TAXONKEY
= "taxonKey";
94 protected static final String KINGDOM_KEY
= "kingdomKey";
95 protected static final String PHYLUM_KEY
= "phylumKey";
96 protected static final String CLASS_KEY
= "classKey";
97 protected static final String ORDER_KEY
= "orderKey";
98 protected static final String FAMILY_KEY
= "familyKey";
99 protected static final String GENUS_KEY
= "genusKey";
100 protected static final String SPECIES_KEY
= "speciesKey";
101 protected static final String SCIENTIFIC_NAME
= "scientificName";
102 protected static final String KINGDOM
= "kingdom";
103 protected static final String PHYLUM
= "phylum";
104 protected static final String ORDER
= "order";
105 protected static final String FAMILY
= "family";
106 protected static final String GENUS
= "genus";
107 protected static final String SPECIES
= "species";
108 protected static final String GENERIC_NAME
= "genericName";
109 protected static final String SPECIFIC_EPITHET
= "specificEpithet";
110 protected static final String INFRASPECIFIC_EPITHET
= "infraspecificEpithet";
111 protected static final String TAXON_RANK
= "taxonRank";
112 protected static final String DATE_IDENTIFIED
= "dateIdentified";
113 protected static final String SCIENTIFIC_NAME_AUTHORSHIP
= "scientificNameAuthorship";
115 protected static final String ELEVATION
= "elevation";
116 protected static final String CONITNENT
= "continent";
117 protected static final String STATE_PROVINCE
= "stateProvince";
122 protected static final String ISSUES
= "issues";
123 protected static final String LAST_INTERPRETED
= "lastInterpreted";
124 protected static final String IDENTIFIERS
= "identifiers";
125 protected static final String FACTS
= "facts";
126 protected static final String RELATIONS
= "relations";
127 protected static final String GEODETICDATUM
= "geodeticDatum";
128 protected static final String CLASS
= "class";
130 protected static final String COUNTRY
= "country";
131 protected static final String NOMENCLATURAL_STATUS
= "nomenclaturalStatus";
132 protected static final String RIGHTSHOLDER
= "rightsHolder";
133 protected static final String IDEMTIFIER
= "identifier";
135 protected static final String NOMENCLATURALCODE
= "nomenclaturalCode";
136 protected static final String COUNTY
= "county";
138 protected static final String DATASET_NAME
= "datasetName";
139 protected static final String GBIF_ID
= "gbifID";
141 protected static final String OCCURENCE_ID
= "occurrenceID";
143 protected static final String TAXON_ID
= "taxonID";
144 protected static final String LICENCE
= "license";
146 protected static final String OWNER_INSTITUTION_CODE
= "ownerInstitutionCode";
147 protected static final String BIBLIOGRAPHIC_CITATION
= "bibliographicCitation";
148 protected static final String IDENTIFIED_BY
= "identifiedBy";
149 protected static final String COLLECTION_ID
= "collectionID";
151 private static final String PLANTAE
= "Plantae";
153 private static final String ANIMALIA
= "Animalia";
155 private static final String FUNGI
= "Fungi";
157 private static final String BACTERIA
= "Bacteria";
159 private static final String MULTIMEDIA
= "media";
167 * Parses the given {@link String} for occurrences.<br>
168 * Note: The data structure of the GBIF response should not be changed.
169 * @param jsonString JSON data as a String
170 * @return the found occurrences as a collection of {@link GbifResponse}
172 public static Collection
<GbifResponse
> parseJsonRecords(String jsonString
) {
173 return parseJsonRecords(JSONObject
.fromObject(jsonString
));
177 * Parses the given {@link InputStream} for occurrences.
178 * @param jsonString JSON data as an InputStream
179 * @return the found occurrences as a collection of {@link GbifResponse}
181 public static Collection
<GbifResponse
> parseJsonRecords(InputStream inputStream
) throws IOException
{
182 StringWriter stringWriter
= new StringWriter();
183 IOUtils
.copy(inputStream
, stringWriter
);
184 return parseJsonRecords(stringWriter
.toString());
188 * Parses the given {@link JSONObject} for occurrences.<br>
189 * Note: The data structure of the GBIF response should not be changed.
190 * @param jsonString JSON data as an JSONObject
191 * @return the found occurrences as a collection of {@link GbifResponse}
193 public static Collection
<GbifResponse
> parseJsonRecords(JSONObject jsonObject
){
194 return parseJsonRecords(jsonObject
.getJSONArray("results"));
198 * Parses the given {@link JSONArray} for occurrences.
199 * @param jsonString JSON data as an {@link JSONArray}
200 * @return the found occurrences as a collection of {@link GbifResponse}
202 private static Collection
<GbifResponse
> parseJsonRecords(JSONArray jsonArray
) {
203 Collection
<GbifResponse
> results
= new ArrayList
<GbifResponse
>();
204 String
[] tripleId
= new String
[3];
206 for(Object o
:jsonArray
){
208 tripleId
= new String
[3];
209 if(o
instanceof JSONObject
){
210 String dataSetKey
= null;
211 GbifDataSetProtocol dataSetProtocol
= null;
212 DerivedUnitFacade derivedUnitFacade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.PreservedSpecimen
);
213 TaxonName name
= null;
214 JSONObject record
= (JSONObject
)o
;
216 if(record
.has(DATASET_PROTOCOL
)){
217 dataSetProtocol
= GbifDataSetProtocol
.parseProtocol(record
.getString(DATASET_PROTOCOL
));
219 if(record
.has(DATASET_KEY
)){
220 dataSetKey
= record
.getString(DATASET_KEY
);
222 if(record
.has(COUNTRY_CODE
)){
223 string
= record
.getString(COUNTRY_CODE
);
224 Country country
= Country
.getCountryByIso3166A2(string
);
226 derivedUnitFacade
.setCountry(country
);
229 if(record
.has(LOCALITY
)){
230 string
= record
.getString(LOCALITY
);
231 derivedUnitFacade
.setLocality(string
);
234 if (record
.has("species")){
237 if (record
.has(TAXON_RANK
)){
238 string
= record
.getString(TAXON_RANK
);
240 rank
= Rank
.getRankByName(string
);
241 } catch (UnknownCdmTypeException e
) {
242 // TODO Auto-generated catch block
247 if (record
.has(NOMENCLATURALCODE
)){
248 string
= record
.getString(NOMENCLATURALCODE
);
250 if (string
.equals(NomenclaturalCode
.ICZN
.getTitleCache())){
251 name
= TaxonNameFactory
.NewZoologicalInstance(rank
);
252 } else if (string
.equals(NomenclaturalCode
.ICNAFP
.getTitleCache())) {
253 name
= TaxonNameFactory
.NewBotanicalInstance(rank
);
254 } else if (string
.equals(NomenclaturalCode
.ICNB
.getTitleCache())){
255 name
= TaxonNameFactory
.NewBacterialInstance(rank
);
256 } else if (string
.equals(NomenclaturalCode
.ICNCP
.getTitleCache())){
257 name
= TaxonNameFactory
.NewCultivarInstance(rank
);
258 } else if (string
.equals(NomenclaturalCode
.ICVCN
.getTitleCache())){
259 name
= TaxonNameFactory
.NewViralInstance(rank
);
260 } else if (string
.equals("ICN")){
261 name
= TaxonNameFactory
.NewBotanicalInstance(rank
);
264 if (record
.has(KINGDOM
)){
265 if (record
.getString(KINGDOM
).equals(PLANTAE
)){
266 name
= TaxonNameFactory
.NewBotanicalInstance(rank
);
267 } else if (record
.getString(KINGDOM
).equals(ANIMALIA
)){
268 name
= TaxonNameFactory
.NewZoologicalInstance(rank
);
269 } else if (record
.getString(KINGDOM
).equals(FUNGI
)){
270 name
= TaxonNameFactory
.NewBotanicalInstance(rank
);
271 } else if (record
.getString(KINGDOM
).equals(BACTERIA
)){
272 name
= TaxonNameFactory
.NewBacterialInstance(rank
);
274 name
= TaxonNameFactory
.NewNonViralInstance(rank
);
277 name
= TaxonNameFactory
.NewNonViralInstance(rank
);
281 name
= TaxonNameFactory
.NewNonViralInstance(rank
);
283 if (record
.has(GENUS
)){
284 name
.setGenusOrUninomial(record
.getString(GENUS
));
286 if (record
.has(SPECIFIC_EPITHET
)){
287 name
.setSpecificEpithet(record
.getString(SPECIFIC_EPITHET
));
289 if (record
.has(INFRASPECIFIC_EPITHET
)){
290 name
.setInfraSpecificEpithet(record
.getString(INFRASPECIFIC_EPITHET
));
292 if (record
.has(SCIENTIFIC_NAME
)){
293 name
.setTitleCache(record
.getString(SCIENTIFIC_NAME
), true);
297 DeterminationEvent detEvent
= DeterminationEvent
.NewInstance();
299 if (record
.has(IDENTIFIED_BY
)){
300 Person determiner
= Person
.NewTitledInstance(record
.getString(IDENTIFIED_BY
));
301 detEvent
.setDeterminer(determiner
);
304 detEvent
.setTaxonName(name
);
305 detEvent
.setPreferredFlag(true);
306 derivedUnitFacade
.addDetermination(detEvent
);
313 Point location
= Point
.NewInstance();
314 derivedUnitFacade
.setExactLocation(location
);
316 if(record
.has(LATITUDE
)){
317 String lat
= record
.getString(LATITUDE
);
318 location
.setLatitudeByParsing(lat
);
320 if(record
.has(LONGITUDE
)){
321 String lon
= record
.getString(LONGITUDE
);
322 location
.setLongitudeByParsing(lon
);
324 } catch (ParseException e
) {
325 logger
.error("Could not parse GPS coordinates", e
);
327 if(record
.has(GEOREFERENCE_PROTOCOL
)){
328 String geo
= record
.getString(GEOREFERENCE_PROTOCOL
);
329 ReferenceSystem referenceSystem
= null;
330 //TODO: Is there another way than string comparison
331 //to check which reference system is used?
332 if(ReferenceSystem
.WGS84().getLabel().contains(geo
)){
333 referenceSystem
= ReferenceSystem
.WGS84();
335 else if(ReferenceSystem
.GOOGLE_EARTH().getLabel().contains(geo
)){
336 referenceSystem
= ReferenceSystem
.GOOGLE_EARTH();
338 else if(ReferenceSystem
.GAZETTEER().getLabel().contains(geo
)){
339 referenceSystem
= ReferenceSystem
.GAZETTEER();
341 location
.setReferenceSystem(referenceSystem
);
344 if(record
.has(ELEVATION
)){
346 //parse integer and strip of unit
347 string
= record
.getString(ELEVATION
);
348 int length
= string
.length();
349 StringBuilder builder
= new StringBuilder();
350 for(int i
=0;i
<length
;i
++){
351 if(Character
.isDigit(string
.charAt(i
))){
352 builder
.append(string
.charAt(i
));
358 derivedUnitFacade
.setAbsoluteElevation(Integer
.parseInt(builder
.toString()));
359 } catch (NumberFormatException e
) {
360 logger
.warn("Could not parse elevation", e
);
364 //Date (Gathering Period)
365 TimePeriod timePeriod
= TimePeriod
.NewInstance();
366 derivedUnitFacade
.setGatheringPeriod(timePeriod
);
367 //TODO what happens with eventDate??
368 if(record
.has(YEAR
)){
369 timePeriod
.setStartYear(record
.getInt(YEAR
));
371 if(record
.has(MONTH
)){
372 timePeriod
.setStartMonth(record
.getInt(MONTH
));
375 timePeriod
.setStartDay(record
.getInt(DAY
));
377 if(record
.has(RECORDED_BY
)){
378 Person person
= Person
.NewTitledInstance(record
.getString(RECORDED_BY
));
379 //FIXME check data base if collector already present
380 derivedUnitFacade
.setCollector(person
);
383 //collector number (fieldNumber OR recordNumber)
384 if(record
.has(FIELD_NUMBER
)){
385 derivedUnitFacade
.setFieldNumber(record
.getString(FIELD_NUMBER
));
387 //collector number (fieldNumber OR recordNumber)
388 if(record
.has(RECORD_NUMBER
)){
389 derivedUnitFacade
.setFieldNumber(record
.getString(RECORD_NUMBER
));
392 if(record
.has(EVENT_REMARKS
)){
393 derivedUnitFacade
.setGatheringEventDescription(record
.getString(EVENT_REMARKS
));
395 if(record
.has(OCCURRENCE_REMARKS
)){
396 derivedUnitFacade
.setEcology(record
.getString(OCCURRENCE_REMARKS
));
398 if(record
.has(COLLECTION_CODE
)){
399 String collectionCode
= record
.getString(COLLECTION_CODE
);
400 tripleId
[2] = collectionCode
;
401 //FIXME: check data base for existing collections
402 eu
.etaxonomy
.cdm
.model
.occurrence
.Collection collection
= eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
.NewInstance();
403 collection
.setCode(collectionCode
);
404 if(record
.has(INSTITUTION_CODE
)){
405 Institution institution
= Institution
.NewNamedInstance(record
.getString(INSTITUTION_CODE
));
406 institution
.setCode(record
.getString(INSTITUTION_CODE
));
407 collection
.setInstitute(institution
);
409 derivedUnitFacade
.setCollection(collection
);
411 if(record
.has(CATALOG_NUMBER
)){
412 derivedUnitFacade
.setCatalogNumber(record
.getString(CATALOG_NUMBER
));
413 derivedUnitFacade
.setAccessionNumber(record
.getString(CATALOG_NUMBER
));
414 tripleId
[0]= record
.getString(CATALOG_NUMBER
);
416 if(record
.has(INSTITUTION_CODE
)){
417 derivedUnitFacade
.setAccessionNumber(record
.getString(INSTITUTION_CODE
));
418 tripleId
[1]= record
.getString(INSTITUTION_CODE
);
421 if (record
.has(OCCURENCE_ID
)){
422 IdentifiableSource source
= IdentifiableSource
.NewDataImportInstance((record
.getString(OCCURENCE_ID
)));
423 derivedUnitFacade
.addSource(source
);
426 if (record
.has(MULTIMEDIA
)){
427 //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
428 JSONArray multimediaArray
= record
.getJSONArray(MULTIMEDIA
);
429 JSONObject mediaRecord
;
432 CdmImageInfo imageInf
= null;
433 MediaRepresentation representation
= null;
434 SpecimenOrObservationType type
= null;
435 for(Object object
:multimediaArray
){
437 media
= Media
.NewInstance();
441 if(object
instanceof JSONObject
){
442 mediaRecord
= (JSONObject
) object
;
444 if (mediaRecord
.has("identifier")){
446 uri
= new URI(mediaRecord
.getString("identifier"));
447 imageInf
= CdmImageInfo
.NewInstance(uri
, 0);
448 } catch (URISyntaxException
|IOException
| HttpException e
) {
451 // media.addIdentifier(mediaRecord.getString("identifier"), null);
453 if (mediaRecord
.has("references")){
457 if (mediaRecord
.has("format")){
460 if (mediaRecord
.has("type")){
461 if (mediaRecord
.get("type").equals("StillImage")){
462 type
= SpecimenOrObservationType
.StillImage
;
467 ImageFile imageFile
= ImageFile
.NewInstance(uri
, null, imageInf
);
468 representation
= MediaRepresentation
.NewInstance();
470 representation
.addRepresentationPart(imageFile
);
471 media
.addRepresentation(representation
);
473 derivedUnitFacade
.addDerivedUnitMedia(media
);
475 //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
476 //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
482 // create dataset URL
485 uri
= UriUtils
.createUri(new URL(GbifQueryServiceWrapper
.BASE_URL
), "/v1/dataset/"+dataSetKey
+"/endpoint", null, null);
486 } catch (MalformedURLException e
) {
487 logger
.error("Endpoint URI could not be created!", e
);
488 } catch (URISyntaxException e
) {
489 logger
.error("Endpoint URI could not be created!", e
);
491 results
.add(new GbifResponse(derivedUnitFacade
, uri
, dataSetProtocol
, tripleId
, name
));
497 public static DataSetResponse
parseOriginalDataSetUri(InputStream inputStream
) throws IOException
{
498 StringWriter stringWriter
= new StringWriter();
499 IOUtils
.copy(inputStream
, stringWriter
);
500 return parseOriginalDataSetUri(stringWriter
.toString());
503 public static DataSetResponse
parseOriginalDataSetUri(String jsonString
) {
504 DataSetResponse response
= new DataSetResponse();
505 JSONArray jsonArray
= JSONArray
.fromObject(jsonString
);
506 Object next
= jsonArray
.iterator().next();
507 if(next
instanceof JSONObject
){
508 JSONObject jsonObject
= (JSONObject
)next
;
509 if(jsonObject
.has(URL
)){
510 response
.setEndpoint(URI
.create(jsonObject
.getString(URL
)));
512 if(jsonObject
.has(TYPE
)){
513 response
.setProtocol(GbifDataSetProtocol
.parseProtocol(jsonObject
.getString(TYPE
)));