2 * Copyright (C) 2014 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.ext
.occurrence
.gbif
;
11 import java
.io
.IOException
;
12 import java
.io
.InputStream
;
13 import java
.io
.StringWriter
;
14 import java
.net
.MalformedURLException
;
15 import java
.net
.URISyntaxException
;
17 import java
.nio
.charset
.Charset
;
18 import java
.text
.ParseException
;
19 import java
.util
.ArrayList
;
20 import java
.util
.Collection
;
22 import org
.apache
.commons
.io
.IOUtils
;
23 import org
.apache
.http
.HttpException
;
24 import org
.apache
.logging
.log4j
.LogManager
;
25 import org
.apache
.logging
.log4j
.Logger
;
27 import eu
.etaxonomy
.cdm
.api
.facade
.DerivedUnitFacade
;
28 import eu
.etaxonomy
.cdm
.api
.service
.media
.MediaInfoFileReader
;
29 import eu
.etaxonomy
.cdm
.common
.URI
;
30 import eu
.etaxonomy
.cdm
.common
.UriUtils
;
31 import eu
.etaxonomy
.cdm
.common
.media
.CdmImageInfo
;
32 import eu
.etaxonomy
.cdm
.model
.agent
.Institution
;
33 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
34 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
35 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
36 import eu
.etaxonomy
.cdm
.model
.location
.Country
;
37 import eu
.etaxonomy
.cdm
.model
.location
.Point
;
38 import eu
.etaxonomy
.cdm
.model
.location
.ReferenceSystem
;
39 import eu
.etaxonomy
.cdm
.model
.media
.ImageFile
;
40 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
41 import eu
.etaxonomy
.cdm
.model
.media
.MediaRepresentation
;
42 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
43 import eu
.etaxonomy
.cdm
.model
.name
.Rank
;
44 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
45 import eu
.etaxonomy
.cdm
.model
.name
.TaxonNameFactory
;
46 import eu
.etaxonomy
.cdm
.model
.occurrence
.DeterminationEvent
;
47 import eu
.etaxonomy
.cdm
.model
.occurrence
.SpecimenOrObservationType
;
48 import eu
.etaxonomy
.cdm
.strategy
.exceptions
.UnknownCdmTypeException
;
49 import net
.sf
.json
.JSONArray
;
50 import net
.sf
.json
.JSONObject
;
53 * Utility class which provides the functionality to convert a JSON response
54 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
59 public class GbifJsonOccurrenceParser
{
61 private static final Logger logger
= LogManager
.getLogger(GbifJsonOccurrenceParser
.class);
63 private static final String DATASET_KEY
= "datasetKey";
64 private static final String DATASET_PROTOCOL
= "protocol";
66 private static final String KEY
= "key";
67 private static final String URL
= "url";
68 private static final String TYPE
= "type";
70 private static final String COUNTRY_CODE
= "countryCode";
71 private static final String LOCALITY
= "locality";
72 private static final String LONGITUDE
= "decimalLongitude";
73 private static final String LATITUDE
= "decimalLatitude";
74 private static final String GEOREFERENCE_PROTOCOL
= "georeferenceProtocol";//reference system
75 private static final String VERBATIM_ELEVATION
= "verbatimElevation";
76 private static final String YEAR
= "year";
77 private static final String MONTH
= "month";
78 private static final String DAY
= "day";
79 private static final String EVENT_DATE
= "eventDate";
80 private static final String RECORDED_BY
= "recordedBy";//collector
81 private static final String RECORD_NUMBER
= "recordNumber";//collector number
82 private static final String FIELD_NUMBER
= "fieldNumber";//collector number
83 private static final String EVENT_REMARKS
= "eventRemarks";//gathering event description
84 private static final String OCCURRENCE_REMARKS
= "occurrenceRemarks";//ecology
85 private static final String COLLECTION_CODE
= "collectionCode";
86 private static final String CATALOG_NUMBER
= "catalogNumber";//accession number
87 private static final String INSTITUTION_CODE
= "institutionCode";
89 protected static final String PUBLISHING_ORG_KEY
= "publishingOrgKey";
90 protected static final String PUBLISHING_COUNTRY
= "publishingCountry";
92 protected static final String EXTENSIONS
= "extensions";
93 protected static final String BASIS_OF_RECORD
= "basisOfRecord";
94 protected static final String INDIVIDUAL_COUNT
= "individualCount";
95 protected static final String TAXONKEY
= "taxonKey";
96 protected static final String KINGDOM_KEY
= "kingdomKey";
97 protected static final String PHYLUM_KEY
= "phylumKey";
98 protected static final String CLASS_KEY
= "classKey";
99 protected static final String ORDER_KEY
= "orderKey";
100 protected static final String FAMILY_KEY
= "familyKey";
101 protected static final String GENUS_KEY
= "genusKey";
102 protected static final String SPECIES_KEY
= "speciesKey";
103 protected static final String SCIENTIFIC_NAME
= "scientificName";
104 protected static final String KINGDOM
= "kingdom";
105 protected static final String PHYLUM
= "phylum";
106 protected static final String ORDER
= "order";
107 protected static final String FAMILY
= "family";
108 protected static final String GENUS
= "genus";
109 protected static final String SPECIES
= "species";
110 protected static final String GENERIC_NAME
= "genericName";
111 protected static final String SPECIFIC_EPITHET
= "specificEpithet";
112 protected static final String INFRASPECIFIC_EPITHET
= "infraspecificEpithet";
113 protected static final String TAXON_RANK
= "taxonRank";
114 protected static final String DATE_IDENTIFIED
= "dateIdentified";
115 protected static final String SCIENTIFIC_NAME_AUTHORSHIP
= "scientificNameAuthorship";
117 protected static final String ELEVATION
= "elevation";
118 protected static final String CONITNENT
= "continent";
119 protected static final String STATE_PROVINCE
= "stateProvince";
121 protected static final String ISSUES
= "issues";
122 protected static final String LAST_INTERPRETED
= "lastInterpreted";
123 protected static final String IDENTIFIERS
= "identifiers";
124 protected static final String FACTS
= "facts";
125 protected static final String RELATIONS
= "relations";
126 protected static final String GEODETICDATUM
= "geodeticDatum";
127 protected static final String CLASS
= "class";
129 protected static final String COUNTRY
= "country";
130 protected static final String NOMENCLATURAL_STATUS
= "nomenclaturalStatus";
131 protected static final String RIGHTSHOLDER
= "rightsHolder";
132 protected static final String IDEMTIFIER
= "identifier";
134 protected static final String NOMENCLATURALCODE
= "nomenclaturalCode";
135 protected static final String COUNTY
= "county";
137 protected static final String DATASET_NAME
= "datasetName";
138 protected static final String GBIF_ID
= "gbifID";
140 protected static final String OCCURENCE_ID
= "occurrenceID";
142 protected static final String TAXON_ID
= "taxonID";
143 protected static final String LICENCE
= "license";
145 protected static final String OWNER_INSTITUTION_CODE
= "ownerInstitutionCode";
146 protected static final String BIBLIOGRAPHIC_CITATION
= "bibliographicCitation";
147 protected static final String IDENTIFIED_BY
= "identifiedBy";
148 protected static final String COLLECTION_ID
= "collectionID";
150 private static final String PLANTAE
= "Plantae";
151 private static final String ANIMALIA
= "Animalia";
152 private static final String FUNGI
= "Fungi";
153 private static final String BACTERIA
= "Bacteria";
154 private static final String MULTIMEDIA
= "media";
157 * Parses the given {@link String} for occurrences.<br>
158 * Note: The data structure of the GBIF response should not be changed.
159 * @param jsonString JSON data as a String
160 * @return the found occurrences as a collection of {@link GbifResponse}
162 public static Collection
<GbifResponse
> parseJsonRecords(String jsonString
) {
163 return parseJsonRecords(JSONObject
.fromObject(jsonString
));
167 * Parses the given {@link InputStream} for occurrences.
168 * @param jsonString JSON data as an InputStream
169 * @return the found occurrences as a collection of {@link GbifResponse}
171 public static Collection
<GbifResponse
> parseJsonRecords(InputStream inputStream
) throws IOException
{
172 StringWriter stringWriter
= new StringWriter();
173 IOUtils
.copy(inputStream
, stringWriter
, Charset
.defaultCharset());
174 return parseJsonRecords(stringWriter
.toString());
178 * Parses the given {@link JSONObject} for occurrences.<br>
179 * Note: The data structure of the GBIF response should not be changed.
180 * @param jsonString JSON data as an JSONObject
181 * @return the found occurrences as a collection of {@link GbifResponse}
183 public static Collection
<GbifResponse
> parseJsonRecords(JSONObject jsonObject
){
184 return parseJsonRecords(jsonObject
.getJSONArray("results"));
188 * Parses the given {@link JSONArray} for occurrences.
189 * @param jsonArray JSON data as a {@link JSONArray}
190 * @return the found occurrences as a collection of {@link GbifResponse}
// Walks over the elements of jsonArray and, for each JSONObject element, fills a
// DerivedUnitFacade (locality, taxon name/determination, coordinates, elevation,
// gathering date, collector, collection, identifiers, media) and adds a GbifResponse
// built from it to the result collection.
// NOTE(review): several lines of this method are not visible in this excerpt (e.g. the
// declarations of `string`, `rank` and `uri`, the `try` openers, `else` branches and
// the bodies of some catch blocks). The comments below describe only visible statements.
192 private static Collection
<GbifResponse
> parseJsonRecords(JSONArray jsonArray
) {
193 Collection
<GbifResponse
> results
= new ArrayList
<>();
// Identifier triple handed to each GbifResponse; slots as filled further below:
// [0] catalog number, [1] institution code, [2] collection code.
194 String
[] tripleId
= new String
[3];
196 for(Object o
:jsonArray
){
// each record gets its own array, so the allocation above is replaced right away
198 tripleId
= new String
[3];
// only JSONObject elements are processed by the visible code
199 if(o
instanceof JSONObject
){
200 String dataSetKey
= null;
201 GbifDataSetProtocol dataSetProtocol
= null;
// every record starts out typed as a preserved specimen
202 DerivedUnitFacade derivedUnitFacade
= DerivedUnitFacade
.NewInstance(SpecimenOrObservationType
.PreservedSpecimen
);
203 TaxonName name
= null;
204 JSONObject record
= (JSONObject
)o
;
// --- dataset protocol and key (the key is used for the endpoint URI at the end) ---
206 if(record
.has(DATASET_PROTOCOL
)){
207 dataSetProtocol
= GbifDataSetProtocol
.parseProtocol(record
.getString(DATASET_PROTOCOL
));
209 if(record
.has(DATASET_KEY
)){
210 dataSetKey
= record
.getString(DATASET_KEY
);
// --- country: looked up from the record's country code ---
212 if(record
.has(COUNTRY_CODE
)){
213 string
= record
.getString(COUNTRY_CODE
);
214 Country country
= Country
.getCountryByIso3166A2(string
);
216 derivedUnitFacade
.setCountry(country
);
// --- locality ---
219 if(record
.has(LOCALITY
)){
220 string
= record
.getString(LOCALITY
);
221 derivedUnitFacade
.setLocality(string
);
// --- taxon name / determination ---
// NOTE(review): the literal "species" is used here although a SPECIES constant exists.
224 if (record
.has("species")){
// rank is resolved from the Latin rank name given in the record
227 if (record
.has(TAXON_RANK
)){
228 string
= record
.getString(TAXON_RANK
);
230 rank
= Rank
.getRankByLatinName(string
);
231 } catch (UnknownCdmTypeException e
) {
232 // TODO Auto-generated catch block
// choose the name factory method by comparing the record's nomenclatural code
// with the title cache of the known NomenclaturalCode values
237 if (record
.has(NOMENCLATURALCODE
)){
238 string
= record
.getString(NOMENCLATURALCODE
);
240 if (string
.equals(NomenclaturalCode
.ICZN
.getTitleCache())){
241 name
= TaxonNameFactory
.NewZoologicalInstance(rank
);
242 } else if (string
.equals(NomenclaturalCode
.ICNAFP
.getTitleCache())) {
243 name
= TaxonNameFactory
.NewBotanicalInstance(rank
);
244 } else if (string
.equals(NomenclaturalCode
.ICNP
.getTitleCache())){
245 name
= TaxonNameFactory
.NewBacterialInstance(rank
);
246 } else if (string
.equals(NomenclaturalCode
.ICNCP
.getTitleCache())){
247 name
= TaxonNameFactory
.NewCultivarInstance(rank
);
248 } else if (string
.equals(NomenclaturalCode
.ICVCN
.getTitleCache())){
249 name
= TaxonNameFactory
.NewViralInstance(rank
);
// the plain string "ICN" is treated like the botanical code
250 } else if (string
.equals("ICN")){
251 name
= TaxonNameFactory
.NewBotanicalInstance(rank
);
// kingdom-based choice of the name type; Plantae and Fungi both yield a botanical name.
// NOTE(review): how this section and the NewNonViralInstance fallbacks below are
// connected to the code above (else branches) is not visible in this excerpt.
254 if (record
.has(KINGDOM
)){
255 if (record
.getString(KINGDOM
).equals(PLANTAE
)){
256 name
= TaxonNameFactory
.NewBotanicalInstance(rank
);
257 } else if (record
.getString(KINGDOM
).equals(ANIMALIA
)){
258 name
= TaxonNameFactory
.NewZoologicalInstance(rank
);
259 } else if (record
.getString(KINGDOM
).equals(FUNGI
)){
260 name
= TaxonNameFactory
.NewBotanicalInstance(rank
);
261 } else if (record
.getString(KINGDOM
).equals(BACTERIA
)){
262 name
= TaxonNameFactory
.NewBacterialInstance(rank
);
264 name
= TaxonNameFactory
.NewNonViralInstance(rank
);
267 name
= TaxonNameFactory
.NewNonViralInstance(rank
);
271 name
= TaxonNameFactory
.NewNonViralInstance(rank
);
// copy the name parts delivered by GBIF onto the name
273 if (record
.has(GENUS
)){
274 name
.setGenusOrUninomial(record
.getString(GENUS
));
276 if (record
.has(SPECIFIC_EPITHET
)){
277 name
.setSpecificEpithet(record
.getString(SPECIFIC_EPITHET
));
279 if (record
.has(INFRASPECIFIC_EPITHET
)){
280 name
.setInfraSpecificEpithet(record
.getString(INFRASPECIFIC_EPITHET
));
// the scientific name is used verbatim as title cache
// (second argument presumably marks the cache as protected - TODO confirm)
282 if (record
.has(SCIENTIFIC_NAME
)){
283 name
.setTitleCache(record
.getString(SCIENTIFIC_NAME
), true);
// attach the name to the unit via a preferred determination event
286 DeterminationEvent detEvent
= DeterminationEvent
.NewInstance();
288 if (record
.has(IDENTIFIED_BY
)){
289 Person determiner
= Person
.NewTitledInstance(record
.getString(IDENTIFIED_BY
));
290 detEvent
.setDeterminer(determiner
);
292 detEvent
.setTaxonName(name
);
293 detEvent
.setPreferredFlag(true);
294 derivedUnitFacade
.addDetermination(detEvent
);
// --- coordinates: a Point is always set on the facade; latitude/longitude are
// filled only when present, and a ParseException is logged ---
298 Point location
= Point
.NewInstance();
299 derivedUnitFacade
.setExactLocation(location
);
301 if(record
.has(LATITUDE
)){
302 String lat
= record
.getString(LATITUDE
);
303 location
.setLatitudeByParsing(lat
);
305 if(record
.has(LONGITUDE
)){
306 String lon
= record
.getString(LONGITUDE
);
307 location
.setLongitudeByParsing(lon
);
309 } catch (ParseException e
) {
310 logger
.error("Could not parse GPS coordinates", e
);
// --- reference system: chosen by checking whether the label of a known reference
// system contains the georeference protocol value; stays null when none matches ---
// NOTE(review): the test is label.contains(geo), so (assuming getLabel() returns a
// String) an empty protocol value would match the first candidate, WGS84.
312 if(record
.has(GEOREFERENCE_PROTOCOL
)){
313 String geo
= record
.getString(GEOREFERENCE_PROTOCOL
);
314 ReferenceSystem referenceSystem
= null;
315 //TODO: Is there another way than string comparison
316 //to check which reference system is used?
317 if(ReferenceSystem
.WGS84().getLabel().contains(geo
)){
318 referenceSystem
= ReferenceSystem
.WGS84();
320 else if(ReferenceSystem
.GOOGLE_EARTH().getLabel().contains(geo
)){
321 referenceSystem
= ReferenceSystem
.GOOGLE_EARTH();
323 else if(ReferenceSystem
.GAZETTEER().getLabel().contains(geo
)){
324 referenceSystem
= ReferenceSystem
.GAZETTEER();
326 location
.setReferenceSystem(referenceSystem
);
// --- elevation: digit characters of the value are collected and parsed as an int;
// how non-digit characters are handled is not visible in this excerpt ---
329 if(record
.has(ELEVATION
)){
331 //parse integer and strip of unit
332 string
= record
.getString(ELEVATION
);
333 int length
= string
.length();
334 StringBuilder builder
= new StringBuilder();
335 for(int i
=0;i
<length
;i
++){
336 if(Character
.isDigit(string
.charAt(i
))){
337 builder
.append(string
.charAt(i
));
// a value that cannot be parsed is only logged as a warning
343 derivedUnitFacade
.setAbsoluteElevation(Integer
.parseInt(builder
.toString()));
344 } catch (NumberFormatException e
) {
345 logger
.warn("Could not parse elevation", e
);
349 //Date (Gathering Period)
350 TimePeriod timePeriod
= TimePeriod
.NewInstance();
351 derivedUnitFacade
.setGatheringPeriod(timePeriod
);
352 //TODO what happens with eventDate??
353 if(record
.has(YEAR
)){
354 timePeriod
.setStartYear(record
.getInt(YEAR
));
356 if(record
.has(MONTH
)){
357 timePeriod
.setStartMonth(record
.getInt(MONTH
));
// NOTE(review): no guard for DAY is visible in this excerpt - confirm that one exists
360 timePeriod
.setStartDay(record
.getInt(DAY
));
// --- collector: a new Person is created for every record ---
362 if(record
.has(RECORDED_BY
)){
363 Person person
= Person
.NewTitledInstance(record
.getString(RECORDED_BY
));
364 //FIXME check data base if collector already present
365 derivedUnitFacade
.setCollector(person
);
// NOTE(review): fieldNumber and recordNumber are both written via setFieldNumber, so
// when a record has both, the recordNumber call below comes last.
368 //collector number (fieldNumber OR recordNumber)
369 if(record
.has(FIELD_NUMBER
)){
370 derivedUnitFacade
.setFieldNumber(record
.getString(FIELD_NUMBER
));
372 //collector number (fieldNumber OR recordNumber)
373 if(record
.has(RECORD_NUMBER
)){
374 derivedUnitFacade
.setFieldNumber(record
.getString(RECORD_NUMBER
));
377 if(record
.has(EVENT_REMARKS
)){
378 derivedUnitFacade
.setGatheringEventDescription(record
.getString(EVENT_REMARKS
));
380 if(record
.has(OCCURRENCE_REMARKS
)){
381 derivedUnitFacade
.setEcology(record
.getString(OCCURRENCE_REMARKS
));
// --- collection: a new Collection (and, if given, Institution) is created per record;
// the fully qualified name is needed because java.util.Collection is imported ---
383 if(record
.has(COLLECTION_CODE
)){
384 String collectionCode
= record
.getString(COLLECTION_CODE
);
385 tripleId
[2] = collectionCode
;
386 //FIXME: check data base for existing collections
387 eu
.etaxonomy
.cdm
.model
.occurrence
.Collection collection
= eu
.etaxonomy
.cdm
.model
.occurrence
.Collection
.NewInstance();
388 collection
.setCode(collectionCode
);
// the institution code serves as both name and code of the institution
389 if(record
.has(INSTITUTION_CODE
)){
390 Institution institution
= Institution
.NewNamedInstance(record
.getString(INSTITUTION_CODE
));
391 institution
.setCode(record
.getString(INSTITUTION_CODE
));
392 collection
.setInstitute(institution
);
394 derivedUnitFacade
.setCollection(collection
);
// --- catalog number: stored as catalog number and as accession number ---
396 if(record
.has(CATALOG_NUMBER
)){
397 derivedUnitFacade
.setCatalogNumber(record
.getString(CATALOG_NUMBER
));
398 derivedUnitFacade
.setAccessionNumber(record
.getString(CATALOG_NUMBER
));
399 tripleId
[0]= record
.getString(CATALOG_NUMBER
);
// NOTE(review): this passes the institution code to setAccessionNumber, after the
// catalog number was passed to the same setter above - looks unintended; confirm.
401 if(record
.has(INSTITUTION_CODE
)){
402 derivedUnitFacade
.setAccessionNumber(record
.getString(INSTITUTION_CODE
));
403 tripleId
[1]= record
.getString(INSTITUTION_CODE
);
// --- occurrence id: kept as a data-import source of the unit ---
406 if (record
.has(OCCURENCE_ID
)){
407 IdentifiableSource source
= IdentifiableSource
.NewDataImportInstance((record
.getString(OCCURENCE_ID
)));
408 derivedUnitFacade
.addSource(source
);
// --- media: one Media object is created for each entry of the "media" array ---
411 if (record
.has(MULTIMEDIA
)){
412 //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
413 JSONArray multimediaArray
= record
.getJSONArray(MULTIMEDIA
);
414 JSONObject mediaRecord
;
415 SpecimenOrObservationType type
= null;
416 for(Object object
:multimediaArray
){
418 Media media
= Media
.NewInstance();
420 CdmImageInfo imageInf
= null;
422 if(object
instanceof JSONObject
){
423 mediaRecord
= (JSONObject
) object
;
// the "identifier" value is used as the media URI and passed to MediaInfoFileReader;
// the remainder of that statement and the catch body are not visible in this excerpt
425 if (mediaRecord
.has("identifier")){
427 uri
= new URI(mediaRecord
.getString("identifier"));
428 imageInf
= MediaInfoFileReader
.legacyFactoryMethod(uri
)
431 } catch (URISyntaxException
|IOException
| HttpException
| IllegalArgumentException e
) {
434 // media.addIdentifier(mediaRecord.getString("identifier"), null);
// the bodies of the "references" and "format" branches are not visible in this excerpt
436 if (mediaRecord
.has("references")){
440 if (mediaRecord
.has("format")){
// `type` is set for still images; no other use of it is visible in this excerpt
443 if (mediaRecord
.has("type")){
444 if (mediaRecord
.get("type").equals("StillImage")){
445 type
= SpecimenOrObservationType
.StillImage
;
// wrap the URI and image info in an ImageFile inside a MediaRepresentation and
// attach the resulting media to the derived unit
449 ImageFile imageFile
= ImageFile
.NewInstance(uri
, null, imageInf
);
450 MediaRepresentation representation
= MediaRepresentation
.NewInstance();
452 representation
.addRepresentationPart(imageFile
);
453 media
.addRepresentation(representation
);
455 derivedUnitFacade
.addDerivedUnitMedia(media
);
457 //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
458 //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
// NOTE(review): dataSetKey is still null when the record had no "datasetKey", in which
// case the concatenation below yields the path "/v1/dataset/null/endpoint".
463 // create dataset URL
466 uri
= UriUtils
.createUri(new URL(GbifQueryServiceWrapper
.BASE_URL
), "/v1/dataset/"+dataSetKey
+"/endpoint", null, null);
467 } catch (MalformedURLException e
) {
468 logger
.error("Endpoint URI could not be created!", e
);
469 } catch (URISyntaxException e
) {
470 logger
.error("Endpoint URI could not be created!", e
);
// one GbifResponse per record: facade, endpoint URI, protocol, identifier triple, name
472 results
.add(new GbifResponse(derivedUnitFacade
, uri
, dataSetProtocol
, tripleId
, name
));
478 public static DataSetResponse
parseOriginalDataSetUri(InputStream inputStream
) throws IOException
{
479 StringWriter stringWriter
= new StringWriter();
480 IOUtils
.copy(inputStream
, stringWriter
, Charset
.defaultCharset());
481 return parseOriginalDataSetUri(stringWriter
.toString());
// Parses jsonString as a JSON array and fills a new DataSetResponse from the array's
// first element: the "url" value becomes the endpoint and the "type" value is parsed
// into the dataset protocol.
// NOTE(review): the end of this method lies beyond the visible excerpt.
484 public static DataSetResponse
parseOriginalDataSetUri(String jsonString
) {
485 DataSetResponse response
= new DataSetResponse();
486 JSONArray jsonArray
= JSONArray
.fromObject(jsonString
);
// NOTE(review): next() is called without a hasNext() check - presumably this throws
// NoSuchElementException for an empty array; confirm and guard if that can occur.
// Only the first element is looked at; any further elements are not read here.
487 Object next
= jsonArray
.iterator().next();
488 if(next
instanceof JSONObject
){
489 JSONObject jsonObject
= (JSONObject
)next
;
// endpoint URI of the dataset
490 if(jsonObject
.has(URL
)){
491 response
.setEndpoint(URI
.create(jsonObject
.getString(URL
)));
// protocol of the endpoint, parsed from the "type" value
493 if(jsonObject
.has(TYPE
)){
494 response
.setProtocol(GbifDataSetProtocol
.parseProtocol(jsonObject
.getString(TYPE
)));