cleanup
[cdmlib.git] / cdmlib-ext / src / main / java / eu / etaxonomy / cdm / ext / occurrence / gbif / GbifJsonOccurrenceParser.java
1 /**
2 * Copyright (C) 2014 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.ext.occurrence.gbif;
10
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
import eu.etaxonomy.cdm.api.service.media.MediaInfoFileReader;
import eu.etaxonomy.cdm.common.URI;
import eu.etaxonomy.cdm.common.UriUtils;
import eu.etaxonomy.cdm.common.media.CdmImageInfo;
import eu.etaxonomy.cdm.model.agent.Institution;
import eu.etaxonomy.cdm.model.agent.Person;
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
import eu.etaxonomy.cdm.model.common.TimePeriod;
import eu.etaxonomy.cdm.model.location.Country;
import eu.etaxonomy.cdm.model.location.Point;
import eu.etaxonomy.cdm.model.location.ReferenceSystem;
import eu.etaxonomy.cdm.model.media.ImageFile;
import eu.etaxonomy.cdm.model.media.Media;
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
import eu.etaxonomy.cdm.model.name.Rank;
import eu.etaxonomy.cdm.model.name.TaxonName;
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
51
52 /**
53 * Utility class which provides the functionality to convert a JSON response
54 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
55 *
56 * @author pplitzner
57 * @since 22.05.2014
58 */
59 public class GbifJsonOccurrenceParser {
60
61 private static final Logger logger = LogManager.getLogger(GbifJsonOccurrenceParser.class);
62
63 private static final String DATASET_KEY = "datasetKey";
64 private static final String DATASET_PROTOCOL = "protocol";
65
66 private static final String KEY = "key";
67 private static final String URL = "url";
68 private static final String TYPE = "type";
69
70 private static final String COUNTRY_CODE = "countryCode";
71 private static final String LOCALITY = "locality";
72 private static final String LONGITUDE = "decimalLongitude";
73 private static final String LATITUDE = "decimalLatitude";
74 private static final String GEOREFERENCE_PROTOCOL = "georeferenceProtocol";//reference system
75 private static final String VERBATIM_ELEVATION = "verbatimElevation";
76 private static final String YEAR = "year";
77 private static final String MONTH = "month";
78 private static final String DAY = "day";
79 private static final String EVENT_DATE= "eventDate";
80 private static final String RECORDED_BY= "recordedBy";//collector
81 private static final String RECORD_NUMBER = "recordNumber";//collector number
82 private static final String FIELD_NUMBER = "fieldNumber";//collector number
83 private static final String EVENT_REMARKS = "eventRemarks";//gathering event description
84 private static final String OCCURRENCE_REMARKS = "occurrenceRemarks";//ecology
85 private static final String COLLECTION_CODE = "collectionCode";
86 private static final String CATALOG_NUMBER = "catalogNumber";//accession number
87 private static final String INSTITUTION_CODE = "institutionCode";
88
89 protected static final String PUBLISHING_ORG_KEY = "publishingOrgKey";
90 protected static final String PUBLISHING_COUNTRY = "publishingCountry";
91
92 protected static final String EXTENSIONS = "extensions";
93 protected static final String BASIS_OF_RECORD = "basisOfRecord";
94 protected static final String INDIVIDUAL_COUNT = "individualCount";
95 protected static final String TAXONKEY = "taxonKey";
96 protected static final String KINGDOM_KEY = "kingdomKey";
97 protected static final String PHYLUM_KEY = "phylumKey";
98 protected static final String CLASS_KEY = "classKey";
99 protected static final String ORDER_KEY = "orderKey";
100 protected static final String FAMILY_KEY = "familyKey";
101 protected static final String GENUS_KEY = "genusKey";
102 protected static final String SPECIES_KEY = "speciesKey";
103 protected static final String SCIENTIFIC_NAME = "scientificName";
104 protected static final String KINGDOM = "kingdom";
105 protected static final String PHYLUM = "phylum";
106 protected static final String ORDER = "order";
107 protected static final String FAMILY = "family";
108 protected static final String GENUS = "genus";
109 protected static final String SPECIES = "species";
110 protected static final String GENERIC_NAME = "genericName";
111 protected static final String SPECIFIC_EPITHET = "specificEpithet";
112 protected static final String INFRASPECIFIC_EPITHET = "infraspecificEpithet";
113 protected static final String TAXON_RANK = "taxonRank";
114 protected static final String DATE_IDENTIFIED = "dateIdentified";
115 protected static final String SCIENTIFIC_NAME_AUTHORSHIP = "scientificNameAuthorship";
116
117 protected static final String ELEVATION = "elevation";
118 protected static final String CONITNENT = "continent";
119 protected static final String STATE_PROVINCE = "stateProvince";
120
121 protected static final String ISSUES = "issues";
122 protected static final String LAST_INTERPRETED = "lastInterpreted";
123 protected static final String IDENTIFIERS = "identifiers";
124 protected static final String FACTS = "facts";
125 protected static final String RELATIONS = "relations";
126 protected static final String GEODETICDATUM = "geodeticDatum";
127 protected static final String CLASS = "class";
128
129 protected static final String COUNTRY = "country";
130 protected static final String NOMENCLATURAL_STATUS = "nomenclaturalStatus";
131 protected static final String RIGHTSHOLDER = "rightsHolder";
132 protected static final String IDEMTIFIER = "identifier";
133
134 protected static final String NOMENCLATURALCODE = "nomenclaturalCode";
135 protected static final String COUNTY = "county";
136
137 protected static final String DATASET_NAME = "datasetName";
138 protected static final String GBIF_ID = "gbifID";
139
140 protected static final String OCCURENCE_ID = "occurrenceID";
141
142 protected static final String TAXON_ID = "taxonID";
143 protected static final String LICENCE = "license";
144
145 protected static final String OWNER_INSTITUTION_CODE = "ownerInstitutionCode";
146 protected static final String BIBLIOGRAPHIC_CITATION = "bibliographicCitation";
147 protected static final String IDENTIFIED_BY = "identifiedBy";
148 protected static final String COLLECTION_ID = "collectionID";
149
150 private static final String PLANTAE = "Plantae";
151 private static final String ANIMALIA = "Animalia";
152 private static final String FUNGI = "Fungi";
153 private static final String BACTERIA = "Bacteria";
154 private static final String MULTIMEDIA = "media";
155
156 /**
157 * Parses the given {@link String} for occurrences.<br>
158 * Note: The data structure of the GBIF response should not be changed.
159 * @param jsonString JSON data as a String
160 * @return the found occurrences as a collection of {@link GbifResponse}
161 */
162 public static Collection<GbifResponse> parseJsonRecords(String jsonString) {
163 return parseJsonRecords(JSONObject.fromObject(jsonString));
164 }
165
166 /**
167 * Parses the given {@link InputStream} for occurrences.
168 * @param jsonString JSON data as an InputStream
169 * @return the found occurrences as a collection of {@link GbifResponse}
170 */
171 public static Collection<GbifResponse> parseJsonRecords(InputStream inputStream) throws IOException{
172 StringWriter stringWriter = new StringWriter();
173 IOUtils.copy(inputStream, stringWriter, Charset.defaultCharset());
174 return parseJsonRecords(stringWriter.toString());
175 }
176
177 /**
178 * Parses the given {@link JSONObject} for occurrences.<br>
179 * Note: The data structure of the GBIF response should not be changed.
180 * @param jsonString JSON data as an JSONObject
181 * @return the found occurrences as a collection of {@link GbifResponse}
182 */
183 public static Collection<GbifResponse> parseJsonRecords(JSONObject jsonObject){
184 return parseJsonRecords(jsonObject.getJSONArray("results"));
185 }
186
187 /**
188 * Parses the given {@link JSONArray} for occurrences.
189 * @param jsonString JSON data as an {@link JSONArray}
190 * @return the found occurrences as a collection of {@link GbifResponse}
191 */
192 private static Collection<GbifResponse> parseJsonRecords(JSONArray jsonArray) {
193 Collection<GbifResponse> results = new ArrayList<>();
194 String[] tripleId = new String[3];
195 String string;
196 for(Object o:jsonArray){
197 //parse every record
198 tripleId = new String[3];
199 if(o instanceof JSONObject){
200 String dataSetKey = null;
201 GbifDataSetProtocol dataSetProtocol = null;
202 DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen);
203 TaxonName name = null;
204 JSONObject record = (JSONObject)o;
205
206 if(record.has(DATASET_PROTOCOL)){
207 dataSetProtocol = GbifDataSetProtocol.parseProtocol(record.getString(DATASET_PROTOCOL));
208 }
209 if(record.has(DATASET_KEY)){
210 dataSetKey = record.getString(DATASET_KEY);
211 }
212 if(record.has(COUNTRY_CODE)){
213 string = record.getString(COUNTRY_CODE);
214 Country country = Country.getCountryByIso3166A2(string);
215 if(country!=null){
216 derivedUnitFacade.setCountry(country);
217 }
218 }
219 if(record.has(LOCALITY)){
220 string = record.getString(LOCALITY);
221 derivedUnitFacade.setLocality(string);
222 }
223
224 if (record.has("species")){
225 Rank rank = null;
226
227 if (record.has(TAXON_RANK)){
228 string= record.getString(TAXON_RANK);
229 try {
230 rank = Rank.getRankByLatinName(string);
231 } catch (UnknownCdmTypeException e) {
232 // TODO Auto-generated catch block
233 e.printStackTrace();
234 }
235 }
236 if (rank != null){
237 if (record.has(NOMENCLATURALCODE)){
238 string = record.getString(NOMENCLATURALCODE);
239
240 if (string.equals(NomenclaturalCode.ICZN.getTitleCache())){
241 name = TaxonNameFactory.NewZoologicalInstance(rank);
242 } else if (string.equals(NomenclaturalCode.ICNAFP.getTitleCache())) {
243 name = TaxonNameFactory.NewBotanicalInstance(rank);
244 } else if (string.equals(NomenclaturalCode.ICNP.getTitleCache())){
245 name = TaxonNameFactory.NewBacterialInstance(rank);
246 } else if (string.equals(NomenclaturalCode.ICNCP.getTitleCache())){
247 name = TaxonNameFactory.NewCultivarInstance(rank);
248 } else if (string.equals(NomenclaturalCode.ICVCN.getTitleCache())){
249 name = TaxonNameFactory.NewViralInstance(rank);
250 } else if (string.equals("ICN")){
251 name = TaxonNameFactory.NewBotanicalInstance(rank);
252 }
253 }else {
254 if (record.has(KINGDOM)){
255 if (record.getString(KINGDOM).equals(PLANTAE)){
256 name = TaxonNameFactory.NewBotanicalInstance(rank);
257 } else if (record.getString(KINGDOM).equals(ANIMALIA)){
258 name = TaxonNameFactory.NewZoologicalInstance(rank);
259 } else if (record.getString(KINGDOM).equals(FUNGI)){
260 name = TaxonNameFactory.NewBotanicalInstance(rank);
261 } else if (record.getString(KINGDOM).equals(BACTERIA)){
262 name = TaxonNameFactory.NewBacterialInstance(rank);
263 } else{
264 name = TaxonNameFactory.NewNonViralInstance(rank);
265 }
266 } else{
267 name = TaxonNameFactory.NewNonViralInstance(rank);
268 }
269 }
270 if (name == null){
271 name = TaxonNameFactory.NewNonViralInstance(rank);
272 }
273 if (record.has(GENUS)){
274 name.setGenusOrUninomial(record.getString(GENUS));
275 }
276 if (record.has(SPECIFIC_EPITHET)){
277 name.setSpecificEpithet(record.getString(SPECIFIC_EPITHET));
278 }
279 if (record.has(INFRASPECIFIC_EPITHET)){
280 name.setInfraSpecificEpithet(record.getString(INFRASPECIFIC_EPITHET));
281 }
282 if (record.has(SCIENTIFIC_NAME)){
283 name.setTitleCache(record.getString(SCIENTIFIC_NAME), true);
284 }
285 }
286 DeterminationEvent detEvent = DeterminationEvent.NewInstance();
287
288 if (record.has(IDENTIFIED_BY)){
289 Person determiner = Person.NewTitledInstance(record.getString(IDENTIFIED_BY));
290 detEvent.setDeterminer(determiner);
291 }
292 detEvent.setTaxonName(name);
293 detEvent.setPreferredFlag(true);
294 derivedUnitFacade.addDetermination(detEvent);
295 }
296
297 // GPS location
298 Point location = Point.NewInstance();
299 derivedUnitFacade.setExactLocation(location);
300 try {
301 if(record.has(LATITUDE)){
302 String lat = record.getString(LATITUDE);
303 location.setLatitudeByParsing(lat);
304 }
305 if(record.has(LONGITUDE)){
306 String lon = record.getString(LONGITUDE);
307 location.setLongitudeByParsing(lon);
308 }
309 } catch (ParseException e) {
310 logger.error("Could not parse GPS coordinates", e);
311 }
312 if(record.has(GEOREFERENCE_PROTOCOL)){
313 String geo = record.getString(GEOREFERENCE_PROTOCOL);
314 ReferenceSystem referenceSystem = null;
315 //TODO: Is there another way than string comparison
316 //to check which reference system is used?
317 if(ReferenceSystem.WGS84().getLabel().contains(geo)){
318 referenceSystem = ReferenceSystem.WGS84();
319 }
320 else if(ReferenceSystem.GOOGLE_EARTH().getLabel().contains(geo)){
321 referenceSystem = ReferenceSystem.GOOGLE_EARTH();
322 }
323 else if(ReferenceSystem.GAZETTEER().getLabel().contains(geo)){
324 referenceSystem = ReferenceSystem.GAZETTEER();
325 }
326 location.setReferenceSystem(referenceSystem);
327 }
328
329 if(record.has(ELEVATION)){
330 try {
331 //parse integer and strip of unit
332 string = record.getString(ELEVATION);
333 int length = string.length();
334 StringBuilder builder = new StringBuilder();
335 for(int i=0;i<length;i++){
336 if(Character.isDigit(string.charAt(i))){
337 builder.append(string.charAt(i));
338 }
339 else{
340 break;
341 }
342 }
343 derivedUnitFacade.setAbsoluteElevation(Integer.parseInt(builder.toString()));
344 } catch (NumberFormatException e) {
345 logger.warn("Could not parse elevation", e);
346 }
347 }
348
349 //Date (Gathering Period)
350 TimePeriod timePeriod = TimePeriod.NewInstance();
351 derivedUnitFacade.setGatheringPeriod(timePeriod);
352 //TODO what happens with eventDate??
353 if(record.has(YEAR)){
354 timePeriod.setStartYear(record.getInt(YEAR));
355 }
356 if(record.has(MONTH)){
357 timePeriod.setStartMonth(record.getInt(MONTH));
358 }
359 if(record.has(DAY)){
360 timePeriod.setStartDay(record.getInt(DAY));
361 }
362 if(record.has(RECORDED_BY)){
363 Person person = Person.NewTitledInstance(record.getString(RECORDED_BY));
364 //FIXME check data base if collector already present
365 derivedUnitFacade.setCollector(person);
366 }
367
368 //collector number (fieldNumber OR recordNumber)
369 if(record.has(FIELD_NUMBER)){
370 derivedUnitFacade.setFieldNumber(record.getString(FIELD_NUMBER));
371 }
372 //collector number (fieldNumber OR recordNumber)
373 if(record.has(RECORD_NUMBER)){
374 derivedUnitFacade.setFieldNumber(record.getString(RECORD_NUMBER));
375 }
376
377 if(record.has(EVENT_REMARKS)){
378 derivedUnitFacade.setGatheringEventDescription(record.getString(EVENT_REMARKS));
379 }
380 if(record.has(OCCURRENCE_REMARKS)){
381 derivedUnitFacade.setEcology(record.getString(OCCURRENCE_REMARKS));
382 }
383 if(record.has(COLLECTION_CODE)){
384 String collectionCode = record.getString(COLLECTION_CODE);
385 tripleId[2] = collectionCode;
386 //FIXME: check data base for existing collections
387 eu.etaxonomy.cdm.model.occurrence.Collection collection = eu.etaxonomy.cdm.model.occurrence.Collection.NewInstance();
388 collection.setCode(collectionCode);
389 if(record.has(INSTITUTION_CODE)){
390 Institution institution = Institution.NewNamedInstance(record.getString(INSTITUTION_CODE));
391 institution.setCode(record.getString(INSTITUTION_CODE));
392 collection.setInstitute(institution);
393 }
394 derivedUnitFacade.setCollection(collection);
395 }
396 if(record.has(CATALOG_NUMBER)){
397 derivedUnitFacade.setCatalogNumber(record.getString(CATALOG_NUMBER));
398 derivedUnitFacade.setAccessionNumber(record.getString(CATALOG_NUMBER));
399 tripleId[0]= record.getString(CATALOG_NUMBER);
400 }
401 if(record.has(INSTITUTION_CODE)){
402 derivedUnitFacade.setAccessionNumber(record.getString(INSTITUTION_CODE));
403 tripleId[1]= record.getString(INSTITUTION_CODE);
404 }
405
406 if (record.has(OCCURENCE_ID)){
407 IdentifiableSource source = IdentifiableSource.NewDataImportInstance((record.getString(OCCURENCE_ID)));
408 derivedUnitFacade.addSource(source);
409 }
410
411 if (record.has(MULTIMEDIA)){
412 //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
413 JSONArray multimediaArray = record.getJSONArray(MULTIMEDIA);
414 JSONObject mediaRecord;
415 SpecimenOrObservationType type = null;
416 for(Object object:multimediaArray){
417 //parse every record
418 Media media = Media.NewInstance();
419 URI uri = null;
420 CdmImageInfo imageInf = null;
421
422 if(object instanceof JSONObject){
423 mediaRecord = (JSONObject) object;
424
425 if (mediaRecord.has("identifier")){
426 try {
427 uri = new URI(mediaRecord.getString("identifier"));
428 imageInf = MediaInfoFileReader.legacyFactoryMethod(uri)
429 .readBaseInfo()
430 .getCdmImageInfo();
431 } catch (URISyntaxException |IOException | HttpException | IllegalArgumentException e) {
432 e.printStackTrace();
433 }
434 // media.addIdentifier(mediaRecord.getString("identifier"), null);
435 }
436 if (mediaRecord.has("references")){
437
438
439 }
440 if (mediaRecord.has("format")){
441
442 }
443 if (mediaRecord.has("type")){
444 if (mediaRecord.get("type").equals("StillImage")){
445 type = SpecimenOrObservationType.StillImage;
446 }
447 }
448 }
449 ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInf);
450 MediaRepresentation representation = MediaRepresentation.NewInstance();
451
452 representation.addRepresentationPart(imageFile);
453 media.addRepresentation(representation);
454
455 derivedUnitFacade.addDerivedUnitMedia(media);
456 }
457 //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
458 //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
459 //format=image/jpeg
460 //type=StillImage
461 }
462
463 // create dataset URL
464 URI uri = null;
465 try {
466 uri = UriUtils.createUri(new URL(GbifQueryServiceWrapper.BASE_URL), "/v1/dataset/"+dataSetKey+"/endpoint", null, null);
467 } catch (MalformedURLException e) {
468 logger.error("Endpoint URI could not be created!", e);
469 } catch (URISyntaxException e) {
470 logger.error("Endpoint URI could not be created!", e);
471 }
472 results.add(new GbifResponse(derivedUnitFacade, uri, dataSetProtocol, tripleId, name));
473 }
474 }
475 return results;
476 }
477
478 public static DataSetResponse parseOriginalDataSetUri(InputStream inputStream) throws IOException {
479 StringWriter stringWriter = new StringWriter();
480 IOUtils.copy(inputStream, stringWriter, Charset.defaultCharset());
481 return parseOriginalDataSetUri(stringWriter.toString());
482 }
483
484 public static DataSetResponse parseOriginalDataSetUri(String jsonString) {
485 DataSetResponse response = new DataSetResponse();
486 JSONArray jsonArray = JSONArray.fromObject(jsonString);
487 Object next = jsonArray.iterator().next();
488 if(next instanceof JSONObject){
489 JSONObject jsonObject = (JSONObject)next;
490 if(jsonObject.has(URL)){
491 response.setEndpoint(URI.create(jsonObject.getString(URL)));
492 }
493 if(jsonObject.has(TYPE)){
494 response.setProtocol(GbifDataSetProtocol.parseProtocol(jsonObject.getString(TYPE)));
495 }
496 }
497 return response;
498 }
499 }