ref #9148 change the name of CDM class ImageInfo to CdmImageInfo to avoid name clash...
[cdmlib.git] / cdmlib-ext / src / main / java / eu / etaxonomy / cdm / ext / occurrence / gbif / GbifJsonOccurrenceParser.java
1 /**
2 * Copyright (C) 2014 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.ext.occurrence.gbif;
10
11 import java.io.IOException;
12 import java.io.InputStream;
13 import java.io.StringWriter;
14 import java.net.MalformedURLException;
15 import java.net.URI;
16 import java.net.URISyntaxException;
17 import java.net.URL;
18 import java.text.ParseException;
19 import java.util.ArrayList;
20 import java.util.Collection;
21
22 import org.apache.commons.io.IOUtils;
23 import org.apache.http.HttpException;
24 import org.apache.log4j.Logger;
25
26 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
27 import eu.etaxonomy.cdm.common.UriUtils;
28 import eu.etaxonomy.cdm.common.media.CdmImageInfo;
29 import eu.etaxonomy.cdm.model.agent.Institution;
30 import eu.etaxonomy.cdm.model.agent.Person;
31 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
32 import eu.etaxonomy.cdm.model.common.TimePeriod;
33 import eu.etaxonomy.cdm.model.location.Country;
34 import eu.etaxonomy.cdm.model.location.Point;
35 import eu.etaxonomy.cdm.model.location.ReferenceSystem;
36 import eu.etaxonomy.cdm.model.media.ImageFile;
37 import eu.etaxonomy.cdm.model.media.Media;
38 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
39 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
40 import eu.etaxonomy.cdm.model.name.Rank;
41 import eu.etaxonomy.cdm.model.name.TaxonName;
42 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
43 import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
44 import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
45 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
46 import net.sf.json.JSONArray;
47 import net.sf.json.JSONObject;
48
49 /**
50 * Utility class which provides the functionality to convert a JSON response
51 * resulting from a GBIF query for occurrences to the corresponding CDM entities.
52 * @author pplitzner
53 * @since 22.05.2014
54 *
55 */
56 public class GbifJsonOccurrenceParser {
57
58 private static final Logger logger = Logger.getLogger(GbifJsonOccurrenceParser.class);
59
60 private static final String DATASET_KEY = "datasetKey";
61 private static final String DATASET_PROTOCOL = "protocol";
62
63 private static final String KEY = "key";
64 private static final String URL = "url";
65 private static final String TYPE = "type";
66
67 private static final String COUNTRY_CODE = "countryCode";
68 private static final String LOCALITY = "locality";
69 private static final String LONGITUDE = "decimalLongitude";
70 private static final String LATITUDE = "decimalLatitude";
71 private static final String GEOREFERENCE_PROTOCOL = "georeferenceProtocol";//reference system
72 private static final String VERBATIM_ELEVATION = "verbatimElevation";
73 private static final String YEAR = "year";
74 private static final String MONTH = "month";
75 private static final String DAY = "day";
76 private static final String EVENT_DATE= "eventDate";
77 private static final String RECORDED_BY= "recordedBy";//collector
78 private static final String RECORD_NUMBER = "recordNumber";//collector number
79 private static final String FIELD_NUMBER = "fieldNumber";//collector number
80 private static final String EVENT_REMARKS = "eventRemarks";//gathering event description
81 private static final String OCCURRENCE_REMARKS = "occurrenceRemarks";//ecology
82 private static final String COLLECTION_CODE = "collectionCode";
83 private static final String CATALOG_NUMBER = "catalogNumber";//accession number
84 private static final String INSTITUTION_CODE = "institutionCode";
85
86
87 protected static final String PUBLISHING_ORG_KEY = "publishingOrgKey";
88 protected static final String PUBLISHING_COUNTRY = "publishingCountry";
89
90 protected static final String EXTENSIONS = "extensions";
91 protected static final String BASIS_OF_RECORD = "basisOfRecord";
92 protected static final String INDIVIDUAL_COUNT = "individualCount";
93 protected static final String TAXONKEY = "taxonKey";
94 protected static final String KINGDOM_KEY = "kingdomKey";
95 protected static final String PHYLUM_KEY = "phylumKey";
96 protected static final String CLASS_KEY = "classKey";
97 protected static final String ORDER_KEY = "orderKey";
98 protected static final String FAMILY_KEY = "familyKey";
99 protected static final String GENUS_KEY = "genusKey";
100 protected static final String SPECIES_KEY = "speciesKey";
101 protected static final String SCIENTIFIC_NAME = "scientificName";
102 protected static final String KINGDOM = "kingdom";
103 protected static final String PHYLUM = "phylum";
104 protected static final String ORDER = "order";
105 protected static final String FAMILY = "family";
106 protected static final String GENUS = "genus";
107 protected static final String SPECIES = "species";
108 protected static final String GENERIC_NAME = "genericName";
109 protected static final String SPECIFIC_EPITHET = "specificEpithet";
110 protected static final String INFRASPECIFIC_EPITHET = "infraspecificEpithet";
111 protected static final String TAXON_RANK = "taxonRank";
112 protected static final String DATE_IDENTIFIED = "dateIdentified";
113 protected static final String SCIENTIFIC_NAME_AUTHORSHIP = "scientificNameAuthorship";
114
115 protected static final String ELEVATION = "elevation";
116 protected static final String CONITNENT = "continent";
117 protected static final String STATE_PROVINCE = "stateProvince";
118
119
120
121
122 protected static final String ISSUES = "issues";
123 protected static final String LAST_INTERPRETED = "lastInterpreted";
124 protected static final String IDENTIFIERS = "identifiers";
125 protected static final String FACTS = "facts";
126 protected static final String RELATIONS = "relations";
127 protected static final String GEODETICDATUM = "geodeticDatum";
128 protected static final String CLASS = "class";
129
130 protected static final String COUNTRY = "country";
131 protected static final String NOMENCLATURAL_STATUS = "nomenclaturalStatus";
132 protected static final String RIGHTSHOLDER = "rightsHolder";
133 protected static final String IDEMTIFIER = "identifier";
134
135 protected static final String NOMENCLATURALCODE = "nomenclaturalCode";
136 protected static final String COUNTY = "county";
137
138 protected static final String DATASET_NAME = "datasetName";
139 protected static final String GBIF_ID = "gbifID";
140
141 protected static final String OCCURENCE_ID = "occurrenceID";
142
143 protected static final String TAXON_ID = "taxonID";
144 protected static final String LICENCE = "license";
145
146 protected static final String OWNER_INSTITUTION_CODE = "ownerInstitutionCode";
147 protected static final String BIBLIOGRAPHIC_CITATION = "bibliographicCitation";
148 protected static final String IDENTIFIED_BY = "identifiedBy";
149 protected static final String COLLECTION_ID = "collectionID";
150
151 private static final String PLANTAE = "Plantae";
152
153 private static final String ANIMALIA = "Animalia";
154
155 private static final String FUNGI = "Fungi";
156
157 private static final String BACTERIA = "Bacteria";
158
159 private static final String MULTIMEDIA = "media";
160
161
162
163
164
165
166 /**
167 * Parses the given {@link String} for occurrences.<br>
168 * Note: The data structure of the GBIF response should not be changed.
169 * @param jsonString JSON data as a String
170 * @return the found occurrences as a collection of {@link GbifResponse}
171 */
172 public static Collection<GbifResponse> parseJsonRecords(String jsonString) {
173 return parseJsonRecords(JSONObject.fromObject(jsonString));
174 }
175
176 /**
177 * Parses the given {@link InputStream} for occurrences.
178 * @param jsonString JSON data as an InputStream
179 * @return the found occurrences as a collection of {@link GbifResponse}
180 */
181 public static Collection<GbifResponse> parseJsonRecords(InputStream inputStream) throws IOException{
182 StringWriter stringWriter = new StringWriter();
183 IOUtils.copy(inputStream, stringWriter);
184 return parseJsonRecords(stringWriter.toString());
185 }
186
187 /**
188 * Parses the given {@link JSONObject} for occurrences.<br>
189 * Note: The data structure of the GBIF response should not be changed.
190 * @param jsonString JSON data as an JSONObject
191 * @return the found occurrences as a collection of {@link GbifResponse}
192 */
193 public static Collection<GbifResponse> parseJsonRecords(JSONObject jsonObject){
194 return parseJsonRecords(jsonObject.getJSONArray("results"));
195 }
196
197 /**
198 * Parses the given {@link JSONArray} for occurrences.
199 * @param jsonString JSON data as an {@link JSONArray}
200 * @return the found occurrences as a collection of {@link GbifResponse}
201 */
202 private static Collection<GbifResponse> parseJsonRecords(JSONArray jsonArray) {
203 Collection<GbifResponse> results = new ArrayList<GbifResponse>();
204 String[] tripleId = new String[3];
205 String string;
206 for(Object o:jsonArray){
207 //parse every record
208 tripleId = new String[3];
209 if(o instanceof JSONObject){
210 String dataSetKey = null;
211 GbifDataSetProtocol dataSetProtocol = null;
212 DerivedUnitFacade derivedUnitFacade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.PreservedSpecimen);
213 TaxonName name = null;
214 JSONObject record = (JSONObject)o;
215
216 if(record.has(DATASET_PROTOCOL)){
217 dataSetProtocol = GbifDataSetProtocol.parseProtocol(record.getString(DATASET_PROTOCOL));
218 }
219 if(record.has(DATASET_KEY)){
220 dataSetKey = record.getString(DATASET_KEY);
221 }
222 if(record.has(COUNTRY_CODE)){
223 string = record.getString(COUNTRY_CODE);
224 Country country = Country.getCountryByIso3166A2(string);
225 if(country!=null){
226 derivedUnitFacade.setCountry(country);
227 }
228 }
229 if(record.has(LOCALITY)){
230 string = record.getString(LOCALITY);
231 derivedUnitFacade.setLocality(string);
232 }
233
234 if (record.has("species")){
235 Rank rank = null;
236
237 if (record.has(TAXON_RANK)){
238 string= record.getString(TAXON_RANK);
239 try {
240 rank = Rank.getRankByName(string);
241 } catch (UnknownCdmTypeException e) {
242 // TODO Auto-generated catch block
243 e.printStackTrace();
244 }
245 }
246 if (rank != null){
247 if (record.has(NOMENCLATURALCODE)){
248 string = record.getString(NOMENCLATURALCODE);
249
250 if (string.equals(NomenclaturalCode.ICZN.getTitleCache())){
251 name = TaxonNameFactory.NewZoologicalInstance(rank);
252 } else if (string.equals(NomenclaturalCode.ICNAFP.getTitleCache())) {
253 name = TaxonNameFactory.NewBotanicalInstance(rank);
254 } else if (string.equals(NomenclaturalCode.ICNB.getTitleCache())){
255 name = TaxonNameFactory.NewBacterialInstance(rank);
256 } else if (string.equals(NomenclaturalCode.ICNCP.getTitleCache())){
257 name = TaxonNameFactory.NewCultivarInstance(rank);
258 } else if (string.equals(NomenclaturalCode.ICVCN.getTitleCache())){
259 name = TaxonNameFactory.NewViralInstance(rank);
260 } else if (string.equals("ICN")){
261 name = TaxonNameFactory.NewBotanicalInstance(rank);
262 }
263 }else {
264 if (record.has(KINGDOM)){
265 if (record.getString(KINGDOM).equals(PLANTAE)){
266 name = TaxonNameFactory.NewBotanicalInstance(rank);
267 } else if (record.getString(KINGDOM).equals(ANIMALIA)){
268 name = TaxonNameFactory.NewZoologicalInstance(rank);
269 } else if (record.getString(KINGDOM).equals(FUNGI)){
270 name = TaxonNameFactory.NewBotanicalInstance(rank);
271 } else if (record.getString(KINGDOM).equals(BACTERIA)){
272 name = TaxonNameFactory.NewBacterialInstance(rank);
273 } else{
274 name = TaxonNameFactory.NewNonViralInstance(rank);
275 }
276 } else{
277 name = TaxonNameFactory.NewNonViralInstance(rank);
278 }
279 }
280 if (name == null){
281 name = TaxonNameFactory.NewNonViralInstance(rank);
282 }
283 if (record.has(GENUS)){
284 name.setGenusOrUninomial(record.getString(GENUS));
285 }
286 if (record.has(SPECIFIC_EPITHET)){
287 name.setSpecificEpithet(record.getString(SPECIFIC_EPITHET));
288 }
289 if (record.has(INFRASPECIFIC_EPITHET)){
290 name.setInfraSpecificEpithet(record.getString(INFRASPECIFIC_EPITHET));
291 }
292 if (record.has(SCIENTIFIC_NAME)){
293 name.setTitleCache(record.getString(SCIENTIFIC_NAME), true);
294 }
295
296 }
297 DeterminationEvent detEvent = DeterminationEvent.NewInstance();
298
299 if (record.has(IDENTIFIED_BY)){
300 Person determiner = Person.NewTitledInstance(record.getString(IDENTIFIED_BY));
301 detEvent.setDeterminer(determiner);
302
303 }
304 detEvent.setTaxonName(name);
305 detEvent.setPreferredFlag(true);
306 derivedUnitFacade.addDetermination(detEvent);
307
308 }
309
310
311
312 // GPS location
313 Point location = Point.NewInstance();
314 derivedUnitFacade.setExactLocation(location);
315 try {
316 if(record.has(LATITUDE)){
317 String lat = record.getString(LATITUDE);
318 location.setLatitudeByParsing(lat);
319 }
320 if(record.has(LONGITUDE)){
321 String lon = record.getString(LONGITUDE);
322 location.setLongitudeByParsing(lon);
323 }
324 } catch (ParseException e) {
325 logger.error("Could not parse GPS coordinates", e);
326 }
327 if(record.has(GEOREFERENCE_PROTOCOL)){
328 String geo = record.getString(GEOREFERENCE_PROTOCOL);
329 ReferenceSystem referenceSystem = null;
330 //TODO: Is there another way than string comparison
331 //to check which reference system is used?
332 if(ReferenceSystem.WGS84().getLabel().contains(geo)){
333 referenceSystem = ReferenceSystem.WGS84();
334 }
335 else if(ReferenceSystem.GOOGLE_EARTH().getLabel().contains(geo)){
336 referenceSystem = ReferenceSystem.GOOGLE_EARTH();
337 }
338 else if(ReferenceSystem.GAZETTEER().getLabel().contains(geo)){
339 referenceSystem = ReferenceSystem.GAZETTEER();
340 }
341 location.setReferenceSystem(referenceSystem);
342 }
343
344 if(record.has(ELEVATION)){
345 try {
346 //parse integer and strip of unit
347 string = record.getString(ELEVATION);
348 int length = string.length();
349 StringBuilder builder = new StringBuilder();
350 for(int i=0;i<length;i++){
351 if(Character.isDigit(string.charAt(i))){
352 builder.append(string.charAt(i));
353 }
354 else{
355 break;
356 }
357 }
358 derivedUnitFacade.setAbsoluteElevation(Integer.parseInt(builder.toString()));
359 } catch (NumberFormatException e) {
360 logger.warn("Could not parse elevation", e);
361 }
362 }
363
364 //Date (Gathering Period)
365 TimePeriod timePeriod = TimePeriod.NewInstance();
366 derivedUnitFacade.setGatheringPeriod(timePeriod);
367 //TODO what happens with eventDate??
368 if(record.has(YEAR)){
369 timePeriod.setStartYear(record.getInt(YEAR));
370 }
371 if(record.has(MONTH)){
372 timePeriod.setStartMonth(record.getInt(MONTH));
373 }
374 if(record.has(DAY)){
375 timePeriod.setStartDay(record.getInt(DAY));
376 }
377 if(record.has(RECORDED_BY)){
378 Person person = Person.NewTitledInstance(record.getString(RECORDED_BY));
379 //FIXME check data base if collector already present
380 derivedUnitFacade.setCollector(person);
381 }
382
383 //collector number (fieldNumber OR recordNumber)
384 if(record.has(FIELD_NUMBER)){
385 derivedUnitFacade.setFieldNumber(record.getString(FIELD_NUMBER));
386 }
387 //collector number (fieldNumber OR recordNumber)
388 if(record.has(RECORD_NUMBER)){
389 derivedUnitFacade.setFieldNumber(record.getString(RECORD_NUMBER));
390 }
391
392 if(record.has(EVENT_REMARKS)){
393 derivedUnitFacade.setGatheringEventDescription(record.getString(EVENT_REMARKS));
394 }
395 if(record.has(OCCURRENCE_REMARKS)){
396 derivedUnitFacade.setEcology(record.getString(OCCURRENCE_REMARKS));
397 }
398 if(record.has(COLLECTION_CODE)){
399 String collectionCode = record.getString(COLLECTION_CODE);
400 tripleId[2] = collectionCode;
401 //FIXME: check data base for existing collections
402 eu.etaxonomy.cdm.model.occurrence.Collection collection = eu.etaxonomy.cdm.model.occurrence.Collection.NewInstance();
403 collection.setCode(collectionCode);
404 if(record.has(INSTITUTION_CODE)){
405 Institution institution = Institution.NewNamedInstance(record.getString(INSTITUTION_CODE));
406 institution.setCode(record.getString(INSTITUTION_CODE));
407 collection.setInstitute(institution);
408 }
409 derivedUnitFacade.setCollection(collection);
410 }
411 if(record.has(CATALOG_NUMBER)){
412 derivedUnitFacade.setCatalogNumber(record.getString(CATALOG_NUMBER));
413 derivedUnitFacade.setAccessionNumber(record.getString(CATALOG_NUMBER));
414 tripleId[0]= record.getString(CATALOG_NUMBER);
415 }
416 if(record.has(INSTITUTION_CODE)){
417 derivedUnitFacade.setAccessionNumber(record.getString(INSTITUTION_CODE));
418 tripleId[1]= record.getString(INSTITUTION_CODE);
419 }
420
421 if (record.has(OCCURENCE_ID)){
422 IdentifiableSource source = IdentifiableSource.NewDataImportInstance((record.getString(OCCURENCE_ID)));
423 derivedUnitFacade.addSource(source);
424 }
425
426 if (record.has(MULTIMEDIA)){
427 //http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
428 JSONArray multimediaArray = record.getJSONArray(MULTIMEDIA);
429 JSONObject mediaRecord;
430 Media media;
431 URI uri = null;
432 CdmImageInfo imageInf = null;
433 MediaRepresentation representation = null;
434 SpecimenOrObservationType type = null;
435 for(Object object:multimediaArray){
436 //parse every record
437 media = Media.NewInstance();
438 uri = null;
439 imageInf = null;
440
441 if(object instanceof JSONObject){
442 mediaRecord = (JSONObject) object;
443
444 if (mediaRecord.has("identifier")){
445 try {
446 uri = new URI(mediaRecord.getString("identifier"));
447 imageInf = CdmImageInfo.NewInstance(uri, 0);
448 } catch (URISyntaxException |IOException | HttpException e) {
449 e.printStackTrace();
450 }
451 // media.addIdentifier(mediaRecord.getString("identifier"), null);
452 }
453 if (mediaRecord.has("references")){
454
455
456 }
457 if (mediaRecord.has("format")){
458
459 }
460 if (mediaRecord.has("type")){
461 if (mediaRecord.get("type").equals("StillImage")){
462 type = SpecimenOrObservationType.StillImage;
463 }
464 }
465
466 }
467 ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInf);
468 representation = MediaRepresentation.NewInstance();
469
470 representation.addRepresentationPart(imageFile);
471 media.addRepresentation(representation);
472
473 derivedUnitFacade.addDerivedUnitMedia(media);
474 }
475 //identifier=http://ww2.bgbm.org/herbarium/images/B/-W/08/53/B_-W_08537%20-00%201__3.jpg
476 //references=http://ww2.bgbm.org/herbarium/view_biocase.cfm?SpecimenPK=136628
477 //format=image/jpeg
478 //type=StillImage
479
480 }
481
482 // create dataset URL
483 URI uri = null;
484 try {
485 uri = UriUtils.createUri(new URL(GbifQueryServiceWrapper.BASE_URL), "/v1/dataset/"+dataSetKey+"/endpoint", null, null);
486 } catch (MalformedURLException e) {
487 logger.error("Endpoint URI could not be created!", e);
488 } catch (URISyntaxException e) {
489 logger.error("Endpoint URI could not be created!", e);
490 }
491 results.add(new GbifResponse(derivedUnitFacade, uri, dataSetProtocol, tripleId, name));
492 }
493 }
494 return results;
495 }
496
497 public static DataSetResponse parseOriginalDataSetUri(InputStream inputStream) throws IOException {
498 StringWriter stringWriter = new StringWriter();
499 IOUtils.copy(inputStream, stringWriter);
500 return parseOriginalDataSetUri(stringWriter.toString());
501 }
502
503 public static DataSetResponse parseOriginalDataSetUri(String jsonString) {
504 DataSetResponse response = new DataSetResponse();
505 JSONArray jsonArray = JSONArray.fromObject(jsonString);
506 Object next = jsonArray.iterator().next();
507 if(next instanceof JSONObject){
508 JSONObject jsonObject = (JSONObject)next;
509 if(jsonObject.has(URL)){
510 response.setEndpoint(URI.create(jsonObject.getString(URL)));
511 }
512 if(jsonObject.has(TYPE)){
513 response.setProtocol(GbifDataSetProtocol.parseProtocol(jsonObject.getString(TYPE)));
514 }
515 }
516 return response;
517 }
518
519 }