1
|
/**
|
2
|
* Copyright (C) 2018 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.greece;
|
10
|
|
11
|
import java.net.MalformedURLException;
|
12
|
import java.net.URI;
|
13
|
import java.text.ParseException;
|
14
|
import java.util.List;
|
15
|
import java.util.Map;
|
16
|
import java.util.UUID;
|
17
|
|
18
|
import org.apache.log4j.Logger;
|
19
|
import org.springframework.stereotype.Component;
|
20
|
|
21
|
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
|
22
|
import eu.etaxonomy.cdm.api.facade.DerivedUnitFacadeNotSupportedException;
|
23
|
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
|
24
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
25
|
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
|
26
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
|
27
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
28
|
import eu.etaxonomy.cdm.model.agent.Person;
|
29
|
import eu.etaxonomy.cdm.model.agent.Team;
|
30
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
31
|
import eu.etaxonomy.cdm.model.common.Language;
|
32
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
33
|
import eu.etaxonomy.cdm.model.description.Feature;
|
34
|
import eu.etaxonomy.cdm.model.description.IndividualsAssociation;
|
35
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
36
|
import eu.etaxonomy.cdm.model.description.TextData;
|
37
|
import eu.etaxonomy.cdm.model.location.Country;
|
38
|
import eu.etaxonomy.cdm.model.media.Media;
|
39
|
import eu.etaxonomy.cdm.model.media.Rights;
|
40
|
import eu.etaxonomy.cdm.model.media.RightsType;
|
41
|
import eu.etaxonomy.cdm.model.name.Rank;
|
42
|
import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
|
43
|
import eu.etaxonomy.cdm.model.occurrence.Collection;
|
44
|
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
|
45
|
import eu.etaxonomy.cdm.model.occurrence.SpecimenOrObservationType;
|
46
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
47
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
48
|
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
|
49
|
|
50
|
/**
|
51
|
* @author a.mueller
|
52
|
* @since 21.08.2018
|
53
|
*/
|
54
|
@Component
|
55
|
public class GreeceWillingImport
|
56
|
extends SimpleExcelTaxonImport<GreeceWillingImportConfigurator>{
|
57
|
|
58
|
private static final String HERBARIUM_ID_NAMESPACE = "HerbariumID";
|
59
|
private static final String RDF_ID_NAMESPACE = "rdfID";
|
60
|
|
61
|
private static final long serialVersionUID = 8258914747643501550L;
|
62
|
|
63
|
private static final Logger logger = Logger.getLogger(GreeceWillingImport.class);
|
64
|
|
65
|
private ImportDeduplicationHelper<SimpleExcelTaxonImportState> dedupHelper;
|
66
|
|
67
|
private String lastCollectorNumber;
|
68
|
private UUID lastDerivedUnitUuid;
|
69
|
|
70
|
private String lastTaxonTitle;
|
71
|
private UUID lastTaxonDescription;
|
72
|
|
73
|
private int count = 1;
|
74
|
|
75
|
|
76
|
/**
|
77
|
* {@inheritDoc}
|
78
|
*/
|
79
|
@Override
|
80
|
protected void firstPass(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state) {
|
81
|
try {
|
82
|
Map<String, String> record = state.getOriginalRecord();
|
83
|
|
84
|
String scientificName = record.get("ScientificName");
|
85
|
String stableIdentifier = record.get("ObjectURI");
|
86
|
String title = record.get("Title");
|
87
|
String titleDescription = record.get("TitleDescription");
|
88
|
|
89
|
String collector = record.get("Collector");
|
90
|
String collectorNumber = record.get("CollectorNumber");
|
91
|
|
92
|
String collectionDate = record.get("CollectionDate");
|
93
|
// String CollDateISO = record.get("CollDateISO");
|
94
|
String catalogNumber = record.get("CatalogNumber");
|
95
|
String locality = record.get("Locality");
|
96
|
String image = record.get("Image");
|
97
|
|
98
|
String latitude = record.get("Latitude");
|
99
|
String longitude = record.get("Longitude");
|
100
|
|
101
|
String rdfId = record.get(RDF_ID_NAMESPACE);
|
102
|
String herbariumId = record.get(HERBARIUM_ID_NAMESPACE);
|
103
|
|
104
|
String baseOfRecords = record.get("BaseOfRecords");
|
105
|
String collectionCode = record.get("CollectionCode");
|
106
|
String institutionCode = record.get("InstitutionCode");
|
107
|
|
108
|
TimePeriod date = TimePeriodParser.parseString(collectionDate);
|
109
|
if (date.getFreeText() != null){
|
110
|
System.out.println("Date could not be parsed: " + collectionDate + "; row: " + state.getCurrentLine());
|
111
|
}
|
112
|
|
113
|
// validate(state, "BaseOfRecords", "Specimen");
|
114
|
// validate(state, "InstitutionCode", "BGBM");
|
115
|
// validate(state, "CollectionCode", "B");
|
116
|
validate(state, "HigherGeography", "Greece");
|
117
|
validate(state, "Country", "Greece");
|
118
|
validate(state, "CountryCode", "GR");
|
119
|
|
120
|
//not used, but validate just in case
|
121
|
validate(state, "HUH_PURL", "NULL");
|
122
|
// validate(state, "DB", "JACQ");
|
123
|
validate(state, "CollDateISO", collectionDate);
|
124
|
|
125
|
// validate(state, "HerbariumID", collectionDate);
|
126
|
|
127
|
//open
|
128
|
// HerbariumID;
|
129
|
// HTML_URI
|
130
|
|
131
|
|
132
|
Reference sourceReference = getSourceReference(state);
|
133
|
|
134
|
Taxon taxon = getTaxonByName(state, scientificName);
|
135
|
verifyTaxon(state, taxon, record);
|
136
|
if (taxon == null){
|
137
|
System.out.println("Taxon not found for " + scientificName + "; row: " + state.getCurrentLine());
|
138
|
if (!state.getConfig().isH2()){
|
139
|
return;
|
140
|
}else{
|
141
|
taxon = Taxon.NewInstance(TaxonNameFactory.NewBotanicalInstance(null), getSourceReference(state));
|
142
|
taxon.getName().setTitleCache(title, true);
|
143
|
}
|
144
|
}
|
145
|
if (state.getConfig().isCheckNamesOnly()){
|
146
|
return;
|
147
|
}
|
148
|
|
149
|
DerivedUnit lastDerivedUnit = null;
|
150
|
if (collectorNumber.equals(lastCollectorNumber)){
|
151
|
lastDerivedUnit = (DerivedUnit)getOccurrenceService().find(lastDerivedUnitUuid);
|
152
|
}
|
153
|
|
154
|
DerivedUnitFacade facade;
|
155
|
String sourceId = rdfId;
|
156
|
String sourceNamespace = RDF_ID_NAMESPACE;
|
157
|
if (rdfId.equalsIgnoreCase("NULL")){
|
158
|
sourceId = herbariumId;
|
159
|
sourceNamespace = HERBARIUM_ID_NAMESPACE;
|
160
|
}
|
161
|
|
162
|
if (lastDerivedUnit == null){
|
163
|
if (baseOfRecords.equals("Specimen")){
|
164
|
facade = DerivedUnitFacade.NewPreservedSpecimenInstance();
|
165
|
}else if (baseOfRecords.equals("HumanObservation")){
|
166
|
facade = DerivedUnitFacade.NewInstance(SpecimenOrObservationType.Observation, null);
|
167
|
}else {
|
168
|
System.out.println("baseOfRecords of records not recognized: " + baseOfRecords + "; use preserved specimen as default");
|
169
|
facade = DerivedUnitFacade.NewPreservedSpecimenInstance();
|
170
|
}
|
171
|
facade.setFieldNumber(collectorNumber);
|
172
|
facade.setCountry(Country.GREECEHELLENICREPUBLIC());
|
173
|
facade.setLocality(locality);
|
174
|
try {
|
175
|
facade.setExactLocationByParsing(longitude, latitude, null, null);
|
176
|
} catch (ParseException e) {
|
177
|
e.printStackTrace();
|
178
|
}
|
179
|
facade.setCollector(getCollector(state, collector));
|
180
|
facade.getGatheringEvent(true).setTimeperiod(date);
|
181
|
facade.setPreferredStableUri(URI.create(stableIdentifier));
|
182
|
if (catalogNumber.startsWith("B")){
|
183
|
facade.setBarcode(catalogNumber);
|
184
|
}else{
|
185
|
facade.setCatalogNumber(catalogNumber);
|
186
|
}
|
187
|
facade.setCollection(getCollection(state, collectionCode, institutionCode));
|
188
|
this.addOriginalSource(facade.innerFieldUnit(), sourceId, sourceNamespace, sourceReference);
|
189
|
this.addOriginalSource(facade.innerDerivedUnit(), sourceId, sourceNamespace, sourceReference);
|
190
|
|
191
|
IndividualsAssociation specimen = IndividualsAssociation.NewInstance(facade.innerDerivedUnit());
|
192
|
if (baseOfRecords.equals("HumanObservation")){
|
193
|
specimen.setFeature(Feature.OBSERVATION());
|
194
|
}else if (baseOfRecords.equals("Specimen")){
|
195
|
specimen.setFeature(Feature.SPECIMEN());
|
196
|
}else{
|
197
|
System.out.println("Base of record not recognized for feature selection: " + baseOfRecords);
|
198
|
}
|
199
|
if (taxon != null ){
|
200
|
TaxonDescription description = getTaxonDescription(taxon, sourceReference, false, CREATE);
|
201
|
description.addElement(specimen);
|
202
|
}
|
203
|
lastCollectorNumber = collectorNumber;
|
204
|
lastTaxonTitle = title;
|
205
|
lastDerivedUnitUuid = specimen.getUuid();
|
206
|
count = 1;
|
207
|
}else{
|
208
|
try {
|
209
|
facade = DerivedUnitFacade.NewInstance(lastDerivedUnit);
|
210
|
} catch (DerivedUnitFacadeNotSupportedException e) {
|
211
|
System.out.println("Error in " + state.getCurrentLine());
|
212
|
e.printStackTrace();
|
213
|
return;
|
214
|
}
|
215
|
count++;
|
216
|
}
|
217
|
|
218
|
Media media = getMedia(state,title + " (Willing " + count + ")", titleDescription, image, date);
|
219
|
facade.addFieldObjectMedia(media);
|
220
|
|
221
|
TaxonDescription imageGallery = taxon.getOrCreateImageGallery(taxon.getName().getTitleCache());
|
222
|
TextData imageTextData;
|
223
|
if (imageGallery.getElements().isEmpty()){
|
224
|
imageTextData = TextData.NewInstance(Feature.IMAGE());
|
225
|
imageGallery.addElement(imageTextData);
|
226
|
}else{
|
227
|
imageTextData = (CdmBase.deproxy(imageGallery.getElements().iterator().next(), TextData.class));
|
228
|
}
|
229
|
imageTextData.addMedia(media);
|
230
|
|
231
|
// media.addPrimaryMediaSource(citation, microCitation);
|
232
|
this.addOriginalSource(media, sourceId, sourceNamespace, sourceReference);
|
233
|
|
234
|
|
235
|
// getDedupHelper(state).replaceAuthorNamesAndNomRef(state, name);
|
236
|
|
237
|
getOccurrenceService().saveOrUpdate(facade.baseUnit());
|
238
|
lastDerivedUnitUuid = facade.baseUnit().getUuid();
|
239
|
|
240
|
} catch (MalformedURLException e) {
|
241
|
logger.warn("An error occurred during import");
|
242
|
}
|
243
|
|
244
|
}
|
245
|
|
246
|
/**
|
247
|
* @param state
|
248
|
* @param record
|
249
|
*/
|
250
|
private void verifyTaxon(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state,
|
251
|
Taxon taxon, Map<String, String> record) {
|
252
|
|
253
|
if (taxon==null){
|
254
|
return;
|
255
|
}
|
256
|
String genus = record.get("Genus");
|
257
|
String specificEpi = record.get("SpecificEpithet");
|
258
|
String family = record.get("Family");
|
259
|
|
260
|
if (!CdmUtils.nullSafeEqual(genus, taxon.getName().getGenusOrUninomial())){
|
261
|
System.out.println(" Genus and taxonNameGenus not equal: " +
|
262
|
genus + " <-> " + taxon.getName().getGenusOrUninomial() + "; row: " + state.getCurrentLine());
|
263
|
}
|
264
|
if (!CdmUtils.nullSafeEqual(specificEpi, taxon.getName().getSpecificEpithet())){
|
265
|
System.out.println(" SpecificEpi and taxonNameSpecificEpi not equal: " +
|
266
|
specificEpi + " <-> " + taxon.getName().getSpecificEpithet() + "; row: " + state.getCurrentLine());
|
267
|
}
|
268
|
while (taxon.getTaxonNodes().size()== 1 ){
|
269
|
Taxon parent = taxon.getTaxonNodes().iterator().next().getParent().getTaxon();
|
270
|
if (parent == null){
|
271
|
break;
|
272
|
}else{
|
273
|
if (parent.getName().getRank().equals(Rank.FAMILY()) && parent.getName().getGenusOrUninomial().equals(family)){
|
274
|
return;
|
275
|
}
|
276
|
taxon = parent;
|
277
|
}
|
278
|
}
|
279
|
System.out.println(" Family could not be verified: " + family + "; row: " + state.getCurrentLine());
|
280
|
}
|
281
|
|
282
|
private Collection bgbm;
|
283
|
/**
|
284
|
* @param state
|
285
|
* @return
|
286
|
*/
|
287
|
private Collection getCollection(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state, String collectionCode, String institutionCode) {
|
288
|
if (bgbm == null){
|
289
|
List<Collection> results = getCollectionService().searchByCode(collectionCode);
|
290
|
if (results.size()> 1){
|
291
|
throw new RuntimeException("More then 1 collection found for 'B'");
|
292
|
}else if (results.isEmpty()){
|
293
|
Collection collection = Collection.NewInstance();
|
294
|
collection.setCode(collectionCode);
|
295
|
getCollectionService().save(collection);
|
296
|
System.out.println("Collection '"+collectionCode+"' did not exist. Created new one.");
|
297
|
return collection;
|
298
|
// throw new RuntimeException("No collection found for 'B'");
|
299
|
}
|
300
|
if ("B".equals(collectionCode) && !"".equals(institutionCode)
|
301
|
|| "HWilling".equals(collectionCode) && !"JACQ".equals(institutionCode)){
|
302
|
System.out.println("CollectionCode and InstitutionCode do not match expected values: " + collectionCode + "; " + institutionCode);
|
303
|
}
|
304
|
bgbm = results.get(0);
|
305
|
}
|
306
|
return bgbm;
|
307
|
}
|
308
|
|
309
|
/**
|
310
|
* @param state
|
311
|
* @param string
|
312
|
* @param string2
|
313
|
*/
|
314
|
private void validate(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state, String attr,
|
315
|
String expectedValue) {
|
316
|
Map<String, String> record = state.getOriginalRecord();
|
317
|
String attrValue = record.get(attr);
|
318
|
if (!expectedValue.equalsIgnoreCase(attrValue)){
|
319
|
throw new RuntimeException("Attribute " + attr + " has not expected value " + expectedValue + " but "+ attrValue);
|
320
|
}
|
321
|
}
|
322
|
|
323
|
/**
|
324
|
* @param state
|
325
|
* @param title
|
326
|
* @param titleDescription
|
327
|
* @param date
|
328
|
* @param image
|
329
|
* @return
|
330
|
* @throws MalformedURLException
|
331
|
*/
|
332
|
private Media getMedia(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state, String title,
|
333
|
String titleDescription, String imageUrl, TimePeriod date) throws MalformedURLException {
|
334
|
|
335
|
Person artist = getArtist(state);
|
336
|
String baseUrl = imageUrl.replace("http://ww2.bgbm.org/herbarium/images/Willing/GR/", "http://mediastorage.bgbm.org/fsi/server?type=image&source=Willing_GR/");
|
337
|
String thumbnail = baseUrl + "&width=240&profile=jpeg&quality=98";
|
338
|
|
339
|
String medium = baseUrl + "&width=350&profile=jpeg&quality=95";
|
340
|
Media media = getImageMedia(imageUrl, medium, thumbnail, true);
|
341
|
|
342
|
media.setMediaCreated(date);
|
343
|
media.setArtist(artist);
|
344
|
|
345
|
//copyright
|
346
|
Rights right = Rights.NewInstance();
|
347
|
right.setType(RightsType.COPYRIGHT());
|
348
|
right.setAgent(artist);
|
349
|
right = getDedupHelper(state).getExistingCopyright(state, right);
|
350
|
media.addRights(right);
|
351
|
|
352
|
if (isNotBlank(title)){
|
353
|
media.putTitle(Language.ENGLISH(), title);
|
354
|
}
|
355
|
if (isNotBlank(titleDescription)){
|
356
|
media.putDescription(Language.ENGLISH(), titleDescription);
|
357
|
}
|
358
|
|
359
|
return media;
|
360
|
}
|
361
|
|
362
|
|
363
|
Team willingCollector;
|
364
|
/**
|
365
|
* @param state
|
366
|
* @param collector
|
367
|
* @return
|
368
|
*/
|
369
|
private Team getCollector(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state,
|
370
|
String collector) {
|
371
|
|
372
|
if (!"Willing,R. & Willing,E.".equals(collector)){
|
373
|
throw new RuntimeException("Unexpected collector: " + collector);
|
374
|
}
|
375
|
if (willingCollector == null){
|
376
|
UUID willingTeamUuid = UUID.fromString("ab3594a5-304f-4f19-bc8b-4a38c8abfad7");
|
377
|
willingCollector = (Team)getAgentService().find(willingTeamUuid);
|
378
|
if (willingCollector == null){
|
379
|
willingCollector = Team.NewTitledInstance("Willing, R. & Willing, E.", null);
|
380
|
getAgentService().save(willingCollector);
|
381
|
}
|
382
|
}
|
383
|
return willingCollector;
|
384
|
}
|
385
|
|
386
|
Person willingArtist;
|
387
|
private Person getArtist(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state) {
|
388
|
|
389
|
if (willingArtist == null){
|
390
|
UUID willingArtistUuid = UUID.fromString("83ff66c7-4e51-4f6e-ac37-593a52ce3430");
|
391
|
willingArtist = (Person)getAgentService().find(willingArtistUuid);
|
392
|
if (willingArtist == null){
|
393
|
willingArtist = Person.NewTitledInstance("Willing, E.");
|
394
|
getAgentService().save(willingArtist);
|
395
|
}
|
396
|
}
|
397
|
return willingArtist;
|
398
|
}
|
399
|
|
400
|
/**
|
401
|
* @param state
|
402
|
* @param scientificName
|
403
|
* @return
|
404
|
*/
|
405
|
private Taxon getTaxonByName(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state,
|
406
|
String scientificName) {
|
407
|
|
408
|
MatchingTaxonConfigurator config = MatchingTaxonConfigurator.NewInstance();
|
409
|
config.setTaxonNameTitle(scientificName);
|
410
|
config.setIncludeSynonyms(false);
|
411
|
Taxon result = getTaxonService().findBestMatchingTaxon(config);
|
412
|
return result;
|
413
|
}
|
414
|
|
415
|
/**
|
416
|
* @param state
|
417
|
* @return
|
418
|
*/
|
419
|
private ImportDeduplicationHelper<SimpleExcelTaxonImportState> getDedupHelper(SimpleExcelTaxonImportState<GreeceWillingImportConfigurator> state) {
|
420
|
if (this.dedupHelper == null){
|
421
|
dedupHelper = ImportDeduplicationHelper.NewInstance(this, state);
|
422
|
}
|
423
|
return this.dedupHelper;
|
424
|
}
|
425
|
}
|