1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.io.globis;
|
11
|
|
12
|
import java.io.IOException;
|
13
|
import java.net.MalformedURLException;
|
14
|
import java.sql.ResultSet;
|
15
|
import java.sql.SQLException;
|
16
|
import java.util.HashMap;
|
17
|
import java.util.HashSet;
|
18
|
import java.util.Map;
|
19
|
import java.util.Set;
|
20
|
import java.util.UUID;
|
21
|
import java.util.regex.Matcher;
|
22
|
import java.util.regex.Pattern;
|
23
|
|
24
|
import org.apache.http.client.ClientProtocolException;
|
25
|
import org.apache.logging.log4j.LogManager;
|
26
|
import org.apache.logging.log4j.Logger;
|
27
|
import org.springframework.stereotype.Component;
|
28
|
|
29
|
import eu.etaxonomy.cdm.common.URI;
|
30
|
import eu.etaxonomy.cdm.common.UriUtils;
|
31
|
import eu.etaxonomy.cdm.facade.DerivedUnitFacade;
|
32
|
import eu.etaxonomy.cdm.io.common.IOValidator;
|
33
|
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
|
34
|
import eu.etaxonomy.cdm.io.globis.validation.GlobisImageImportValidator;
|
35
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
36
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
37
|
import eu.etaxonomy.cdm.model.common.Language;
|
38
|
import eu.etaxonomy.cdm.model.common.Marker;
|
39
|
import eu.etaxonomy.cdm.model.common.MarkerType;
|
40
|
import eu.etaxonomy.cdm.model.media.Media;
|
41
|
import eu.etaxonomy.cdm.model.name.IZoologicalName;
|
42
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
43
|
import eu.etaxonomy.cdm.model.occurrence.Collection;
|
44
|
import eu.etaxonomy.cdm.model.occurrence.DerivedUnit;
|
45
|
import eu.etaxonomy.cdm.model.occurrence.DeterminationEvent;
|
46
|
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
|
47
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
48
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
49
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
50
|
|
51
|
|
52
|
/**
|
53
|
* @author a.mueller
|
54
|
* @since 20.02.2010
|
55
|
*/
|
56
|
@Component
|
57
|
public class GlobisImageImport extends GlobisImportBase<Taxon> {
|
58
|
|
59
|
private static final long serialVersionUID = 5697033145326415146L;
private static final Logger logger = LogManager.getLogger();

/** Number of handled records between two progress messages in the log. */
private final int modCount = 1000;

/** UUID of the marker type set on specimens whose 'Art non spectax' column is 'ja'. */
private final UUID uuidArtNonSpecTaxMarkerType = UUID.fromString("be362085-0f5b-4314-96d1-78b9b129ef6d");

private static final String pluralString = "images";
private static final String dbTableName = "Einzelbilder";
private static final Class<?> cdmTargetClass = Media.class;  //not needed

/** UUID of the "GART" reference used as source reference for the GART specimen ids. */
private static final UUID uuidGartRef = UUID.fromString("af85470f-6e54-4304-9d29-fd117cd56161");

/**
 * Creates the image import, passing the plural label, the source table name
 * and the CDM target class to the base import.
 */
public GlobisImageImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
|
74
|
|
75
|
@Override
|
76
|
protected String getIdQuery() {
|
77
|
String strRecordQuery =
|
78
|
" SELECT BildId " +
|
79
|
" FROM " + dbTableName;
|
80
|
return strRecordQuery;
|
81
|
}
|
82
|
|
83
|
@Override
|
84
|
protected String getRecordQuery(GlobisImportConfigurator config) {
|
85
|
String strRecordQuery =
|
86
|
" SELECT i.*, NULL as Created_When, NULL as Created_Who," +
|
87
|
" NULL as Updated_who, NULL as Updated_When, NULL as Notes, st.SpecCurrspecID " +
|
88
|
" FROM " + getTableName() + " i " +
|
89
|
" LEFT JOIN specTax st ON i.spectaxID = st.SpecTaxID " +
|
90
|
" WHERE ( i.BildId IN (" + ID_LIST_TOKEN + ") )";
|
91
|
return strRecordQuery;
|
92
|
}
|
93
|
|
94
|
@Override
|
95
|
public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, GlobisImportState state) {
|
96
|
boolean success = true;
|
97
|
|
98
|
Set<Media> objectsToSave = new HashSet<>();
|
99
|
|
100
|
@SuppressWarnings("unchecked")
|
101
|
Map<String, DerivedUnit> typeMap = partitioner.getObjectMap(TYPE_NAMESPACE);
|
102
|
|
103
|
@SuppressWarnings("unchecked")
|
104
|
Map<String, Taxon> taxonMap = partitioner.getObjectMap(TAXON_NAMESPACE);
|
105
|
@SuppressWarnings("unchecked")
|
106
|
Map<String, TaxonName> specTaxNameMap = partitioner.getObjectMap(SPEC_TAX_NAMESPACE);
|
107
|
|
108
|
ResultSet rs = partitioner.getResultSet();
|
109
|
|
110
|
Reference refGart = getReferenceService().find(uuidGartRef);
|
111
|
|
112
|
|
113
|
try {
|
114
|
|
115
|
int i = 0;
|
116
|
|
117
|
//for each record
|
118
|
while (rs.next()){
|
119
|
|
120
|
if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
|
121
|
|
122
|
Integer bildID = rs.getInt("BildID");
|
123
|
Integer spectaxID = nullSafeInt(rs, "spectaxID");
|
124
|
Integer taxonID = nullSafeInt(rs, "SpecCurrspecID");
|
125
|
String copyright = rs.getString("copyright");
|
126
|
String specimenId = rs.getString("specimenID");
|
127
|
String bemerkungen = rs.getString("Bemerkungen");
|
128
|
String artNotSpecTax = rs.getString("Art non spectax");
|
129
|
String motiv = rs.getString("Motiv");
|
130
|
|
131
|
//ignore:
|
132
|
// [file lab2], same as Dateiname04 but less data
|
133
|
// Dateipfad
|
134
|
|
135
|
Set<Media> recordMedia = new HashSet<>();
|
136
|
|
137
|
try {
|
138
|
|
139
|
makeAllMedia(state, rs, recordMedia, objectsToSave);
|
140
|
|
141
|
String title = null;
|
142
|
|
143
|
DerivedUnit specimen = null;
|
144
|
if (spectaxID != null){
|
145
|
//try to find type specimen
|
146
|
if (isNotBlank(motiv) && (motiv.startsWith("type specimen"))){
|
147
|
String collectionCode = transformCopyright2CollectionCode(copyright);
|
148
|
String id = GlobisSpecTaxImport.getTypeId(spectaxID, collectionCode);
|
149
|
specimen = typeMap.get(id);
|
150
|
}
|
151
|
|
152
|
//try to find specTaxName
|
153
|
IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
|
154
|
if (specTaxTaxonName != null){
|
155
|
title = " taxon name " + specTaxTaxonName.getTitleCache();
|
156
|
}else{
|
157
|
title = " spectaxID " + spectaxID;
|
158
|
}
|
159
|
}else{
|
160
|
title = " name " + getNameFromFileOs(rs) + (isBlank(specimenId)? "" : " (specimenId: " + specimenId + ")");
|
161
|
}
|
162
|
|
163
|
//not type specimen
|
164
|
if (specimen == null){
|
165
|
specimen = DerivedUnit.NewPreservedSpecimenInstance();
|
166
|
specimen.setTitleCache("Specimen for " + title, true);
|
167
|
String collectionCode = transformCopyright2CollectionCode(copyright);
|
168
|
//TODO
|
169
|
Collection collection = getCollection(collectionCode);
|
170
|
specimen.setCollection(collection);
|
171
|
}
|
172
|
|
173
|
//source
|
174
|
specimen.addSource(OriginalSourceType.Import, String.valueOf(bildID), IMAGE_NAMESPACE, state.getTransactionalSourceReference(), null);
|
175
|
|
176
|
//GART id (specimenID)
|
177
|
if (isNotBlank(specimenId)){
|
178
|
specimen.addSource(OriginalSourceType.Lineage, specimenId, "specimenId", refGart, null);
|
179
|
}
|
180
|
//bemerkungen
|
181
|
if (isNotBlank(bemerkungen)){
|
182
|
Annotation annotation = Annotation.NewInstance(bemerkungen, null, null);
|
183
|
specimen.addAnnotation(annotation);
|
184
|
}
|
185
|
//media
|
186
|
DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(specimen);
|
187
|
for (Media media: recordMedia){
|
188
|
facade.addDerivedUnitMedia(media);
|
189
|
}
|
190
|
//art non spectax
|
191
|
if (isNotBlank(artNotSpecTax)){
|
192
|
if (artNotSpecTax.equalsIgnoreCase("ja")){
|
193
|
MarkerType artNotSpecTaxMarker = getMarkerType(state, uuidArtNonSpecTaxMarkerType , "Art non spectax", "This marker is true if in the orginal data the 'Art non spectax' was 'ja'", null) ;
|
194
|
specimen.addMarker(Marker.NewInstance(artNotSpecTaxMarker, true));
|
195
|
}else{
|
196
|
logger.warn(artNotSpecTax + " is not a valid value for 'Art non spectax' (BildID: " + bildID + ")" );
|
197
|
}
|
198
|
}
|
199
|
|
200
|
if (spectaxID != null){
|
201
|
|
202
|
//add to image gallery (discuss if this is also needed if taxon is already added to type specimen
|
203
|
// Taxon taxon = taxonMap.get(String.valueOf(taxonID));
|
204
|
IZoologicalName specTaxTaxonName = specTaxNameMap.get(String.valueOf(spectaxID));
|
205
|
|
206
|
//
|
207
|
// if (taxon == null){
|
208
|
//// taxon = specTaxMap.get(String.valueOf(spectaxID));
|
209
|
//// specTaxName = specTaxMap.g
|
210
|
// }
|
211
|
// if (taxon == null){
|
212
|
// logger.warn("No taxon available for specTaxID: " + spectaxID);
|
213
|
// }else{
|
214
|
// name = CdmBase.deproxy(taxon.getName(), ZoologicalName.class);
|
215
|
// }
|
216
|
|
217
|
//TODO FIXME
|
218
|
|
219
|
if (specTaxTaxonName == null){
|
220
|
logger.warn("Name could not be found for spectaxID: " + spectaxID + " in BildID: " + bildID);
|
221
|
}else{
|
222
|
Taxon taxon = null;
|
223
|
for (Taxon specTaxTaxon: specTaxTaxonName.getTaxa()){
|
224
|
taxon = specTaxTaxon;
|
225
|
}
|
226
|
if (taxon == null){
|
227
|
//FIXME
|
228
|
Reference undefinedSec = null;
|
229
|
taxon = Taxon.NewInstance(specTaxTaxonName, undefinedSec);
|
230
|
}
|
231
|
|
232
|
DeterminationEvent.NewInstance(taxon, specimen);
|
233
|
|
234
|
}
|
235
|
|
236
|
|
237
|
|
238
|
|
239
|
// if (taxon != null){
|
240
|
// TaxonDescription taxonDescription = getTaxonDescription(taxon, true, true);
|
241
|
// if (taxonDescription.getElements().size() == 0){
|
242
|
// TextData textData = TextData.NewInstance(Feature.IMAGE());
|
243
|
// taxonDescription.addElement(textData);
|
244
|
// }
|
245
|
// Set<DescriptionElementBase> elements = taxonDescription.getElements();
|
246
|
// TextData textData = CdmBase.deproxy(elements.iterator().next(), TextData.class);
|
247
|
// for (Media media: recordMedia){
|
248
|
// textData.addMedia(media);
|
249
|
// }
|
250
|
// }
|
251
|
}
|
252
|
|
253
|
} catch (Exception e) {
|
254
|
logger.warn("Exception in Einzelbilder: bildID " + bildID + ". " + e.getMessage());
|
255
|
e.printStackTrace();
|
256
|
}
|
257
|
|
258
|
}
|
259
|
|
260
|
logger.info(pluralString + " to save: " + objectsToSave.size());
|
261
|
getMediaService().save(objectsToSave);
|
262
|
|
263
|
return success;
|
264
|
} catch (SQLException e) {
|
265
|
logger.error("SQLException:" + e);
|
266
|
return false;
|
267
|
}
|
268
|
}
|
269
|
|
270
|
private Collection getCollection(String collectionCode) {
|
271
|
//TODO
|
272
|
return null;
|
273
|
}
|
274
|
|
275
|
private String getNameFromFileOs(ResultSet rs) throws SQLException {
|
276
|
String fileOS = rs.getString("file OS");
|
277
|
Pattern pattern = Pattern.compile("(.+)(_.{4}(-.{1,3})?(_Nr\\d{3,4})?_.{2,3}\\.jpg)");
|
278
|
Matcher matcher = pattern.matcher(fileOS);
|
279
|
if (matcher.matches()){
|
280
|
String match = matcher.group(1);
|
281
|
return match;
|
282
|
}else{
|
283
|
logger.warn("FileOS does not match: " + fileOS);
|
284
|
return fileOS;
|
285
|
}
|
286
|
}
|
287
|
|
288
|
private void makeAllMedia(GlobisImportState state, ResultSet rs, Set<Media> recordMedia, Set<Media> objectsToSave) throws SQLException{
|
289
|
//make image path
|
290
|
String pathShort = rs.getString("Dateipfad_kurz");
|
291
|
String fileOS = rs.getString("file OS");
|
292
|
pathShort= pathShort.replace(fileOS, "");
|
293
|
String newPath = state.getConfig().getImageBaseUrl();
|
294
|
String path = pathShort.replace("image:Webversionen/", newPath);
|
295
|
|
296
|
Media singleMedia = makeMedia(state, rs, "file OS", "Legende 1", path, objectsToSave );
|
297
|
recordMedia.add(singleMedia);
|
298
|
singleMedia = makeMedia(state, rs, "Dateinamen02", "Legende 2", path, objectsToSave );
|
299
|
recordMedia.add(singleMedia);
|
300
|
singleMedia = makeMedia(state, rs, "Dateinamen03", "Legende 3", path, objectsToSave );
|
301
|
recordMedia.add(singleMedia);
|
302
|
singleMedia = makeMedia(state, rs, "Dateinamen04", "Legende 4", path, objectsToSave );
|
303
|
recordMedia.add(singleMedia);
|
304
|
|
305
|
}
|
306
|
|
307
|
private Media makeMedia(GlobisImportState state, ResultSet rs, String fileNameAttr, String legendAttr, String path, Set<Media> objectsToSave) throws SQLException {
|
308
|
Media media = null;
|
309
|
String fileName = rs.getString(fileNameAttr);
|
310
|
String legend = rs.getString(legendAttr);
|
311
|
Integer bildID = rs.getInt("BildID");
|
312
|
|
313
|
String uriStr = path+fileName;
|
314
|
uriStr = uriStr.replace(" ", "%20");
|
315
|
|
316
|
URI uri = URI.create(uriStr);
|
317
|
|
318
|
// Media media = ImageInfo.NewInstanceWithMetaData(uri, null);
|
319
|
|
320
|
try {
|
321
|
boolean readMediaData = state.getConfig().isDoReadMediaData();
|
322
|
if (isBlank(legend) && readMediaData){
|
323
|
if (UriUtils.isOk(UriUtils.getResponse(uri, null))){
|
324
|
logger.warn("Image exists but legend is null " + uri + ", bildID" + bildID );
|
325
|
}else{
|
326
|
return null;
|
327
|
}
|
328
|
}
|
329
|
|
330
|
media = this.getImageMedia(uri.toString(), readMediaData);
|
331
|
media.putTitle(Language.ENGLISH(), legend);
|
332
|
this.doIdCreatedUpdatedNotes(state, media, rs, bildID, IMAGE_NAMESPACE);
|
333
|
|
334
|
objectsToSave.add(media);
|
335
|
|
336
|
|
337
|
} catch (MalformedURLException e) {
|
338
|
e.printStackTrace();
|
339
|
} catch (ClientProtocolException e) {
|
340
|
e.printStackTrace();
|
341
|
} catch (IOException e) {
|
342
|
e.printStackTrace();
|
343
|
}
|
344
|
|
345
|
return media;
|
346
|
}
|
347
|
|
348
|
private String transformCopyright2CollectionCode(String copyright){
|
349
|
|
350
|
if (isBlank(copyright)){
|
351
|
return "";
|
352
|
}else if(copyright.matches("Museum f.?r Naturkunde der Humboldt-Universit.?t, Berlin")){
|
353
|
return "MFNB";
|
354
|
}else if(copyright.matches("Staatliches Museum f.?r Tierkunde Dresden")){
|
355
|
return "SMTD";
|
356
|
}else if(copyright.equals("Natural History Museum, London")){
|
357
|
return "BMNH";
|
358
|
}else if(copyright.matches("Zoologische Staatssammlung M.?nchen")){
|
359
|
return "ZSSM";
|
360
|
}else if(copyright.matches("Staatliches Museum f.?r Naturkunde Karlsruhe")){
|
361
|
return "SMNK";
|
362
|
}else if(copyright.matches("Deutsches Entomologisches Institut M.?ncheberg")){
|
363
|
return "DEIE";
|
364
|
}else if(copyright.equals("Forschungsinstitut und Naturmuseum Senckenberg")){
|
365
|
return "SMFM";
|
366
|
}else if(copyright.matches("Mus.?um National d.?Histoire Naturelle, Paris")){
|
367
|
return "MNHN";
|
368
|
}else if(copyright.equals("Naturhistorisches Museum Wien")){
|
369
|
return "NHMW";
|
370
|
}else if(copyright.equals("Naturhistoriska Riksmuseet Stockholm")){
|
371
|
return "NRMS";
|
372
|
}else if(copyright.matches("Staatliches Museum f.?r Naturkunde Stuttgart")){
|
373
|
return "SMNS";
|
374
|
}else if(copyright.equals("United States National Museum of Natural History, Washington")){
|
375
|
return "USNM";
|
376
|
}else if(copyright.matches("Zentrum f.?r Biodokumentation des Saarlandes")){
|
377
|
return "ZFBS";
|
378
|
}else if(copyright.equals("Zoological Museum, University of Copenhagen")){
|
379
|
return "ZMUC";
|
380
|
}else if(copyright.equals("Zoologisches Forschungsinstitut und Museum \"Alexander Koenig\", Bonn")){
|
381
|
return "ZFMK";
|
382
|
}else if(copyright.equals("Zoologisches Forschungsmuseum \"Alexander Koenig\", Bonn")){
|
383
|
return "ZFMK";
|
384
|
}else if(copyright.matches("Zoologisches Institut der Martin-Luther-Universit.?t Halle-Wittenberg")){
|
385
|
return "ZIUH";
|
386
|
}else if(copyright.matches("Zoologisches Institut Universit.?t T.?bingen")){
|
387
|
return "ZIUT";
|
388
|
}else{
|
389
|
logger.warn("Unknown copyright entry: " + copyright);
|
390
|
return "";
|
391
|
}
|
392
|
}
|
393
|
|
394
|
|
395
|
@Override
|
396
|
public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, GlobisImportState state) {
|
397
|
|
398
|
String nameSpace;
|
399
|
Set<String> idSet;
|
400
|
Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
|
401
|
try{
|
402
|
Set<String> currSpecIdSet = new HashSet<>();
|
403
|
Set<String> specTaxIdSet = new HashSet<>();
|
404
|
Set<String> typeIdSet = new HashSet<>();
|
405
|
|
406
|
while (rs.next()){
|
407
|
handleForeignKey(rs, currSpecIdSet, "SpecCurrspecID");
|
408
|
handleForeignKey(rs, specTaxIdSet, "spectaxID");
|
409
|
handleTypeKey(rs, typeIdSet, "spectaxID", "copyright");
|
410
|
}
|
411
|
|
412
|
//specTax map
|
413
|
nameSpace = SPEC_TAX_NAMESPACE;
|
414
|
idSet = specTaxIdSet;
|
415
|
Map<String, TaxonName> specTaxNameMap = getCommonService().getSourcedObjectsByIdInSourceC(TaxonName.class, idSet, nameSpace);
|
416
|
result.put(nameSpace, specTaxNameMap);
|
417
|
|
418
|
// //taxon map
|
419
|
// nameSpace = TAXON_NAMESPACE;
|
420
|
// idSet = currSpecIdSet;
|
421
|
// Map<String, Taxon> taxonMap = getCommonService().getSourcedObjectsByIdInSourceC(Taxon.class, idSet, nameSpace);
|
422
|
// result.put(nameSpace, taxonMap);
|
423
|
|
424
|
|
425
|
//type map
|
426
|
nameSpace = GlobisImportBase.TYPE_NAMESPACE;
|
427
|
idSet = typeIdSet;
|
428
|
Map<String, DerivedUnit> typeMap = getCommonService().getSourcedObjectsByIdInSourceC(DerivedUnit.class, idSet, nameSpace);
|
429
|
result.put(nameSpace, typeMap);
|
430
|
|
431
|
|
432
|
} catch (SQLException e) {
|
433
|
throw new RuntimeException(e);
|
434
|
}
|
435
|
return result;
|
436
|
}
|
437
|
|
438
|
private void handleTypeKey(ResultSet rs, Set<String> idSet, String specTaxIdAttr, String copyrightAttr) throws SQLException {
|
439
|
Integer specTaxId = nullSafeInt(rs, specTaxIdAttr);
|
440
|
if (specTaxId != null){
|
441
|
String copyright = rs.getString(copyrightAttr);
|
442
|
if (isNotBlank(copyright)){
|
443
|
String id = GlobisSpecTaxImport.getTypeId(specTaxId, transformCopyright2CollectionCode(copyright));
|
444
|
idSet.add(id);
|
445
|
}
|
446
|
}
|
447
|
}
|
448
|
|
449
|
@Override
|
450
|
protected boolean doCheck(GlobisImportState state){
|
451
|
IOValidator<GlobisImportState> validator = new GlobisImageImportValidator();
|
452
|
return validator.validate(state);
|
453
|
}
|
454
|
|
455
|
@Override
|
456
|
protected boolean isIgnore(GlobisImportState state){
|
457
|
return ! state.getConfig().isDoImages();
|
458
|
}
|
459
|
|
460
|
@Override
|
461
|
protected void doInvoke(GlobisImportState state) {
|
462
|
Reference refGart = ReferenceFactory.newGeneric();
|
463
|
refGart.setTitleCache("GART", true);
|
464
|
refGart.setUuid(uuidGartRef);
|
465
|
getReferenceService().saveOrUpdate(refGart);
|
466
|
super.doInvoke(state);
|
467
|
}
|
468
|
}
|