1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
|
10
|
package eu.etaxonomy.cdm.app.cyprus;
|
11
|
|
12
|
import java.io.File;
|
13
|
import java.io.IOException;
|
14
|
import java.net.MalformedURLException;
|
15
|
import java.net.URI;
|
16
|
import java.net.URISyntaxException;
|
17
|
import java.util.ArrayList;
|
18
|
import java.util.HashMap;
|
19
|
import java.util.HashSet;
|
20
|
import java.util.Iterator;
|
21
|
import java.util.List;
|
22
|
import java.util.Map;
|
23
|
import java.util.Set;
|
24
|
import java.util.regex.Matcher;
|
25
|
import java.util.regex.Pattern;
|
26
|
|
27
|
import org.apache.commons.imaging.ImageReadException;
|
28
|
import org.apache.commons.imaging.Imaging;
|
29
|
import org.apache.commons.imaging.common.GenericImageMetadata.GenericImageMetadataItem;
|
30
|
import org.apache.commons.imaging.common.ImageMetadata;
|
31
|
import org.apache.commons.imaging.common.ImageMetadata.ImageMetadataItem;
|
32
|
import org.apache.log4j.Logger;
|
33
|
import org.joda.time.DateTime;
|
34
|
import org.joda.time.format.DateTimeFormat;
|
35
|
import org.joda.time.format.DateTimeFormatter;
|
36
|
import org.springframework.transaction.TransactionStatus;
|
37
|
|
38
|
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
|
39
|
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
|
40
|
import eu.etaxonomy.cdm.app.common.CdmDestinations;
|
41
|
import eu.etaxonomy.cdm.common.UTF8;
|
42
|
import eu.etaxonomy.cdm.common.media.CdmImageInfo;
|
43
|
import eu.etaxonomy.cdm.database.DbSchemaValidation;
|
44
|
import eu.etaxonomy.cdm.database.ICdmDataSource;
|
45
|
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
|
46
|
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
|
47
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
48
|
import eu.etaxonomy.cdm.model.agent.AgentBase;
|
49
|
import eu.etaxonomy.cdm.model.agent.Institution;
|
50
|
import eu.etaxonomy.cdm.model.agent.Person;
|
51
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
52
|
import eu.etaxonomy.cdm.model.common.Language;
|
53
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
54
|
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
|
55
|
import eu.etaxonomy.cdm.model.description.Feature;
|
56
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
57
|
import eu.etaxonomy.cdm.model.description.TextData;
|
58
|
import eu.etaxonomy.cdm.model.media.ImageFile;
|
59
|
import eu.etaxonomy.cdm.model.media.Media;
|
60
|
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
|
61
|
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
|
62
|
import eu.etaxonomy.cdm.model.media.Rights;
|
63
|
import eu.etaxonomy.cdm.model.media.RightsType;
|
64
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
65
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
66
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
67
|
|
68
|
/**
|
69
|
* Creates CDM Media from images stored in the given path.
|
70
|
*
|
71
|
* Note: Currently adapted to also change from the Scaler IIIF API to the default Scaler API.
|
72
|
* Note2: updateMetadata() still needs to be adapted to support 3 MediaRepresentations
|
73
|
*
|
74
|
* @author a.mueller
|
75
|
* @since 05.2017
|
76
|
*/
|
77
|
public class CyprusImagesActivator {
|
78
|
private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
|
79
|
|
80
|
static final ICdmDataSource cdmDestination = CdmDestinations.local_cyprus();
|
81
|
// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_cyprus();
|
82
|
// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_cyprus();
|
83
|
|
84
|
static boolean testOnly = false;
|
85
|
static boolean update_notCreate = false;
|
86
|
//if true, data will always be updated, if false, only missing data will be updated
|
87
|
static boolean forceUpdate = false;
|
88
|
|
89
|
private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
|
90
|
private static final String oldUrlPath = "https://pictures.bgbm.org/digilib/Scaler/IIIF/Cyprus!";
|
91
|
private static final String newUrlPath = "https://pictures.bgbm.org/digilib/Scaler?fn=Cyprus/";
|
92
|
private static final String oldPostfix = "/full/full/0/default.jpg";
|
93
|
private static final String newPostfix = "&mo=file";
|
94
|
private static final String mediumPostfix ="&mo=fit&dw=400&dh=400&uvfix=1";
|
95
|
private static final String smallPostfix ="&mo=fit&dw=200&dh=200&uvfix=1";
|
96
|
|
97
|
private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
|
98
|
|
99
|
private void doImport(ICdmDataSource cdmDestination){
|
100
|
|
101
|
CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
|
102
|
TransactionStatus tx = app.startTransaction();
|
103
|
|
104
|
deduplicationHelper = (ImportDeduplicationHelper)ImportDeduplicationHelper.NewInstance(app);
|
105
|
|
106
|
File file = new File(path);
|
107
|
String[] fileList = file.list();
|
108
|
Set<String> notFound = new HashSet<>();
|
109
|
|
110
|
String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
|
111
|
Pattern pattern = Pattern.compile(regEx);
|
112
|
|
113
|
String start = "O"; //O
|
114
|
String end = "Q"; //Q
|
115
|
String startLetter = "";
|
116
|
|
117
|
for (String fileName : fileList){
|
118
|
if(fileName.compareToIgnoreCase(start) < 0 || fileName.compareToIgnoreCase(end) >= 0){
|
119
|
continue;
|
120
|
}
|
121
|
Matcher matcher = pattern.matcher(fileName);
|
122
|
if (matcher.matches() ){
|
123
|
// System.out.println(fileName);
|
124
|
if (!fileName.substring(0,3).equals(startLetter)){
|
125
|
startLetter = fileName.substring(0,3);
|
126
|
System.out.println(startLetter);
|
127
|
}
|
128
|
String taxonName = matcher.group(1);
|
129
|
taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
|
130
|
Taxon taxon = getAcceptedTaxon(app, taxonName);
|
131
|
if (taxon == null){
|
132
|
if (!notFound.contains(taxonName)){
|
133
|
notFound.add(taxonName);
|
134
|
logger.warn("Taxon not found: " + taxonName);
|
135
|
}
|
136
|
}else{
|
137
|
try {
|
138
|
handleTaxon(app, taxon, fileName);
|
139
|
} catch (Exception e) {
|
140
|
logger.error("Unhandled exception ("+e.getMessage()+") when reading file " + fileName +". File not imported: ");
|
141
|
e.printStackTrace();
|
142
|
}
|
143
|
}
|
144
|
}else{
|
145
|
if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
|
146
|
logger.warn("Incorrect filename:" + fileName);
|
147
|
}else{
|
148
|
System.out.println("Not clear yet: " + fileName);
|
149
|
}
|
150
|
}
|
151
|
}
|
152
|
|
153
|
// app.getTaxonService().saveOrUpdate(taxaToSave);
|
154
|
|
155
|
if (testOnly){
|
156
|
tx.setRollbackOnly();
|
157
|
}
|
158
|
app.commitTransaction(tx);
|
159
|
}
|
160
|
|
161
|
private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
|
162
|
Map<String, Media> existingUrls = getAllExistingUrls(taxon);
|
163
|
String pathToOldImage = oldUrlPath + fileName + oldPostfix;
|
164
|
|
165
|
String pathToFullImage = newUrlPath + fileName + newPostfix;
|
166
|
String pathToMediumImage = newUrlPath + fileName + mediumPostfix;
|
167
|
String pathToSmallImage = newUrlPath + fileName + smallPostfix;
|
168
|
|
169
|
if (containsAll(existingUrls, pathToFullImage, pathToMediumImage, pathToSmallImage)){
|
170
|
return;
|
171
|
}else{
|
172
|
Media media;
|
173
|
if (containsAny(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage)){
|
174
|
media = getExistingMedia(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage);
|
175
|
if (media == null){
|
176
|
return;
|
177
|
}else if (media.getAllTitles().isEmpty()){
|
178
|
media.setTitleCache(null, false);
|
179
|
media.putTitle(Language.LATIN(), fileName);
|
180
|
}
|
181
|
}else{
|
182
|
media = Media.NewInstance();
|
183
|
makeMetaData(media, fileName, false);
|
184
|
makeTitle(media, fileName, false);
|
185
|
if (!testOnly){
|
186
|
makeTextData(fileName, media, taxon);
|
187
|
}
|
188
|
}
|
189
|
fillMediaWithAllRepresentations(media, pathToFullImage, pathToMediumImage, pathToSmallImage, pathToOldImage);
|
190
|
}
|
191
|
}
|
192
|
|
193
|
private Media getExistingMedia(Map<String, Media> existingUrls, String pathToFullImage, String pathToMediumImage,
|
194
|
String pathToSmallImage) {
|
195
|
Set<Media> result = new HashSet<>();
|
196
|
for(String existingUrl : existingUrls.keySet()){
|
197
|
if (existingUrl.equals(pathToFullImage) || existingUrl.equals(pathToMediumImage) ||
|
198
|
existingUrl.equals(pathToSmallImage)){
|
199
|
result.add(existingUrls.get(existingUrl));
|
200
|
}
|
201
|
}
|
202
|
if (result.isEmpty()){
|
203
|
logger.warn("Media for existing URL not found. This should not happen.");
|
204
|
return null;
|
205
|
}else if (result.size() > 1){
|
206
|
logger.warn("Existing URLs have more than 1 Media. This should not happen.");
|
207
|
return null;
|
208
|
}else{
|
209
|
return result.iterator().next();
|
210
|
}
|
211
|
}
|
212
|
|
213
|
/**
|
214
|
* <code>true</code> if all 3 paths exist in the URL set
|
215
|
*/
|
216
|
private boolean containsAll(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
|
217
|
String pathToSmallImage) {
|
218
|
Set<String> existingUrls = existingUrlMap.keySet();
|
219
|
return existingUrls.contains(pathToFullImage) && existingUrls.contains(pathToMediumImage)
|
220
|
&& existingUrls.contains(pathToSmallImage);
|
221
|
}
|
222
|
|
223
|
/**
|
224
|
* <code>true</code> if any of the 3 paths exists in the URL set
|
225
|
*/
|
226
|
private boolean containsAny(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
|
227
|
String pathToSmallImage) {
|
228
|
Set<String> existingUrls = existingUrlMap.keySet();
|
229
|
return existingUrls.contains(pathToFullImage) || existingUrls.contains(pathToMediumImage)
|
230
|
|| existingUrls.contains(pathToSmallImage);
|
231
|
}
|
232
|
|
233
|
private void makeTitle(Media media, String fileName, boolean updateOnly) {
|
234
|
String title = fileName.replace("_s_"," subsp. ")
|
235
|
.replace("_"," ").replace(".jpg","").replace(".JPG","");
|
236
|
if ( (!updateOnly) || media.getAllTitles().isEmpty()){
|
237
|
media.putTitle(Language.LATIN(), title);
|
238
|
}
|
239
|
}
|
240
|
|
241
|
private void makeMetaData(Media media, String fileName, boolean updateOnly) {
|
242
|
|
243
|
File file = new File(path + fileName);
|
244
|
if (!file.exists()){
|
245
|
logger.warn("File for filename " + fileName + " does not exist.");
|
246
|
return;
|
247
|
}
|
248
|
|
249
|
Map<String, String> keywords = new HashMap<>();
|
250
|
String copyright = null;
|
251
|
String artistStr = null;
|
252
|
String created = null;
|
253
|
try{
|
254
|
// IImageMetadata metadata = Sanselan.getMetadata(file);
|
255
|
ImageMetadata metadata = Imaging.getMetadata(file);
|
256
|
List<? extends ImageMetadataItem> items = metadata.getItems();
|
257
|
for (ImageMetadataItem metadataItem : items){
|
258
|
// System.out.println(item.getKeyword() + ": " + item.getText());
|
259
|
if (metadataItem instanceof GenericImageMetadataItem){
|
260
|
GenericImageMetadataItem item = (GenericImageMetadataItem) metadataItem;
|
261
|
|
262
|
String keyword = item.getKeyword().toLowerCase();
|
263
|
String value =removeQuots(item.getText());
|
264
|
|
265
|
if("keywords".equals(keyword)){
|
266
|
String[] splits = value.split(":");
|
267
|
if (splits.length == 2){
|
268
|
keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
|
269
|
}else{
|
270
|
logger.warn("Keyword has not correct format and can not be parsed: " + value + " for file " + fileName);
|
271
|
}
|
272
|
}else if ("Copyright Notice".equalsIgnoreCase(keyword)){
|
273
|
copyright = value;
|
274
|
}else if ("artist".equals(keyword)){
|
275
|
artistStr = value;
|
276
|
}else if ("date time original".equalsIgnoreCase(item.getKeyword())){
|
277
|
created = value;
|
278
|
}
|
279
|
}
|
280
|
}
|
281
|
} catch (ImageReadException | IOException e1) {
|
282
|
logger.warn(" Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
|
283
|
e1.printStackTrace();
|
284
|
}
|
285
|
|
286
|
AgentBase<?> artistAgent = null;
|
287
|
Rights right = null;
|
288
|
DateTime createdDate = null;
|
289
|
String locality = null;
|
290
|
|
291
|
//artist
|
292
|
if (keywords.get("photographer") != null){
|
293
|
String artist = keywords.get("photographer");
|
294
|
artistAgent = makePerson(artist, fileName);
|
295
|
}
|
296
|
if (artistStr != null){
|
297
|
if (keywords.get("photographer") == null){
|
298
|
artistAgent = makePerson(artistStr, fileName);
|
299
|
}else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
|
300
|
.contains(artistStr.toLowerCase().replace(" ", ""))){
|
301
|
logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
|
302
|
}
|
303
|
}
|
304
|
|
305
|
//locality
|
306
|
if (keywords.get("locality") != null){
|
307
|
locality = keywords.get("locality");
|
308
|
}
|
309
|
|
310
|
//copyright
|
311
|
if (copyright != null){
|
312
|
AgentBase<?> agent;
|
313
|
if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
|
314
|
agent = Institution.NewNamedInstance(copyright);
|
315
|
}else{
|
316
|
agent = makePerson(copyright, fileName);
|
317
|
}
|
318
|
right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
|
319
|
right.setAgent(agent);
|
320
|
right = deduplicationHelper.getExistingCopyright(null, right);
|
321
|
}
|
322
|
|
323
|
//created
|
324
|
if (created != null){
|
325
|
DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
|
326
|
try {
|
327
|
createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
|
328
|
} catch (Exception e) {
|
329
|
logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
|
330
|
}
|
331
|
}
|
332
|
|
333
|
boolean force = !updateOnly || forceUpdate;
|
334
|
//add to media
|
335
|
if (artistAgent != null && (force || media.getArtist() == null)){
|
336
|
media.setArtist(artistAgent);
|
337
|
}
|
338
|
if (right != null && (force || media.getRights().isEmpty())){
|
339
|
media.removeRights(right);
|
340
|
media.addRights(right);
|
341
|
}
|
342
|
if (createdDate != null && (force || media.getMediaCreated() == null)){
|
343
|
media.setMediaCreated(TimePeriod.NewInstance(createdDate));
|
344
|
}
|
345
|
if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
|
346
|
media.putDescription(Language.ENGLISH(), locality);
|
347
|
}
|
348
|
}
|
349
|
|
350
|
private Person makePerson(String artist, String fileName) {
|
351
|
artist = artist.trim();
|
352
|
String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
|
353
|
Matcher matcher = Pattern.compile(regEx).matcher(artist);
|
354
|
Person person = Person.NewInstance();
|
355
|
if (matcher.matches()){
|
356
|
person.setGivenName(matcher.group(1).trim());
|
357
|
person.setFamilyName(matcher.group(2).trim());
|
358
|
}else{
|
359
|
person.setTitleCache(artist, true);
|
360
|
logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
|
361
|
}
|
362
|
|
363
|
person = deduplicationHelper.getExistingAuthor(null, person);
|
364
|
return person;
|
365
|
}
|
366
|
|
367
|
private String removeQuots(String text) {
|
368
|
if (text.startsWith("'") && text.endsWith("'")){
|
369
|
return text.substring(1, text.length() -1);
|
370
|
}else{
|
371
|
return text;
|
372
|
}
|
373
|
}
|
374
|
|
375
|
private void makeTextData(String fileStr, Media media, Taxon taxon) {
|
376
|
TaxonDescription imageGallery = taxon.getImageGallery(true);
|
377
|
TextData textData = null;
|
378
|
if (!imageGallery.getElements().isEmpty()){
|
379
|
DescriptionElementBase el = imageGallery.getElements().iterator().next();
|
380
|
if (el.isInstanceOf(TextData.class)){
|
381
|
textData = CdmBase.deproxy(el, TextData.class);
|
382
|
}else{
|
383
|
logger.warn("Image gallery had non-textdata description element: " + fileStr);
|
384
|
}
|
385
|
}
|
386
|
if (textData == null){
|
387
|
textData = TextData.NewInstance();
|
388
|
textData.setFeature(Feature.IMAGE());
|
389
|
}
|
390
|
imageGallery.addElement(textData);
|
391
|
textData.addMedia(media);
|
392
|
}
|
393
|
|
394
|
private void fillMediaWithAllRepresentations(Media media, String fullPath, String mediumPath, String smallPath, String oldFullPath){
|
395
|
Set<MediaRepresentation> existingRepresentations = new HashSet<>(media.getRepresentations());
|
396
|
makeMediaRepresentation(oldFullPath, media, existingRepresentations, fullPath);
|
397
|
makeMediaRepresentation(mediumPath, media, existingRepresentations, null);
|
398
|
makeMediaRepresentation(smallPath, media, existingRepresentations, null);
|
399
|
if(!existingRepresentations.isEmpty()){
|
400
|
logger.warn("Media contains existing representations which are not contained in the 3 paths: " + media.getTitleCache());
|
401
|
}
|
402
|
}
|
403
|
|
404
|
private void makeMediaRepresentation(String uriString, Media media,
|
405
|
Set<MediaRepresentation> existingRepresentations, String replaceUri) {
|
406
|
MediaRepresentation existingMediaRep = getExistingMediaRepresentation(uriString, existingRepresentations);
|
407
|
boolean readMediaData = true;
|
408
|
MediaRepresentation newMediaRep = makeMediaRepresentation(replaceUri != null? replaceUri : uriString, readMediaData);
|
409
|
if (existingMediaRep == null){
|
410
|
media.addRepresentation(newMediaRep);
|
411
|
}else{
|
412
|
existingRepresentations.remove(existingMediaRep);
|
413
|
mergeToExistingRepresentation(existingMediaRep, newMediaRep);
|
414
|
}
|
415
|
}
|
416
|
|
417
|
private void mergeToExistingRepresentation(MediaRepresentation existingMediaRep, MediaRepresentation newMediaRep) {
|
418
|
existingMediaRep.setMimeType(newMediaRep.getMimeType());
|
419
|
existingMediaRep.setSuffix(newMediaRep.getSuffix());
|
420
|
if(!existingMediaRep.getParts().isEmpty() && !newMediaRep.getParts().isEmpty()){
|
421
|
MediaRepresentationPart existingPart = existingMediaRep.getParts().iterator().next();
|
422
|
ImageFile newPart = (ImageFile)newMediaRep.getParts().iterator().next();
|
423
|
if(existingPart.isInstanceOf(ImageFile.class)){
|
424
|
ImageFile existingImage = CdmBase.deproxy(existingPart, ImageFile.class);
|
425
|
existingImage.setHeight(newPart.getHeight());
|
426
|
existingImage.setWidth(newPart.getWidth());
|
427
|
}else{
|
428
|
logger.warn("MediaRepresentationPart was not of type ImageFile. Height and width not merged: " + existingPart.getUri());
|
429
|
}
|
430
|
existingPart.setSize(newPart.getSize());
|
431
|
existingPart.setUri(newPart.getUri());
|
432
|
}
|
433
|
}
|
434
|
|
435
|
private MediaRepresentation getExistingMediaRepresentation(String uriString,
|
436
|
Set<MediaRepresentation> existingRepresentations) {
|
437
|
for (MediaRepresentation rep : existingRepresentations){
|
438
|
for (MediaRepresentationPart part : rep.getParts()){
|
439
|
if (part.getUri() != null && part.getUri().toString().equals(uriString)){
|
440
|
return rep;
|
441
|
}
|
442
|
}
|
443
|
}
|
444
|
return null;
|
445
|
}
|
446
|
|
447
|
/**
|
448
|
* Creates
|
449
|
* @see #READ_MEDIA_DATA
|
450
|
* @return
|
451
|
* @throws MalformedURLException
|
452
|
*/
|
453
|
protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
|
454
|
if( uriString == null){
|
455
|
return null;
|
456
|
} else {
|
457
|
uriString = uriString.replace(" ", "%20"); //replace whitespace
|
458
|
try {
|
459
|
MediaRepresentation representation = makeMediaRepresentation(uriString, readMediaData);
|
460
|
Media media = Media.NewInstance();
|
461
|
media.addRepresentation(representation);
|
462
|
|
463
|
if (uriStrThumb != null){
|
464
|
CdmImageInfo imageInfoThumb = null;
|
465
|
uriStrThumb = uriStrThumb.replace(" ", "%20"); //replace whitespace
|
466
|
URI uriThumb = new URI(uriStrThumb);
|
467
|
try {
|
468
|
if (readMediaData){
|
469
|
logger.info("Read media data from: " + uriThumb);
|
470
|
imageInfoThumb = CdmImageInfo.NewInstance(uriThumb, 0);
|
471
|
}
|
472
|
} catch (Exception e) {
|
473
|
String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " + e.getMessage();
|
474
|
logger.warn(message);
|
475
|
}
|
476
|
|
477
|
ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
|
478
|
MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
|
479
|
if(imageInfoThumb != null){
|
480
|
reprThumb.setMimeType(imageInfoThumb.getMimeType());
|
481
|
reprThumb.setSuffix(imageInfoThumb.getSuffix());
|
482
|
}
|
483
|
reprThumb.addRepresentationPart(imageFileFhumb);
|
484
|
media.addRepresentation(reprThumb);
|
485
|
}
|
486
|
|
487
|
return media;
|
488
|
} catch (URISyntaxException e1) {
|
489
|
String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " + uriString;
|
490
|
logger.warn(message);
|
491
|
return null;
|
492
|
}
|
493
|
}
|
494
|
}
|
495
|
|
496
|
private MediaRepresentation makeMediaRepresentation(String uriString, boolean readMediaData) {
|
497
|
|
498
|
uriString = uriString.replace(" ", "%20"); //replace whitespace
|
499
|
CdmImageInfo imageInfo = null;
|
500
|
URI uri;
|
501
|
try {
|
502
|
uri = new URI(uriString);
|
503
|
} catch (URISyntaxException e1) {
|
504
|
logger.error("Malformed URI. Could not create media representation: " + uriString);
|
505
|
return null;
|
506
|
}
|
507
|
try {
|
508
|
if (readMediaData){
|
509
|
logger.info("Read media data from: " + uri);
|
510
|
imageInfo = CdmImageInfo.NewInstance(uri, 0);
|
511
|
}
|
512
|
} catch (Exception e) {
|
513
|
try {
|
514
|
//try again
|
515
|
imageInfo = CdmImageInfo.NewInstance(uri, 0);
|
516
|
} catch (Exception e1) {
|
517
|
String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " + e1.getMessage();
|
518
|
e1.printStackTrace();
|
519
|
logger.warn(message);
|
520
|
}
|
521
|
}
|
522
|
ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
|
523
|
|
524
|
MediaRepresentation representation = MediaRepresentation.NewInstance();
|
525
|
|
526
|
if(imageInfo != null){
|
527
|
representation.setMimeType(imageInfo.getMimeType());
|
528
|
representation.setSuffix(imageInfo.getSuffix());
|
529
|
}
|
530
|
representation.addRepresentationPart(imageFile);
|
531
|
return representation;
|
532
|
}
|
533
|
|
534
|
private Map<String, Media> getAllExistingUrls(Taxon taxon) {
|
535
|
Map<String, Media> result = new HashMap<>();
|
536
|
Set<TaxonDescription> descriptions = taxon.getDescriptions();
|
537
|
for (TaxonDescription td : descriptions){
|
538
|
if (td.isImageGallery()){
|
539
|
for (DescriptionElementBase deb : td.getElements()){
|
540
|
if (deb.isInstanceOf(TextData.class)){
|
541
|
TextData textData = CdmBase.deproxy(deb, TextData.class);
|
542
|
for (Media media :textData.getMedia()){
|
543
|
for (MediaRepresentation rep : media.getRepresentations()){
|
544
|
for (MediaRepresentationPart part : rep.getParts()){
|
545
|
URI uri = part.getUri();
|
546
|
if (uri != null){
|
547
|
String uriStr = uri.toString();
|
548
|
result.put(uriStr, media);
|
549
|
}
|
550
|
}
|
551
|
}
|
552
|
}
|
553
|
}
|
554
|
}
|
555
|
}
|
556
|
}
|
557
|
return result;
|
558
|
}
|
559
|
|
560
|
private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
|
561
|
|
562
|
MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
|
563
|
taxonNameStr = adaptName(taxonNameStr);
|
564
|
config.setTaxonNameTitle(taxonNameStr);
|
565
|
config.setIncludeSynonyms(false);
|
566
|
List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
|
567
|
if (list.isEmpty()){
|
568
|
// logger.warn("Taxon not found for media: " + taxonNameStr);
|
569
|
taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString());
|
570
|
config.setTaxonNameTitle(taxonNameStr);
|
571
|
list = app.getTaxonService().findTaxaByName(config);
|
572
|
if (list.isEmpty()){
|
573
|
return null;
|
574
|
}else if (list.size() > 1){
|
575
|
logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
|
576
|
}
|
577
|
}
|
578
|
if (list.size()>1){
|
579
|
Iterator<TaxonBase> it = list.iterator();
|
580
|
while (it.hasNext()){
|
581
|
Taxon next = (Taxon)it.next();
|
582
|
if (next.getTaxonNodes().isEmpty() && !next.getTaxaForMisappliedName(true).isEmpty()){
|
583
|
it.remove();
|
584
|
}
|
585
|
}
|
586
|
if (list.size()>1){
|
587
|
logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
|
588
|
it = list.iterator();
|
589
|
while (it.hasNext()){
|
590
|
Taxon next = (Taxon)it.next();
|
591
|
if (next.getTaxonNodes().isEmpty()){
|
592
|
it.remove();
|
593
|
}
|
594
|
}
|
595
|
if (list.size()>1){
|
596
|
logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
|
597
|
}else if (list.size() < 1){
|
598
|
logger.warn("After removing nodeless taxa no taxon was left: " + taxonNameStr);
|
599
|
return null;
|
600
|
}
|
601
|
}else if (list.size() < 1){
|
602
|
logger.warn("After removing misapplications no taxon was left: " + taxonNameStr);
|
603
|
return null;
|
604
|
}
|
605
|
}
|
606
|
TaxonBase<?> taxonBase = list.get(0);
|
607
|
Taxon result;
|
608
|
if (taxonBase.isInstanceOf(Synonym.class)){
|
609
|
result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
|
610
|
}else{
|
611
|
result = CdmBase.deproxy(taxonBase, Taxon.class);
|
612
|
}
|
613
|
return result;
|
614
|
}
|
615
|
|
616
|
private String adaptName(String taxonNameStr) {
|
617
|
// if (taxonNameStr.equals("Hypericum cerastoides")){
|
618
|
// taxonNameStr = "Hypericum cerastioides";
|
619
|
// }
|
620
|
return taxonNameStr;
|
621
|
}
|
622
|
|
623
|
private void test(){
|
624
|
File f = new File(path);
|
625
|
String[] list = f.list();
|
626
|
List<String> fullFileNames = new ArrayList<>();
|
627
|
for (String fileName : list){
|
628
|
fullFileNames.add(path + fileName);
|
629
|
if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
|
630
|
System.out.println(fileName);
|
631
|
}
|
632
|
}
|
633
|
}
|
634
|
|
635
|
private void updateMetadata(ICdmDataSource cdmDestination){
|
636
|
CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
|
637
|
TransactionStatus tx = app.startTransaction();
|
638
|
|
639
|
deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(app);
|
640
|
|
641
|
List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
|
642
|
for (Media media : list){
|
643
|
String fileName = getUrlStringForMedia(media);
|
644
|
if (fileName.startsWith(newUrlPath)){
|
645
|
//TODO not yet adapted to new image server URLs
|
646
|
fileName = fileName.replace(newUrlPath, "");
|
647
|
if (fileName.equals("Acinos_exiguus_C1.jpg")){ //for debugging only
|
648
|
// System.out.println(fileName);
|
649
|
makeMetaData(media, fileName, true);
|
650
|
makeTitle(media, fileName, true);
|
651
|
}
|
652
|
}else{
|
653
|
logger.warn("Filename does not start with standard url path: " + fileName);
|
654
|
}
|
655
|
}
|
656
|
|
657
|
if (testOnly){
|
658
|
tx.setRollbackOnly();
|
659
|
}
|
660
|
app.commitTransaction(tx);
|
661
|
}
|
662
|
|
663
|
private String getUrlStringForMedia(Media media) {
|
664
|
String result = null;
|
665
|
for (MediaRepresentation rep : media.getRepresentations()){
|
666
|
for (MediaRepresentationPart part : rep.getParts()){
|
667
|
URI uri = part.getUri();
|
668
|
if (uri != null){
|
669
|
if (result != null){
|
670
|
//TODO this still needs to be adapted to the 3 representations of media
|
671
|
logger.warn("More than 1 uri exists for media "+ media.getId());
|
672
|
}else{
|
673
|
result = uri.toString();
|
674
|
}
|
675
|
}
|
676
|
}
|
677
|
}
|
678
|
return result;
|
679
|
}
|
680
|
|
681
|
public static void main(String[] args) {
|
682
|
CyprusImagesActivator me = new CyprusImagesActivator();
|
683
|
if (update_notCreate){
|
684
|
me.updateMetadata(cdmDestination);
|
685
|
}else{
|
686
|
me.doImport(cdmDestination);
|
687
|
}
|
688
|
// me.test();
|
689
|
System.exit(0);
|
690
|
}
|
691
|
}
|