Project

General

Profile

« Previous | Next » 

Revision 2f9e52e3

Added by Andreas Müller over 2 years ago

Add CichorieaImageUpdateActivator

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/app/wp6/cichorieae/CichorieaImageUpdateActivator.java
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.wp6.cichorieae;
10

  
11
import java.io.File;
12
import java.net.MalformedURLException;
13
import java.net.URISyntaxException;
14
import java.util.ArrayList;
15
import java.util.HashMap;
16
import java.util.HashSet;
17
import java.util.Iterator;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Set;
21
import java.util.regex.Matcher;
22
import java.util.regex.Pattern;
23

  
24
import org.apache.log4j.Logger;
25
import org.joda.time.DateTime;
26
import org.joda.time.format.DateTimeFormat;
27
import org.joda.time.format.DateTimeFormatter;
28
import org.springframework.transaction.TransactionStatus;
29

  
30
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
31
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
32
import eu.etaxonomy.cdm.api.service.media.MediaInfoFactory;
33
import eu.etaxonomy.cdm.api.service.media.MediaInfoFileReader;
34
import eu.etaxonomy.cdm.app.common.CdmDestinations;
35
import eu.etaxonomy.cdm.common.URI;
36
import eu.etaxonomy.cdm.common.UTF8;
37
import eu.etaxonomy.cdm.common.media.CdmImageInfo;
38
import eu.etaxonomy.cdm.database.DbSchemaValidation;
39
import eu.etaxonomy.cdm.database.ICdmDataSource;
40
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
41
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
42
import eu.etaxonomy.cdm.model.agent.AgentBase;
43
import eu.etaxonomy.cdm.model.agent.Institution;
44
import eu.etaxonomy.cdm.model.agent.Person;
45
import eu.etaxonomy.cdm.model.common.CdmBase;
46
import eu.etaxonomy.cdm.model.common.Language;
47
import eu.etaxonomy.cdm.model.common.TimePeriod;
48
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
49
import eu.etaxonomy.cdm.model.description.Feature;
50
import eu.etaxonomy.cdm.model.description.TaxonDescription;
51
import eu.etaxonomy.cdm.model.description.TextData;
52
import eu.etaxonomy.cdm.model.media.ImageFile;
53
import eu.etaxonomy.cdm.model.media.Media;
54
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
55
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
56
import eu.etaxonomy.cdm.model.media.Rights;
57
import eu.etaxonomy.cdm.model.media.RightsType;
58
import eu.etaxonomy.cdm.model.taxon.Synonym;
59
import eu.etaxonomy.cdm.model.taxon.Taxon;
60
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
61

  
62
/**
63
 * TODO copied from cyprus
64
 *
65
 * Creates CDM Media from images stored in the given path.
66
 *
67
 * Note: Currently adapted to also change from Scaler IIIF API to default Scaler API.
68
 * Note2: updateMetadata() still needs to be adapted to support 3 MediaRepresentations
69
 *
70
 * @author a.mueller
71
 * @since 05.2017
72
 */
73
public class CichorieaImageUpdateActivator {
74

  
75
	private static final Logger logger = Logger.getLogger(CichorieaImageUpdateActivator.class);
76

  
77
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_cichorieae();
78
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_cyprus();
79
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_cichorieae();
80

  
81
	static boolean testOnly = false;
82
	static boolean update_notCreate = true;
83
	//if true, data will always be updated, if false, only missing data will be updated
84
	static boolean forceUpdate = true;
85
	static boolean sizeOnly = true;
86

  
87
    private static final String path = "//media/digitalimages/EditWP6/Cichorieae/photos/";
88
    private static final String oldUrlPath = "https://pictures.bgbm.org/digilib/Scaler/IIIF/Cichorieae!";
89
    private static final String newUrlPath = "https://pictures.bgbm.org/digilib/Scaler?fn=Cichorieae/";
90
    private static final String oldPostfix = "/full/full/0/default.jpg";
91
    private static final String newPostfix = "&mo=file";
92
    private static final String mediumPostfix ="&mo=fit&dw=400&dh=400&uvfix=1";
93
    private static final String smallPostfix ="&mo=fit&dw=200&dh=200&uvfix=1";
94

  
95
    private ImportDeduplicationHelper deduplicationHelper;
96

  
97
    private void doImport(ICdmDataSource cdmDestination){
98

  
99
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
100
		TransactionStatus tx = app.startTransaction();
101

  
102
		deduplicationHelper = ImportDeduplicationHelper.NewInstance(app, null);
103

  
104
        File file = new File(path);
105
        String[] fileList = file.list();
106
        Set<String> notFound = new HashSet<>();
107

  
108
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
109
        Pattern pattern = Pattern.compile(regEx);
110

  
111
        String start = "O";  //O
112
        String end = "Q";      //Q
113
        String startLetter = "";
114

  
115
        for (String fileName : fileList){
116
            if(fileName.compareToIgnoreCase(start) < 0 || fileName.compareToIgnoreCase(end) >= 0){
117
                continue;
118
            }
119
            Matcher matcher = pattern.matcher(fileName);
120
            if (matcher.matches() ){
121
//                System.out.println(fileName);
122
                if (!fileName.substring(0,3).equals(startLetter)){
123
                    startLetter = fileName.substring(0,3);
124
                    System.out.println(startLetter);
125
                }
126
                String taxonName = matcher.group(1);
127
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
128
                Taxon taxon = getAcceptedTaxon(app, taxonName);
129
                if (taxon == null){
130
                    if (!notFound.contains(taxonName)){
131
                        notFound.add(taxonName);
132
                        logger.warn("Taxon not found: " + taxonName);
133
                    }
134
                }else{
135
                    try {
136
                        handleTaxon(app, taxon, fileName);
137
                    } catch (Exception e) {
138
                        logger.error("Unhandled exception ("+e.getMessage()+") when reading file " + fileName +". File not imported: ");
139
                        e.printStackTrace();
140
                    }
141
                }
142
            }else{
143
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
144
                    logger.warn("Incorrect filename:" + fileName);
145
                }else{
146
                    System.out.println("Not clear yet: " + fileName);
147
                }
148
            }
149
        }
150

  
151
//		app.getTaxonService().saveOrUpdate(taxaToSave);
152

  
153
		if (testOnly){
154
		    tx.setRollbackOnly();
155
		}
156
		app.commitTransaction(tx);
157
	}
158

  
159
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
160
        Map<String, Media> existingUrls = getAllExistingUrls(taxon);
161
        String pathToOldImage = oldUrlPath + fileName + oldPostfix;
162

  
163
        String pathToFullImage = newUrlPath + fileName + newPostfix;
164
        String pathToMediumImage = newUrlPath + fileName + mediumPostfix;
165
        String pathToSmallImage = newUrlPath + fileName + smallPostfix;
166

  
167
        if (containsAll(existingUrls, pathToFullImage, pathToMediumImage, pathToSmallImage)){
168
            return;
169
        }else{
170
            Media media;
171
            if (containsAny(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage)){
172
                media = getExistingMedia(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage);
173
                if (media == null){
174
                    return;
175
                }else if (media.getAllTitles().isEmpty()){
176
                    media.setTitleCache(null, false);
177
                    media.putTitle(Language.LATIN(), fileName);
178
                }
179
            }else{
180
                media = Media.NewInstance();
181
                makeMetaData(media, fileName, null, false, sizeOnly);
182

  
183
                makeTitle(media, fileName, false);
184
                if (!testOnly){
185
                    makeTextData(fileName, media, taxon);
186
                }
187
            }
188
            fillMediaWithAllRepresentations(media, pathToFullImage, pathToMediumImage, pathToSmallImage, pathToOldImage);
189
        }
190
    }
191

  
192
    private Media getExistingMedia(Map<String, Media> existingUrls, String pathToFullImage, String pathToMediumImage,
193
            String pathToSmallImage) {
194
        Set<Media> result = new HashSet<>();
195
        for(String existingUrl : existingUrls.keySet()){
196
            if (existingUrl.equals(pathToFullImage) || existingUrl.equals(pathToMediumImage) ||
197
                    existingUrl.equals(pathToSmallImage)){
198
                result.add(existingUrls.get(existingUrl));
199
            }
200
        }
201
        if (result.isEmpty()){
202
            logger.warn("Media for existing URL not found. This should not happen.");
203
            return null;
204
        }else if (result.size() > 1){
205
            logger.warn("Existing URLs have more than 1 Media. This should not happen.");
206
            return null;
207
        }else{
208
            return result.iterator().next();
209
        }
210
    }
211

  
212
    /**
213
     * <code>true</code> if all 3 paths exist in the URL set
214
     */
215
    private boolean containsAll(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
216
            String pathToSmallImage) {
217
        Set<String> existingUrls = existingUrlMap.keySet();
218
        return existingUrls.contains(pathToFullImage) && existingUrls.contains(pathToMediumImage)
219
                && existingUrls.contains(pathToSmallImage);
220
    }
221

  
222
    /**
223
     * <code>true</code> if any of the 3 paths exists in the URL set
224
     */
225
    private boolean containsAny(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
226
            String pathToSmallImage) {
227
        Set<String> existingUrls = existingUrlMap.keySet();
228
        return existingUrls.contains(pathToFullImage) || existingUrls.contains(pathToMediumImage)
229
                || existingUrls.contains(pathToSmallImage);
230
    }
231

  
232
    private void makeTitle(Media media, String fileName, boolean updateOnly) {
233
        String title = fileName.replace("_s_"," subsp. ")
234
                .replace("_"," ").replace(".jpg","").replace(".JPG","");
235
        if ( (!updateOnly) || media.getAllTitles().isEmpty()){
236
            media.putTitle(Language.LATIN(), title);
237
        }
238
    }
239

  
240
    private void makeMetaData(Media media, String fileName, ImageFile part, boolean updateOnly, boolean sizeOnly) {
241

  
242
        URI uri = part.getUri();
243
        Map<String, String> keywords = new HashMap<>();
244
        String copyright = null;
245
        String artistStr = null;
246
        String created = null;
247
        try{
248
            MediaInfoFactory mediaFactory = new MediaInfoFactory();
249
            CdmImageInfo imageInfo;
250
            try {
251
                imageInfo = mediaFactory.cdmImageInfo(uri, !sizeOnly);
252
            } catch (Exception e) {
253
                URI lowerCaseUri = URI.create(uri.toString().replace(".JPG", ".jpg"));
254
                try {
255
                    imageInfo = mediaFactory.cdmImageInfo(lowerCaseUri, !sizeOnly);
256
                    part.setUri(lowerCaseUri);  //if no error arises we expect this to be the better URI
257
                } catch (Exception e1) {
258
                    logger.error("Metadata not readable: " + uri.toString());
259
                    return;
260
                }
261
            }
262

  
263
            //size
264
            makeSize(part, imageInfo);
265
            if (sizeOnly){
266
                return;
267
            }
268

  
269
            //additional metadata
270
            for (String metaDataKey : imageInfo.getMetaData().keySet()){
271
                String value = imageInfo.getMetaData().get(metaDataKey);
272
//                System.out.println(metaDataKey +  ":    " + value);
273
                value = removeQuots(value); //not sure if still necessary
274
                if ("Copyright Notice".equalsIgnoreCase(metaDataKey)){
275
                    copyright = value;
276
                }else if ("artist".equals(metaDataKey)){
277
                    artistStr = value;
278
                }else if ("DateTimeOriginal".equalsIgnoreCase(metaDataKey)){  //TODO seems not to exist anymore
279
                    created = value;
280
                }else{
281
                    keywords.put(metaDataKey.trim().toLowerCase(), value);
282
                }
283
            }
284
        } catch (Exception e1) {
285
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from uri: " + part);
286
            e1.printStackTrace();
287
            return;
288
        }
289

  
290
        AgentBase<?> artistAgent = null;
291
        Rights right = null;
292
        DateTime createdDate = null;
293
        String locality = null;
294

  
295
        //artist
296
        if (keywords.get("photographer") != null){
297
            String artist = keywords.get("photographer");
298
            artistAgent = getOrCreatePerson(artist, fileName);
299
        }
300
        if (artistStr != null){
301
            if (keywords.get("photographer") == null){
302
                artistAgent = getOrCreatePerson(artistStr, fileName);
303
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
304
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
305
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
306
            }
307
        }
308

  
309
        //locality
310
        if (keywords.get("locality") != null){
311
            locality = keywords.get("locality");
312
        }
313

  
314
        //copyright
315
        if (copyright != null){
316
            AgentBase<?> agent;
317
            if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
318
                agent = Institution.NewNamedInstance(copyright);
319
            }else{
320
                agent = getOrCreatePerson(copyright, fileName);
321
            }
322
            right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
323
            right.setAgent(agent);
324
            right = deduplicationHelper.getExistingCopyright(right);
325
        }
326

  
327
        //created
328
        if (created != null){
329
            DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
330
            try {
331
                createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
332
            } catch (Exception e) {
333
                logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
334
            }
335
        }
336

  
337
        boolean force = !updateOnly || forceUpdate;
338
        //add to media
339
        if (artistAgent != null && (force || media.getArtist() == null)){
340
            media.setArtist(artistAgent);
341
        }
342
        if (right != null && (force || media.getRights().isEmpty())){
343
            media.removeRights(right);
344
            media.addRights(right);
345
        }
346
        if (createdDate != null && (force || media.getMediaCreated() == null)){
347
            media.setMediaCreated(TimePeriod.NewInstance(createdDate));
348
        }
349
        if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
350
            media.putDescription(Language.ENGLISH(), locality);
351
        }
352
    }
353

  
354
    private void makeSize(ImageFile part, CdmImageInfo imageInfo) {
355
        //h
356
        Integer height = part.getHeight();
357
        if (height == null || height != imageInfo.getHeight()){
358
            part.setHeight(imageInfo.getHeight());
359
        }
360
        //w
361
        Integer width = part.getWidth();
362
        if (width == null || width != imageInfo.getWidth()){
363
            part.setWidth(imageInfo.getWidth());
364
        }
365
        //s
366
        Integer size = part.getSize();
367
        if(size == null || size != imageInfo.getLength()){
368
            part.setSize((int)imageInfo.getLength());
369
        }
370
    }
371

  
372
    private Person getOrCreatePerson(String artist, String fileName) {
373
        artist = artist.trim();
374
        String regEx = "((?:[A-Z](?:\\.|[a-z\\-\u00E4\u00F6\u00FC]+) ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
375
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
376
        Person person = Person.NewInstance();
377
        if (matcher.matches()){
378
            person.setGivenName(matcher.group(1).trim());
379
            person.setFamilyName(matcher.group(2).trim());
380
        }else{
381
            person.setTitleCache(artist, true);
382
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
383
        }
384

  
385
        person = deduplicationHelper.getExistingAuthor(person);
386
        return person;
387
    }
388

  
389
    private String removeQuots(String text) {
390
        if (text.startsWith("'") && text.endsWith("'")){
391
            return text.substring(1, text.length() -1);
392
        }else{
393
            return text;
394
        }
395
    }
396

  
397
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
398
        TaxonDescription imageGallery = taxon.getImageGallery(true);
399
        TextData textData = null;
400
        if (!imageGallery.getElements().isEmpty()){
401
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
402
            if (el.isInstanceOf(TextData.class)){
403
                textData = CdmBase.deproxy(el, TextData.class);
404
            }else{
405
                logger.warn("Image gallery had non-textdata description element: " +  fileStr);
406
            }
407
        }
408
        if (textData == null){
409
            textData = TextData.NewInstance();
410
            textData.setFeature(Feature.IMAGE());
411
        }
412
        imageGallery.addElement(textData);
413
        textData.addMedia(media);
414
    }
415

  
416
    private void fillMediaWithAllRepresentations(Media media, String fullPath, String mediumPath, String smallPath, String oldFullPath){
417
        Set<MediaRepresentation> existingRepresentations = new HashSet<>(media.getRepresentations());
418
        makeMediaRepresentation(oldFullPath, media, existingRepresentations, fullPath);
419
        makeMediaRepresentation(mediumPath, media, existingRepresentations, null);
420
        makeMediaRepresentation(smallPath, media, existingRepresentations, null);
421
        if(!existingRepresentations.isEmpty()){
422
            logger.warn("Media contains existing representations which are not contained in the 3 paths: " + media.getTitleCache());
423
        }
424
    }
425

  
426
    private void makeMediaRepresentation(String uriString, Media media,
427
            Set<MediaRepresentation> existingRepresentations, String replaceUri) {
428
        MediaRepresentation existingMediaRep = getExistingMediaRepresentation(uriString, existingRepresentations);
429
        boolean readMediaData = true;
430
        MediaRepresentation newMediaRep = makeMediaRepresentation(replaceUri != null? replaceUri : uriString, readMediaData);
431
        if (existingMediaRep == null){
432
            media.addRepresentation(newMediaRep);
433
        }else{
434
            existingRepresentations.remove(existingMediaRep);
435
            mergeToExistingRepresentation(existingMediaRep, newMediaRep);
436
        }
437
    }
438

  
439
    private void mergeToExistingRepresentation(MediaRepresentation existingMediaRep, MediaRepresentation newMediaRep) {
440
        existingMediaRep.setMimeType(newMediaRep.getMimeType());
441
        existingMediaRep.setSuffix(newMediaRep.getSuffix());
442
        if(!existingMediaRep.getParts().isEmpty() && !newMediaRep.getParts().isEmpty()){
443
            MediaRepresentationPart existingPart = existingMediaRep.getParts().iterator().next();
444
            ImageFile newPart = (ImageFile)newMediaRep.getParts().iterator().next();
445
            if(existingPart.isInstanceOf(ImageFile.class)){
446
                ImageFile existingImage = CdmBase.deproxy(existingPart, ImageFile.class);
447
                existingImage.setHeight(newPart.getHeight());
448
                existingImage.setWidth(newPart.getWidth());
449
            }else{
450
                logger.warn("MediaRepresentationPart was not of type ImageFile. Height and width not merged: " + existingPart.getUri());
451
            }
452
            existingPart.setSize(newPart.getSize());
453
            existingPart.setUri(newPart.getUri());
454
        }
455
    }
456

  
457
    private MediaRepresentation getExistingMediaRepresentation(String uriString,
458
            Set<MediaRepresentation> existingRepresentations) {
459
        for (MediaRepresentation rep : existingRepresentations){
460
            for (MediaRepresentationPart part : rep.getParts()){
461
                if (part.getUri() != null && part.getUri().toString().equals(uriString)){
462
                    return rep;
463
                }
464
            }
465
        }
466
        return null;
467
    }
468

  
469
    /**
470
     * Creates
471
     * @see #READ_MEDIA_DATA
472
     * @return
473
     * @throws MalformedURLException
474
     */
475
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
476
        if( uriString == null){
477
            return null;
478
        } else {
479
            uriString = uriString.replace(" ", "%20");  //replace whitespace
480
            try {
481
                MediaRepresentation representation = makeMediaRepresentation(uriString, readMediaData);
482
                Media media = Media.NewInstance();
483
                media.addRepresentation(representation);
484

  
485
                if (uriStrThumb != null){
486
                    CdmImageInfo imageInfoThumb = null;
487
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
488
                    URI uriThumb = new URI(uriStrThumb);
489
                    try {
490
                        if (readMediaData){
491
                            logger.info("Read media data from: " + uriThumb);
492
//                          //imageInfoThumb = CdmImageInfo.NewInstance(uriThumb, 0);
493
                            imageInfoThumb = MediaInfoFileReader.legacyFactoryMethod(uriThumb)
494
                                    .readBaseInfo()
495
                                    .getCdmImageInfo();
496
                        }
497
                    } catch (Exception e) {
498
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
499
                        logger.warn(message);
500
                    }
501

  
502
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
503
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
504
                    if(imageInfoThumb != null){
505
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
506
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
507
                    }
508
                    reprThumb.addRepresentationPart(imageFileFhumb);
509
                    media.addRepresentation(reprThumb);
510
                }
511

  
512
                return media;
513
            } catch (URISyntaxException e1) {
514
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
515
                logger.warn(message);
516
                return null;
517
            }
518
        }
519
    }
520

  
521
    private MediaRepresentation makeMediaRepresentation(String uriString, boolean readMediaData) {
522

  
523
        uriString = uriString.replace(" ", "%20");  //replace whitespace
524
        CdmImageInfo imageInfo = null;
525
        URI uri;
526
        try {
527
            uri = new URI(uriString);
528
        } catch (URISyntaxException e1) {
529
            logger.error("Malformed URI. Could not create media representation: " + uriString);
530
            return null;
531
        }
532
        try {
533
            if (readMediaData){
534
                logger.info("Read media data from: " + uri);
535
                //imageInfo = CdmImageInfo.NewInstance(uri, 0);
536
                imageInfo = MediaInfoFileReader.legacyFactoryMethod(uri)
537
                        .readBaseInfo()
538
                        .getCdmImageInfo();
539
            }
540
        } catch (Exception e) {
541
            try {
542
                //try again
543
                //imageInfo = CdmImageInfo.NewInstance(uri, 0);
544
                imageInfo = MediaInfoFileReader.legacyFactoryMethod(uri)
545
                        .readBaseInfo()
546
                        .getCdmImageInfo();
547
            } catch (Exception e1) {
548
                String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e1.getMessage();
549
                e1.printStackTrace();
550
                logger.warn(message);
551
            }
552
        }
553
        ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
554

  
555
        MediaRepresentation representation = MediaRepresentation.NewInstance();
556

  
557
        if(imageInfo != null){
558
            representation.setMimeType(imageInfo.getMimeType());
559
            representation.setSuffix(imageInfo.getSuffix());
560
        }
561
        representation.addRepresentationPart(imageFile);
562
        return representation;
563
    }
564

  
565
    private Map<String, Media> getAllExistingUrls(Taxon taxon) {
566
        Map<String, Media> result = new HashMap<>();
567
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
568
        for (TaxonDescription td : descriptions){
569
            if (td.isImageGallery()){
570
                for (DescriptionElementBase deb : td.getElements()){
571
                    if (deb.isInstanceOf(TextData.class)){
572
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
573
                        for (Media media :textData.getMedia()){
574
                            for (MediaRepresentation rep : media.getRepresentations()){
575
                                for (MediaRepresentationPart part : rep.getParts()){
576
                                    URI uri = part.getUri();
577
                                    if (uri != null){
578
                                        String uriStr = uri.toString();
579
                                        result.put(uriStr, media);
580
                                    }
581
                                }
582
                            }
583
                        }
584
                    }
585
                }
586
            }
587
        }
588
        return result;
589
    }
590

  
591
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
592

  
593
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
594
        taxonNameStr = adaptName(taxonNameStr);
595
        config.setTaxonNameTitle(taxonNameStr);
596
        config.setIncludeSynonyms(false);
597
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
598
        if (list.isEmpty()){
599
//            logger.warn("Taxon not found for media: " + taxonNameStr);
600
            taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString());
601
            config.setTaxonNameTitle(taxonNameStr);
602
            list = app.getTaxonService().findTaxaByName(config);
603
            if (list.isEmpty()){
604
                return null;
605
            }else if (list.size() > 1){
606
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
607
            }
608
        }
609
        if (list.size()>1){
610
            Iterator<TaxonBase> it = list.iterator();
611
            while (it.hasNext()){
612
                Taxon next = (Taxon)it.next();
613
                if (next.getTaxonNodes().isEmpty() && !next.getTaxaForMisappliedName(true).isEmpty()){
614
                    it.remove();
615
                }
616
            }
617
            if (list.size()>1){
618
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
619
                it = list.iterator();
620
                while (it.hasNext()){
621
                    Taxon next = (Taxon)it.next();
622
                    if (next.getTaxonNodes().isEmpty()){
623
                        it.remove();
624
                    }
625
                }
626
                if (list.size()>1){
627
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
628
                }else if (list.size() < 1){
629
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
630
                    return null;
631
                }
632
            }else if (list.size() < 1){
633
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
634
                return null;
635
            }
636
        }
637
        TaxonBase<?> taxonBase = list.get(0);
638
        Taxon result;
639
        if (taxonBase.isInstanceOf(Synonym.class)){
640
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
641
        }else{
642
            result = CdmBase.deproxy(taxonBase, Taxon.class);
643
        }
644
        return result;
645
    }
646

  
647
    private String adaptName(String taxonNameStr) {
648
//        if (taxonNameStr.equals("Hypericum cerastoides")){
649
//            taxonNameStr = "Hypericum cerastioides";
650
//        }
651
        return taxonNameStr;
652
    }
653

  
654
	private void test(){
655
	    File f = new File(path);
656
	    String[] list = f.list();
657
	    List<String> fullFileNames = new ArrayList<>();
658
	    for (String fileName : list){
659
	        fullFileNames.add(path + fileName);
660
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
661
	            System.out.println(fileName);
662
	        }
663
	    }
664
	}
665

  
666
	private void updateMetadata(ICdmDataSource cdmDestination){
667
        CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
668
        TransactionStatus tx = app.startTransaction();
669

  
670
        deduplicationHelper = ImportDeduplicationHelper.NewInstance(app, null);
671

  
672
        List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
673
        for (Media media : list){
674
            handleSingleMediaUpdate(media);
675
        }
676

  
677
        if (testOnly){
678
            tx.setRollbackOnly();
679
        }
680
        app.commitTransaction(tx);
681
	}
682

  
683
    private void handleSingleMediaUpdate(Media media){
684
        ImageFile part = getUrlStringForMedia(media);
685
        if (part == null || part.getUri() == null){
686
            logger.warn("No uri found for media (id = " + media.getId() + ")");
687
            return;
688
        }
689
        String url = part.getUri().toString();
690
        if (url.startsWith(newUrlPath)){
691
            String fileName = url.replace(newUrlPath, "").replace("&mo=file", "");
692
            makeMetaData(media, fileName, part, true, sizeOnly);
693
            makeTitle(media, fileName, true);
694
            System.out.println(fileName);
695
        }else{
696
            logger.warn("URL does not start with standard url path: " + url);
697
        }
698
    }
699

  
700
    private ImageFile getUrlStringForMedia(Media media) {
701
        ImageFile result = null;
702
        for (MediaRepresentation rep : media.getRepresentations()){
703
            for (MediaRepresentationPart part : rep.getParts()){
704
                URI uri = part.getUri();
705
                if (uri != null){
706
                    if (result != null){
707
                        //TODO this still needs to be adapted to the 3 representations of media
708
                        logger.warn("More than 1 uri exists for media "+ media.getId());
709
                    }else if (!part.isInstanceOf(ImageFile.class)){
710
                        logger.warn("MediaRepresentationPart is not an ImageFile: " + uri);
711
                    }else{
712
                        result = CdmBase.deproxy(part, ImageFile.class);
713
                    }
714
                }
715
            }
716
        }
717
        return result;
718
    }
719

  
720
	public static void main(String[] args) {
721
		CichorieaImageUpdateActivator me = new CichorieaImageUpdateActivator();
722
		if (update_notCreate){
723
		    me.updateMetadata(cdmDestination);
724
		}else{
725
		    me.doImport(cdmDestination);
726
		}
727
//		me.test();
728
		System.exit(0);
729
	}
730
}

Also available in: Unified diff