Project

General

Profile

Download (29.1 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.cyprus;
10

    
11
import java.io.File;
12
import java.io.IOException;
13
import java.net.MalformedURLException;
14
import java.net.URISyntaxException;
15
import java.util.ArrayList;
16
import java.util.HashMap;
17
import java.util.HashSet;
18
import java.util.Iterator;
19
import java.util.List;
20
import java.util.Map;
21
import java.util.Set;
22
import java.util.regex.Matcher;
23
import java.util.regex.Pattern;
24

    
25
import org.apache.commons.imaging.ImageReadException;
26
import org.apache.commons.imaging.Imaging;
27
import org.apache.commons.imaging.common.GenericImageMetadata.GenericImageMetadataItem;
28
import org.apache.commons.imaging.common.ImageMetadata;
29
import org.apache.commons.imaging.common.ImageMetadata.ImageMetadataItem;
30
import org.apache.log4j.Logger;
31
import org.joda.time.DateTime;
32
import org.joda.time.format.DateTimeFormat;
33
import org.joda.time.format.DateTimeFormatter;
34
import org.springframework.transaction.TransactionStatus;
35

    
36
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
37
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
38
import eu.etaxonomy.cdm.app.common.CdmDestinations;
39
import eu.etaxonomy.cdm.common.URI;
40
import eu.etaxonomy.cdm.common.UTF8;
41
import eu.etaxonomy.cdm.common.media.CdmImageInfo;
42
import eu.etaxonomy.cdm.database.DbSchemaValidation;
43
import eu.etaxonomy.cdm.database.ICdmDataSource;
44
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
45
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
46
import eu.etaxonomy.cdm.model.agent.AgentBase;
47
import eu.etaxonomy.cdm.model.agent.Institution;
48
import eu.etaxonomy.cdm.model.agent.Person;
49
import eu.etaxonomy.cdm.model.common.CdmBase;
50
import eu.etaxonomy.cdm.model.common.Language;
51
import eu.etaxonomy.cdm.model.common.TimePeriod;
52
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53
import eu.etaxonomy.cdm.model.description.Feature;
54
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55
import eu.etaxonomy.cdm.model.description.TextData;
56
import eu.etaxonomy.cdm.model.media.ImageFile;
57
import eu.etaxonomy.cdm.model.media.Media;
58
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
59
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
60
import eu.etaxonomy.cdm.model.media.Rights;
61
import eu.etaxonomy.cdm.model.media.RightsType;
62
import eu.etaxonomy.cdm.model.taxon.Synonym;
63
import eu.etaxonomy.cdm.model.taxon.Taxon;
64
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
65

    
66
/**
67
 * Creates CDM Media from images stored in the given path.
68
 *
69
 * Note: Currently adapted to also change from Scaler IIIF API to default Scaler API.
70
 * Note2: updateMetadata() still needs to be adapted to support 3 MediaRepresentations
71
 *
72
 * @author a.mueller
73
 * @since 05.2017
74
 */
75
public class CyprusImagesActivator {
76
	private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
77

    
78
	static final ICdmDataSource cdmDestination = CdmDestinations.local_cyprus();
79
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_cyprus();
80
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_cyprus();
81

    
82
	static boolean testOnly = false;
83
	static boolean update_notCreate = false;
84
	//if true, data will always be updated, if false, only missing data will be updated
85
	static boolean forceUpdate = false;
86

    
87
    private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
88
    private static final String oldUrlPath = "https://pictures.bgbm.org/digilib/Scaler/IIIF/Cyprus!";
89
    private static final String newUrlPath = "https://pictures.bgbm.org/digilib/Scaler?fn=Cyprus/";
90
    private static final String oldPostfix = "/full/full/0/default.jpg";
91
    private static final String newPostfix = "&mo=file";
92
    private static final String mediumPostfix ="&mo=fit&dw=400&dh=400&uvfix=1";
93
    private static final String smallPostfix ="&mo=fit&dw=200&dh=200&uvfix=1";
94

    
95
    private ImportDeduplicationHelper deduplicationHelper;
96

    
97
    private void doImport(ICdmDataSource cdmDestination){
98

    
99
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
100
		TransactionStatus tx = app.startTransaction();
101

    
102
		deduplicationHelper = ImportDeduplicationHelper.NewInstance(app, null);
103

    
104
        File file = new File(path);
105
        String[] fileList = file.list();
106
        Set<String> notFound = new HashSet<>();
107

    
108
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
109
        Pattern pattern = Pattern.compile(regEx);
110

    
111
        String start = "O";  //O
112
        String end = "Q";      //Q
113
        String startLetter = "";
114

    
115
        for (String fileName : fileList){
116
            if(fileName.compareToIgnoreCase(start) < 0 || fileName.compareToIgnoreCase(end) >= 0){
117
                continue;
118
            }
119
            Matcher matcher = pattern.matcher(fileName);
120
            if (matcher.matches() ){
121
//                System.out.println(fileName);
122
                if (!fileName.substring(0,3).equals(startLetter)){
123
                    startLetter = fileName.substring(0,3);
124
                    System.out.println(startLetter);
125
                }
126
                String taxonName = matcher.group(1);
127
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
128
                Taxon taxon = getAcceptedTaxon(app, taxonName);
129
                if (taxon == null){
130
                    if (!notFound.contains(taxonName)){
131
                        notFound.add(taxonName);
132
                        logger.warn("Taxon not found: " + taxonName);
133
                    }
134
                }else{
135
                    try {
136
                        handleTaxon(app, taxon, fileName);
137
                    } catch (Exception e) {
138
                        logger.error("Unhandled exception ("+e.getMessage()+") when reading file " + fileName +". File not imported: ");
139
                        e.printStackTrace();
140
                    }
141
                }
142
            }else{
143
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
144
                    logger.warn("Incorrect filename:" + fileName);
145
                }else{
146
                    System.out.println("Not clear yet: " + fileName);
147
                }
148
            }
149
        }
150

    
151
//		app.getTaxonService().saveOrUpdate(taxaToSave);
152

    
153
		if (testOnly){
154
		    tx.setRollbackOnly();
155
		}
156
		app.commitTransaction(tx);
157
	}
158

    
159
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
160
        Map<String, Media> existingUrls = getAllExistingUrls(taxon);
161
        String pathToOldImage = oldUrlPath + fileName + oldPostfix;
162

    
163
        String pathToFullImage = newUrlPath + fileName + newPostfix;
164
        String pathToMediumImage = newUrlPath + fileName + mediumPostfix;
165
        String pathToSmallImage = newUrlPath + fileName + smallPostfix;
166

    
167
        if (containsAll(existingUrls, pathToFullImage, pathToMediumImage, pathToSmallImage)){
168
            return;
169
        }else{
170
            Media media;
171
            if (containsAny(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage)){
172
                media = getExistingMedia(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage);
173
                if (media == null){
174
                    return;
175
                }else if (media.getAllTitles().isEmpty()){
176
                    media.setTitleCache(null, false);
177
                    media.putTitle(Language.LATIN(), fileName);
178
                }
179
            }else{
180
                media = Media.NewInstance();
181
                makeMetaData(media, fileName, false);
182
                makeTitle(media, fileName, false);
183
                if (!testOnly){
184
                    makeTextData(fileName, media, taxon);
185
                }
186
            }
187
            fillMediaWithAllRepresentations(media, pathToFullImage, pathToMediumImage, pathToSmallImage, pathToOldImage);
188
        }
189
    }
190

    
191
    private Media getExistingMedia(Map<String, Media> existingUrls, String pathToFullImage, String pathToMediumImage,
192
            String pathToSmallImage) {
193
        Set<Media> result = new HashSet<>();
194
        for(String existingUrl : existingUrls.keySet()){
195
            if (existingUrl.equals(pathToFullImage) || existingUrl.equals(pathToMediumImage) ||
196
                    existingUrl.equals(pathToSmallImage)){
197
                result.add(existingUrls.get(existingUrl));
198
            }
199
        }
200
        if (result.isEmpty()){
201
            logger.warn("Media for existing URL not found. This should not happen.");
202
            return null;
203
        }else if (result.size() > 1){
204
            logger.warn("Existing URLs have more than 1 Media. This should not happen.");
205
            return null;
206
        }else{
207
            return result.iterator().next();
208
        }
209
    }
210

    
211
    /**
212
     * <code>true</code> if all 3 paths exist in the URL set
213
     */
214
    private boolean containsAll(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
215
            String pathToSmallImage) {
216
        Set<String> existingUrls = existingUrlMap.keySet();
217
        return existingUrls.contains(pathToFullImage) && existingUrls.contains(pathToMediumImage)
218
                && existingUrls.contains(pathToSmallImage);
219
    }
220

    
221
    /**
222
     * <code>true</code> if any of the 3 paths exists in the URL set
223
     */
224
    private boolean containsAny(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
225
            String pathToSmallImage) {
226
        Set<String> existingUrls = existingUrlMap.keySet();
227
        return existingUrls.contains(pathToFullImage) || existingUrls.contains(pathToMediumImage)
228
                || existingUrls.contains(pathToSmallImage);
229
    }
230

    
231
    private void makeTitle(Media media, String fileName, boolean updateOnly) {
232
        String title = fileName.replace("_s_"," subsp. ")
233
                .replace("_"," ").replace(".jpg","").replace(".JPG","");
234
        if ( (!updateOnly) || media.getAllTitles().isEmpty()){
235
            media.putTitle(Language.LATIN(), title);
236
        }
237
    }
238

    
239
    private void makeMetaData(Media media, String fileName, boolean updateOnly) {
240

    
241
        File file = new File(path + fileName);
242
        if (!file.exists()){
243
            logger.warn("File for filename " +  fileName + " does not exist.");
244
            return;
245
        }
246

    
247
        Map<String, String> keywords = new HashMap<>();
248
        String copyright = null;
249
        String artistStr = null;
250
        String created = null;
251
        try{
252
//            IImageMetadata metadata = Sanselan.getMetadata(file);
253
            ImageMetadata metadata = Imaging.getMetadata(file);
254
            List<? extends ImageMetadataItem> items = metadata.getItems();
255
            for (ImageMetadataItem metadataItem : items){
256
//                System.out.println(item.getKeyword() +  ":    " + item.getText());
257
                if (metadataItem instanceof GenericImageMetadataItem){
258
                    GenericImageMetadataItem item = (GenericImageMetadataItem) metadataItem;
259

    
260
                    String keyword = item.getKeyword().toLowerCase();
261
                    String value =removeQuots(item.getText());
262

    
263
                    if("keywords".equals(keyword)){
264
                        String[] splits = value.split(":");
265
                        if (splits.length == 2){
266
                            keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
267
                        }else{
268
                            logger.warn("Keyword has not correct format and can not be parsed: " + value +  "  for file " + fileName);
269
                        }
270
                    }else if ("Copyright Notice".equalsIgnoreCase(keyword)){
271
                        copyright = value;
272
                    }else if ("artist".equals(keyword)){
273
                        artistStr = value;
274
                    }else if ("date time original".equalsIgnoreCase(item.getKeyword())){
275
                        created = value;
276
                    }
277
                }
278
            }
279
        } catch (ImageReadException | IOException e1) {
280
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
281
            e1.printStackTrace();
282
        }
283

    
284
        AgentBase<?> artistAgent = null;
285
        Rights right = null;
286
        DateTime createdDate = null;
287
        String locality = null;
288

    
289
        //artist
290
        if (keywords.get("photographer") != null){
291
            String artist = keywords.get("photographer");
292
            artistAgent = makePerson(artist, fileName);
293
        }
294
        if (artistStr != null){
295
            if (keywords.get("photographer") == null){
296
                artistAgent = makePerson(artistStr, fileName);
297
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
298
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
299
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
300
            }
301
        }
302

    
303
        //locality
304
        if (keywords.get("locality") != null){
305
            locality = keywords.get("locality");
306
        }
307

    
308
        //copyright
309
        if (copyright != null){
310
            AgentBase<?> agent;
311
            if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
312
                agent = Institution.NewNamedInstance(copyright);
313
            }else{
314
                agent = makePerson(copyright, fileName);
315
            }
316
            right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
317
            right.setAgent(agent);
318
            right = deduplicationHelper.getExistingCopyright(right);
319
        }
320

    
321
        //created
322
        if (created != null){
323
            DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
324
            try {
325
                createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
326
            } catch (Exception e) {
327
                logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
328
            }
329
        }
330

    
331
        boolean force = !updateOnly || forceUpdate;
332
        //add to media
333
        if (artistAgent != null && (force || media.getArtist() == null)){
334
            media.setArtist(artistAgent);
335
        }
336
        if (right != null && (force || media.getRights().isEmpty())){
337
            media.removeRights(right);
338
            media.addRights(right);
339
        }
340
        if (createdDate != null && (force || media.getMediaCreated() == null)){
341
            media.setMediaCreated(TimePeriod.NewInstance(createdDate));
342
        }
343
        if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
344
            media.putDescription(Language.ENGLISH(), locality);
345
        }
346
    }
347

    
348
    private Person makePerson(String artist, String fileName) {
349
        artist = artist.trim();
350
        String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
351
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
352
        Person person = Person.NewInstance();
353
        if (matcher.matches()){
354
            person.setGivenName(matcher.group(1).trim());
355
            person.setFamilyName(matcher.group(2).trim());
356
        }else{
357
            person.setTitleCache(artist, true);
358
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
359
        }
360

    
361
        person = deduplicationHelper.getExistingAuthor(person);
362
        return person;
363
    }
364

    
365
    private String removeQuots(String text) {
366
        if (text.startsWith("'") && text.endsWith("'")){
367
            return text.substring(1, text.length() -1);
368
        }else{
369
            return text;
370
        }
371
    }
372

    
373
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
374
        TaxonDescription imageGallery = taxon.getImageGallery(true);
375
        TextData textData = null;
376
        if (!imageGallery.getElements().isEmpty()){
377
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
378
            if (el.isInstanceOf(TextData.class)){
379
                textData = CdmBase.deproxy(el, TextData.class);
380
            }else{
381
                logger.warn("Image gallery had non-textdata description element: " +  fileStr);
382
            }
383
        }
384
        if (textData == null){
385
            textData = TextData.NewInstance();
386
            textData.setFeature(Feature.IMAGE());
387
        }
388
        imageGallery.addElement(textData);
389
        textData.addMedia(media);
390
    }
391

    
392
    private void fillMediaWithAllRepresentations(Media media, String fullPath, String mediumPath, String smallPath, String oldFullPath){
393
        Set<MediaRepresentation> existingRepresentations = new HashSet<>(media.getRepresentations());
394
        makeMediaRepresentation(oldFullPath, media, existingRepresentations, fullPath);
395
        makeMediaRepresentation(mediumPath, media, existingRepresentations, null);
396
        makeMediaRepresentation(smallPath, media, existingRepresentations, null);
397
        if(!existingRepresentations.isEmpty()){
398
            logger.warn("Media contains existing representations which are not contained in the 3 paths: " + media.getTitleCache());
399
        }
400
    }
401

    
402
    private void makeMediaRepresentation(String uriString, Media media,
403
            Set<MediaRepresentation> existingRepresentations, String replaceUri) {
404
        MediaRepresentation existingMediaRep = getExistingMediaRepresentation(uriString, existingRepresentations);
405
        boolean readMediaData = true;
406
        MediaRepresentation newMediaRep = makeMediaRepresentation(replaceUri != null? replaceUri : uriString, readMediaData);
407
        if (existingMediaRep == null){
408
            media.addRepresentation(newMediaRep);
409
        }else{
410
            existingRepresentations.remove(existingMediaRep);
411
            mergeToExistingRepresentation(existingMediaRep, newMediaRep);
412
        }
413
    }
414

    
415
    private void mergeToExistingRepresentation(MediaRepresentation existingMediaRep, MediaRepresentation newMediaRep) {
416
        existingMediaRep.setMimeType(newMediaRep.getMimeType());
417
        existingMediaRep.setSuffix(newMediaRep.getSuffix());
418
        if(!existingMediaRep.getParts().isEmpty() && !newMediaRep.getParts().isEmpty()){
419
            MediaRepresentationPart existingPart = existingMediaRep.getParts().iterator().next();
420
            ImageFile newPart = (ImageFile)newMediaRep.getParts().iterator().next();
421
            if(existingPart.isInstanceOf(ImageFile.class)){
422
                ImageFile existingImage = CdmBase.deproxy(existingPart, ImageFile.class);
423
                existingImage.setHeight(newPart.getHeight());
424
                existingImage.setWidth(newPart.getWidth());
425
            }else{
426
                logger.warn("MediaRepresentationPart was not of type ImageFile. Height and width not merged: " + existingPart.getUri());
427
            }
428
            existingPart.setSize(newPart.getSize());
429
            existingPart.setUri(newPart.getUri());
430
        }
431
    }
432

    
433
    private MediaRepresentation getExistingMediaRepresentation(String uriString,
434
            Set<MediaRepresentation> existingRepresentations) {
435
        for (MediaRepresentation rep : existingRepresentations){
436
            for (MediaRepresentationPart part : rep.getParts()){
437
                if (part.getUri() != null && part.getUri().toString().equals(uriString)){
438
                    return rep;
439
                }
440
            }
441
        }
442
        return null;
443
    }
444

    
445
    /**
446
     * Creates
447
     * @see #READ_MEDIA_DATA
448
     * @return
449
     * @throws MalformedURLException
450
     */
451
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
452
        if( uriString == null){
453
            return null;
454
        } else {
455
            uriString = uriString.replace(" ", "%20");  //replace whitespace
456
            try {
457
                MediaRepresentation representation = makeMediaRepresentation(uriString, readMediaData);
458
                Media media = Media.NewInstance();
459
                media.addRepresentation(representation);
460

    
461
                if (uriStrThumb != null){
462
                    CdmImageInfo imageInfoThumb = null;
463
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
464
                    URI uriThumb = new URI(uriStrThumb);
465
                    try {
466
                        if (readMediaData){
467
                            logger.info("Read media data from: " + uriThumb);
468
                            imageInfoThumb = CdmImageInfo.NewInstance(uriThumb, 0);
469
                        }
470
                    } catch (Exception e) {
471
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
472
                        logger.warn(message);
473
                    }
474

    
475
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
476
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
477
                    if(imageInfoThumb != null){
478
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
479
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
480
                    }
481
                    reprThumb.addRepresentationPart(imageFileFhumb);
482
                    media.addRepresentation(reprThumb);
483
                }
484

    
485
                return media;
486
            } catch (URISyntaxException e1) {
487
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
488
                logger.warn(message);
489
                return null;
490
            }
491
        }
492
    }
493

    
494
    private MediaRepresentation makeMediaRepresentation(String uriString, boolean readMediaData) {
495

    
496
        uriString = uriString.replace(" ", "%20");  //replace whitespace
497
        CdmImageInfo imageInfo = null;
498
        URI uri;
499
        try {
500
            uri = new URI(uriString);
501
        } catch (URISyntaxException e1) {
502
            logger.error("Malformed URI. Could not create media representation: " + uriString);
503
            return null;
504
        }
505
        try {
506
            if (readMediaData){
507
                logger.info("Read media data from: " + uri);
508
                imageInfo = CdmImageInfo.NewInstance(uri, 0);
509
            }
510
        } catch (Exception e) {
511
            try {
512
                //try again
513
                imageInfo = CdmImageInfo.NewInstance(uri, 0);
514
            } catch (Exception e1) {
515
                String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e1.getMessage();
516
                e1.printStackTrace();
517
                logger.warn(message);
518
            }
519
        }
520
        ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
521

    
522
        MediaRepresentation representation = MediaRepresentation.NewInstance();
523

    
524
        if(imageInfo != null){
525
            representation.setMimeType(imageInfo.getMimeType());
526
            representation.setSuffix(imageInfo.getSuffix());
527
        }
528
        representation.addRepresentationPart(imageFile);
529
        return representation;
530
    }
531

    
532
    private Map<String, Media> getAllExistingUrls(Taxon taxon) {
533
        Map<String, Media> result = new HashMap<>();
534
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
535
        for (TaxonDescription td : descriptions){
536
            if (td.isImageGallery()){
537
                for (DescriptionElementBase deb : td.getElements()){
538
                    if (deb.isInstanceOf(TextData.class)){
539
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
540
                        for (Media media :textData.getMedia()){
541
                            for (MediaRepresentation rep : media.getRepresentations()){
542
                                for (MediaRepresentationPart part : rep.getParts()){
543
                                    URI uri = part.getUri();
544
                                    if (uri != null){
545
                                        String uriStr = uri.toString();
546
                                        result.put(uriStr, media);
547
                                    }
548
                                }
549
                            }
550
                        }
551
                    }
552
                }
553
            }
554
        }
555
        return result;
556
    }
557

    
558
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
559

    
560
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
561
        taxonNameStr = adaptName(taxonNameStr);
562
        config.setTaxonNameTitle(taxonNameStr);
563
        config.setIncludeSynonyms(false);
564
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
565
        if (list.isEmpty()){
566
//            logger.warn("Taxon not found for media: " + taxonNameStr);
567
            taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString());
568
            config.setTaxonNameTitle(taxonNameStr);
569
            list = app.getTaxonService().findTaxaByName(config);
570
            if (list.isEmpty()){
571
                return null;
572
            }else if (list.size() > 1){
573
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
574
            }
575
        }
576
        if (list.size()>1){
577
            Iterator<TaxonBase> it = list.iterator();
578
            while (it.hasNext()){
579
                Taxon next = (Taxon)it.next();
580
                if (next.getTaxonNodes().isEmpty() && !next.getTaxaForMisappliedName(true).isEmpty()){
581
                    it.remove();
582
                }
583
            }
584
            if (list.size()>1){
585
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
586
                it = list.iterator();
587
                while (it.hasNext()){
588
                    Taxon next = (Taxon)it.next();
589
                    if (next.getTaxonNodes().isEmpty()){
590
                        it.remove();
591
                    }
592
                }
593
                if (list.size()>1){
594
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
595
                }else if (list.size() < 1){
596
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
597
                    return null;
598
                }
599
            }else if (list.size() < 1){
600
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
601
                return null;
602
            }
603
        }
604
        TaxonBase<?> taxonBase = list.get(0);
605
        Taxon result;
606
        if (taxonBase.isInstanceOf(Synonym.class)){
607
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
608
        }else{
609
            result = CdmBase.deproxy(taxonBase, Taxon.class);
610
        }
611
        return result;
612
    }
613

    
614
    private String adaptName(String taxonNameStr) {
615
//        if (taxonNameStr.equals("Hypericum cerastoides")){
616
//            taxonNameStr = "Hypericum cerastioides";
617
//        }
618
        return taxonNameStr;
619
    }
620

    
621
	private void test(){
622
	    File f = new File(path);
623
	    String[] list = f.list();
624
	    List<String> fullFileNames = new ArrayList<>();
625
	    for (String fileName : list){
626
	        fullFileNames.add(path + fileName);
627
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
628
	            System.out.println(fileName);
629
	        }
630
	    }
631
	}
632

    
633
	private void updateMetadata(ICdmDataSource cdmDestination){
634
        CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
635
        TransactionStatus tx = app.startTransaction();
636

    
637
        deduplicationHelper = ImportDeduplicationHelper.NewInstance(app, null);
638

    
639
        List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
640
        for (Media media : list){
641
            String fileName = getUrlStringForMedia(media);
642
            if (fileName.startsWith(newUrlPath)){
643
                //TODO not yet adapted to new image server URLs
644
                fileName = fileName.replace(newUrlPath, "");
645
                if (fileName.equals("Acinos_exiguus_C1.jpg")){  //for debugging only
646
//                  System.out.println(fileName);
647
                    makeMetaData(media, fileName, true);
648
                    makeTitle(media, fileName, true);
649
                }
650
            }else{
651
                logger.warn("Filename does not start with standard url path: " + fileName);
652
            }
653
        }
654

    
655
        if (testOnly){
656
            tx.setRollbackOnly();
657
        }
658
        app.commitTransaction(tx);
659
	}
660

    
661
    private String getUrlStringForMedia(Media media) {
662
        String result = null;
663
        for (MediaRepresentation rep : media.getRepresentations()){
664
            for (MediaRepresentationPart part : rep.getParts()){
665
                URI uri = part.getUri();
666
                if (uri != null){
667
                    if (result != null){
668
                        //TODO this still needs to be adapted to the 3 representations of media
669
                        logger.warn("More than 1 uri exists for media "+ media.getId());
670
                    }else{
671
                        result = uri.toString();
672
                    }
673
                }
674
            }
675
        }
676
        return result;
677
    }
678

    
679
	public static void main(String[] args) {
680
		CyprusImagesActivator me = new CyprusImagesActivator();
681
		if (update_notCreate){
682
		    me.updateMetadata(cdmDestination);
683
		}else{
684
		    me.doImport(cdmDestination);
685
		}
686
//		me.test();
687
		System.exit(0);
688
	}
689
}
(3-3/5)