Project

General

Profile

Download (29.3 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.app.cyprus;
10

    
11
import java.io.File;
12
import java.io.IOException;
13
import java.net.MalformedURLException;
14
import java.net.URISyntaxException;
15
import java.util.ArrayList;
16
import java.util.HashMap;
17
import java.util.HashSet;
18
import java.util.Iterator;
19
import java.util.List;
20
import java.util.Map;
21
import java.util.Set;
22
import java.util.regex.Matcher;
23
import java.util.regex.Pattern;
24

    
25
import org.apache.commons.imaging.ImageReadException;
26
import org.apache.commons.imaging.Imaging;
27
import org.apache.commons.imaging.common.GenericImageMetadata.GenericImageMetadataItem;
28
import org.apache.commons.imaging.common.ImageMetadata;
29
import org.apache.commons.imaging.common.ImageMetadata.ImageMetadataItem;
30
import org.apache.log4j.Logger;
31
import org.joda.time.DateTime;
32
import org.joda.time.format.DateTimeFormat;
33
import org.joda.time.format.DateTimeFormatter;
34
import org.springframework.transaction.TransactionStatus;
35

    
36
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
37
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
38
import eu.etaxonomy.cdm.app.common.CdmDestinations;
39
import eu.etaxonomy.cdm.common.URI;
40
import eu.etaxonomy.cdm.common.UTF8;
41
import eu.etaxonomy.cdm.common.media.CdmImageInfo;
42
import eu.etaxonomy.cdm.database.DbSchemaValidation;
43
import eu.etaxonomy.cdm.database.ICdmDataSource;
44
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
45
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
46
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
47
import eu.etaxonomy.cdm.model.agent.AgentBase;
48
import eu.etaxonomy.cdm.model.agent.Institution;
49
import eu.etaxonomy.cdm.model.agent.Person;
50
import eu.etaxonomy.cdm.model.common.CdmBase;
51
import eu.etaxonomy.cdm.model.common.Language;
52
import eu.etaxonomy.cdm.model.common.TimePeriod;
53
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
54
import eu.etaxonomy.cdm.model.description.Feature;
55
import eu.etaxonomy.cdm.model.description.TaxonDescription;
56
import eu.etaxonomy.cdm.model.description.TextData;
57
import eu.etaxonomy.cdm.model.media.ImageFile;
58
import eu.etaxonomy.cdm.model.media.Media;
59
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
60
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
61
import eu.etaxonomy.cdm.model.media.Rights;
62
import eu.etaxonomy.cdm.model.media.RightsType;
63
import eu.etaxonomy.cdm.model.taxon.Synonym;
64
import eu.etaxonomy.cdm.model.taxon.Taxon;
65
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
66

    
67
/**
68
 * Creates CDM Media from images stored in the given path.
69
 *
70
 * Note: Currently adapted to also change from Scaler IIIF API to default Scaler API.
71
 * Note2: updateMetadata() still needs to be adapted to support 3 MediaRepresentations
72
 *
73
 * @author a.mueller
74
 * @since 05.2017
75
 */
76
public class CyprusImagesActivator {
77
	private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
78

    
79
	static final ICdmDataSource cdmDestination = CdmDestinations.local_cyprus();
80
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_cyprus();
81
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_cyprus();
82

    
83
	static boolean testOnly = false;
84
	static boolean update_notCreate = false;
85
	//if true, data will always be updated, if false, only missing data will be updated
86
	static boolean forceUpdate = false;
87

    
88
    private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
89
    private static final String oldUrlPath = "https://pictures.bgbm.org/digilib/Scaler/IIIF/Cyprus!";
90
    private static final String newUrlPath = "https://pictures.bgbm.org/digilib/Scaler?fn=Cyprus/";
91
    private static final String oldPostfix = "/full/full/0/default.jpg";
92
    private static final String newPostfix = "&mo=file";
93
    private static final String mediumPostfix ="&mo=fit&dw=400&dh=400&uvfix=1";
94
    private static final String smallPostfix ="&mo=fit&dw=200&dh=200&uvfix=1";
95

    
96
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
97

    
98
    private void doImport(ICdmDataSource cdmDestination){
99

    
100
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
101
		TransactionStatus tx = app.startTransaction();
102

    
103
		deduplicationHelper = (ImportDeduplicationHelper)ImportDeduplicationHelper.NewInstance(app, null);
104

    
105
        File file = new File(path);
106
        String[] fileList = file.list();
107
        Set<String> notFound = new HashSet<>();
108

    
109
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
110
        Pattern pattern = Pattern.compile(regEx);
111

    
112
        String start = "O";  //O
113
        String end = "Q";      //Q
114
        String startLetter = "";
115

    
116
        for (String fileName : fileList){
117
            if(fileName.compareToIgnoreCase(start) < 0 || fileName.compareToIgnoreCase(end) >= 0){
118
                continue;
119
            }
120
            Matcher matcher = pattern.matcher(fileName);
121
            if (matcher.matches() ){
122
//                System.out.println(fileName);
123
                if (!fileName.substring(0,3).equals(startLetter)){
124
                    startLetter = fileName.substring(0,3);
125
                    System.out.println(startLetter);
126
                }
127
                String taxonName = matcher.group(1);
128
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
129
                Taxon taxon = getAcceptedTaxon(app, taxonName);
130
                if (taxon == null){
131
                    if (!notFound.contains(taxonName)){
132
                        notFound.add(taxonName);
133
                        logger.warn("Taxon not found: " + taxonName);
134
                    }
135
                }else{
136
                    try {
137
                        handleTaxon(app, taxon, fileName);
138
                    } catch (Exception e) {
139
                        logger.error("Unhandled exception ("+e.getMessage()+") when reading file " + fileName +". File not imported: ");
140
                        e.printStackTrace();
141
                    }
142
                }
143
            }else{
144
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
145
                    logger.warn("Incorrect filename:" + fileName);
146
                }else{
147
                    System.out.println("Not clear yet: " + fileName);
148
                }
149
            }
150
        }
151

    
152
//		app.getTaxonService().saveOrUpdate(taxaToSave);
153

    
154
		if (testOnly){
155
		    tx.setRollbackOnly();
156
		}
157
		app.commitTransaction(tx);
158
	}
159

    
160
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
161
        Map<String, Media> existingUrls = getAllExistingUrls(taxon);
162
        String pathToOldImage = oldUrlPath + fileName + oldPostfix;
163

    
164
        String pathToFullImage = newUrlPath + fileName + newPostfix;
165
        String pathToMediumImage = newUrlPath + fileName + mediumPostfix;
166
        String pathToSmallImage = newUrlPath + fileName + smallPostfix;
167

    
168
        if (containsAll(existingUrls, pathToFullImage, pathToMediumImage, pathToSmallImage)){
169
            return;
170
        }else{
171
            Media media;
172
            if (containsAny(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage)){
173
                media = getExistingMedia(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage);
174
                if (media == null){
175
                    return;
176
                }else if (media.getAllTitles().isEmpty()){
177
                    media.setTitleCache(null, false);
178
                    media.putTitle(Language.LATIN(), fileName);
179
                }
180
            }else{
181
                media = Media.NewInstance();
182
                makeMetaData(media, fileName, false);
183
                makeTitle(media, fileName, false);
184
                if (!testOnly){
185
                    makeTextData(fileName, media, taxon);
186
                }
187
            }
188
            fillMediaWithAllRepresentations(media, pathToFullImage, pathToMediumImage, pathToSmallImage, pathToOldImage);
189
        }
190
    }
191

    
192
    private Media getExistingMedia(Map<String, Media> existingUrls, String pathToFullImage, String pathToMediumImage,
193
            String pathToSmallImage) {
194
        Set<Media> result = new HashSet<>();
195
        for(String existingUrl : existingUrls.keySet()){
196
            if (existingUrl.equals(pathToFullImage) || existingUrl.equals(pathToMediumImage) ||
197
                    existingUrl.equals(pathToSmallImage)){
198
                result.add(existingUrls.get(existingUrl));
199
            }
200
        }
201
        if (result.isEmpty()){
202
            logger.warn("Media for existing URL not found. This should not happen.");
203
            return null;
204
        }else if (result.size() > 1){
205
            logger.warn("Existing URLs have more than 1 Media. This should not happen.");
206
            return null;
207
        }else{
208
            return result.iterator().next();
209
        }
210
    }
211

    
212
    /**
213
     * <code>true</code> if all 3 paths exist in the URL set
214
     */
215
    private boolean containsAll(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
216
            String pathToSmallImage) {
217
        Set<String> existingUrls = existingUrlMap.keySet();
218
        return existingUrls.contains(pathToFullImage) && existingUrls.contains(pathToMediumImage)
219
                && existingUrls.contains(pathToSmallImage);
220
    }
221

    
222
    /**
223
     * <code>true</code> if any of the 3 paths exists in the URL set
224
     */
225
    private boolean containsAny(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
226
            String pathToSmallImage) {
227
        Set<String> existingUrls = existingUrlMap.keySet();
228
        return existingUrls.contains(pathToFullImage) || existingUrls.contains(pathToMediumImage)
229
                || existingUrls.contains(pathToSmallImage);
230
    }
231

    
232
    private void makeTitle(Media media, String fileName, boolean updateOnly) {
233
        String title = fileName.replace("_s_"," subsp. ")
234
                .replace("_"," ").replace(".jpg","").replace(".JPG","");
235
        if ( (!updateOnly) || media.getAllTitles().isEmpty()){
236
            media.putTitle(Language.LATIN(), title);
237
        }
238
    }
239

    
240
    private void makeMetaData(Media media, String fileName, boolean updateOnly) {
241

    
242
        File file = new File(path + fileName);
243
        if (!file.exists()){
244
            logger.warn("File for filename " +  fileName + " does not exist.");
245
            return;
246
        }
247

    
248
        Map<String, String> keywords = new HashMap<>();
249
        String copyright = null;
250
        String artistStr = null;
251
        String created = null;
252
        try{
253
//            IImageMetadata metadata = Sanselan.getMetadata(file);
254
            ImageMetadata metadata = Imaging.getMetadata(file);
255
            List<? extends ImageMetadataItem> items = metadata.getItems();
256
            for (ImageMetadataItem metadataItem : items){
257
//                System.out.println(item.getKeyword() +  ":    " + item.getText());
258
                if (metadataItem instanceof GenericImageMetadataItem){
259
                    GenericImageMetadataItem item = (GenericImageMetadataItem) metadataItem;
260

    
261
                    String keyword = item.getKeyword().toLowerCase();
262
                    String value =removeQuots(item.getText());
263

    
264
                    if("keywords".equals(keyword)){
265
                        String[] splits = value.split(":");
266
                        if (splits.length == 2){
267
                            keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
268
                        }else{
269
                            logger.warn("Keyword has not correct format and can not be parsed: " + value +  "  for file " + fileName);
270
                        }
271
                    }else if ("Copyright Notice".equalsIgnoreCase(keyword)){
272
                        copyright = value;
273
                    }else if ("artist".equals(keyword)){
274
                        artistStr = value;
275
                    }else if ("date time original".equalsIgnoreCase(item.getKeyword())){
276
                        created = value;
277
                    }
278
                }
279
            }
280
        } catch (ImageReadException | IOException e1) {
281
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
282
            e1.printStackTrace();
283
        }
284

    
285
        AgentBase<?> artistAgent = null;
286
        Rights right = null;
287
        DateTime createdDate = null;
288
        String locality = null;
289

    
290
        //artist
291
        if (keywords.get("photographer") != null){
292
            String artist = keywords.get("photographer");
293
            artistAgent = makePerson(artist, fileName);
294
        }
295
        if (artistStr != null){
296
            if (keywords.get("photographer") == null){
297
                artistAgent = makePerson(artistStr, fileName);
298
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
299
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
300
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
301
            }
302
        }
303

    
304
        //locality
305
        if (keywords.get("locality") != null){
306
            locality = keywords.get("locality");
307
        }
308

    
309
        //copyright
310
        if (copyright != null){
311
            AgentBase<?> agent;
312
            if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
313
                agent = Institution.NewNamedInstance(copyright);
314
            }else{
315
                agent = makePerson(copyright, fileName);
316
            }
317
            right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
318
            right.setAgent(agent);
319
            right = deduplicationHelper.getExistingCopyright(null, right);
320
        }
321

    
322
        //created
323
        if (created != null){
324
            DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
325
            try {
326
                createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
327
            } catch (Exception e) {
328
                logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
329
            }
330
        }
331

    
332
        boolean force = !updateOnly || forceUpdate;
333
        //add to media
334
        if (artistAgent != null && (force || media.getArtist() == null)){
335
            media.setArtist(artistAgent);
336
        }
337
        if (right != null && (force || media.getRights().isEmpty())){
338
            media.removeRights(right);
339
            media.addRights(right);
340
        }
341
        if (createdDate != null && (force || media.getMediaCreated() == null)){
342
            media.setMediaCreated(TimePeriod.NewInstance(createdDate));
343
        }
344
        if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
345
            media.putDescription(Language.ENGLISH(), locality);
346
        }
347
    }
348

    
349
    private Person makePerson(String artist, String fileName) {
350
        artist = artist.trim();
351
        String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
352
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
353
        Person person = Person.NewInstance();
354
        if (matcher.matches()){
355
            person.setGivenName(matcher.group(1).trim());
356
            person.setFamilyName(matcher.group(2).trim());
357
        }else{
358
            person.setTitleCache(artist, true);
359
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
360
        }
361

    
362
        person = deduplicationHelper.getExistingAuthor(null, person);
363
        return person;
364
    }
365

    
366
    private String removeQuots(String text) {
367
        if (text.startsWith("'") && text.endsWith("'")){
368
            return text.substring(1, text.length() -1);
369
        }else{
370
            return text;
371
        }
372
    }
373

    
374
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
375
        TaxonDescription imageGallery = taxon.getImageGallery(true);
376
        TextData textData = null;
377
        if (!imageGallery.getElements().isEmpty()){
378
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
379
            if (el.isInstanceOf(TextData.class)){
380
                textData = CdmBase.deproxy(el, TextData.class);
381
            }else{
382
                logger.warn("Image gallery had non-textdata description element: " +  fileStr);
383
            }
384
        }
385
        if (textData == null){
386
            textData = TextData.NewInstance();
387
            textData.setFeature(Feature.IMAGE());
388
        }
389
        imageGallery.addElement(textData);
390
        textData.addMedia(media);
391
    }
392

    
393
    private void fillMediaWithAllRepresentations(Media media, String fullPath, String mediumPath, String smallPath, String oldFullPath){
394
        Set<MediaRepresentation> existingRepresentations = new HashSet<>(media.getRepresentations());
395
        makeMediaRepresentation(oldFullPath, media, existingRepresentations, fullPath);
396
        makeMediaRepresentation(mediumPath, media, existingRepresentations, null);
397
        makeMediaRepresentation(smallPath, media, existingRepresentations, null);
398
        if(!existingRepresentations.isEmpty()){
399
            logger.warn("Media contains existing representations which are not contained in the 3 paths: " + media.getTitleCache());
400
        }
401
    }
402

    
403
    private void makeMediaRepresentation(String uriString, Media media,
404
            Set<MediaRepresentation> existingRepresentations, String replaceUri) {
405
        MediaRepresentation existingMediaRep = getExistingMediaRepresentation(uriString, existingRepresentations);
406
        boolean readMediaData = true;
407
        MediaRepresentation newMediaRep = makeMediaRepresentation(replaceUri != null? replaceUri : uriString, readMediaData);
408
        if (existingMediaRep == null){
409
            media.addRepresentation(newMediaRep);
410
        }else{
411
            existingRepresentations.remove(existingMediaRep);
412
            mergeToExistingRepresentation(existingMediaRep, newMediaRep);
413
        }
414
    }
415

    
416
    private void mergeToExistingRepresentation(MediaRepresentation existingMediaRep, MediaRepresentation newMediaRep) {
417
        existingMediaRep.setMimeType(newMediaRep.getMimeType());
418
        existingMediaRep.setSuffix(newMediaRep.getSuffix());
419
        if(!existingMediaRep.getParts().isEmpty() && !newMediaRep.getParts().isEmpty()){
420
            MediaRepresentationPart existingPart = existingMediaRep.getParts().iterator().next();
421
            ImageFile newPart = (ImageFile)newMediaRep.getParts().iterator().next();
422
            if(existingPart.isInstanceOf(ImageFile.class)){
423
                ImageFile existingImage = CdmBase.deproxy(existingPart, ImageFile.class);
424
                existingImage.setHeight(newPart.getHeight());
425
                existingImage.setWidth(newPart.getWidth());
426
            }else{
427
                logger.warn("MediaRepresentationPart was not of type ImageFile. Height and width not merged: " + existingPart.getUri());
428
            }
429
            existingPart.setSize(newPart.getSize());
430
            existingPart.setUri(newPart.getUri());
431
        }
432
    }
433

    
434
    private MediaRepresentation getExistingMediaRepresentation(String uriString,
435
            Set<MediaRepresentation> existingRepresentations) {
436
        for (MediaRepresentation rep : existingRepresentations){
437
            for (MediaRepresentationPart part : rep.getParts()){
438
                if (part.getUri() != null && part.getUri().toString().equals(uriString)){
439
                    return rep;
440
                }
441
            }
442
        }
443
        return null;
444
    }
445

    
446
    /**
447
     * Creates
448
     * @see #READ_MEDIA_DATA
449
     * @return
450
     * @throws MalformedURLException
451
     */
452
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
453
        if( uriString == null){
454
            return null;
455
        } else {
456
            uriString = uriString.replace(" ", "%20");  //replace whitespace
457
            try {
458
                MediaRepresentation representation = makeMediaRepresentation(uriString, readMediaData);
459
                Media media = Media.NewInstance();
460
                media.addRepresentation(representation);
461

    
462
                if (uriStrThumb != null){
463
                    CdmImageInfo imageInfoThumb = null;
464
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
465
                    URI uriThumb = new URI(uriStrThumb);
466
                    try {
467
                        if (readMediaData){
468
                            logger.info("Read media data from: " + uriThumb);
469
                            imageInfoThumb = CdmImageInfo.NewInstance(uriThumb, 0);
470
                        }
471
                    } catch (Exception e) {
472
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
473
                        logger.warn(message);
474
                    }
475

    
476
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
477
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
478
                    if(imageInfoThumb != null){
479
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
480
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
481
                    }
482
                    reprThumb.addRepresentationPart(imageFileFhumb);
483
                    media.addRepresentation(reprThumb);
484
                }
485

    
486
                return media;
487
            } catch (URISyntaxException e1) {
488
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
489
                logger.warn(message);
490
                return null;
491
            }
492
        }
493
    }
494

    
495
    private MediaRepresentation makeMediaRepresentation(String uriString, boolean readMediaData) {
496

    
497
        uriString = uriString.replace(" ", "%20");  //replace whitespace
498
        CdmImageInfo imageInfo = null;
499
        URI uri;
500
        try {
501
            uri = new URI(uriString);
502
        } catch (URISyntaxException e1) {
503
            logger.error("Malformed URI. Could not create media representation: " + uriString);
504
            return null;
505
        }
506
        try {
507
            if (readMediaData){
508
                logger.info("Read media data from: " + uri);
509
                imageInfo = CdmImageInfo.NewInstance(uri, 0);
510
            }
511
        } catch (Exception e) {
512
            try {
513
                //try again
514
                imageInfo = CdmImageInfo.NewInstance(uri, 0);
515
            } catch (Exception e1) {
516
                String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e1.getMessage();
517
                e1.printStackTrace();
518
                logger.warn(message);
519
            }
520
        }
521
        ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
522

    
523
        MediaRepresentation representation = MediaRepresentation.NewInstance();
524

    
525
        if(imageInfo != null){
526
            representation.setMimeType(imageInfo.getMimeType());
527
            representation.setSuffix(imageInfo.getSuffix());
528
        }
529
        representation.addRepresentationPart(imageFile);
530
        return representation;
531
    }
532

    
533
    private Map<String, Media> getAllExistingUrls(Taxon taxon) {
534
        Map<String, Media> result = new HashMap<>();
535
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
536
        for (TaxonDescription td : descriptions){
537
            if (td.isImageGallery()){
538
                for (DescriptionElementBase deb : td.getElements()){
539
                    if (deb.isInstanceOf(TextData.class)){
540
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
541
                        for (Media media :textData.getMedia()){
542
                            for (MediaRepresentation rep : media.getRepresentations()){
543
                                for (MediaRepresentationPart part : rep.getParts()){
544
                                    URI uri = part.getUri();
545
                                    if (uri != null){
546
                                        String uriStr = uri.toString();
547
                                        result.put(uriStr, media);
548
                                    }
549
                                }
550
                            }
551
                        }
552
                    }
553
                }
554
            }
555
        }
556
        return result;
557
    }
558

    
559
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
560

    
561
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
562
        taxonNameStr = adaptName(taxonNameStr);
563
        config.setTaxonNameTitle(taxonNameStr);
564
        config.setIncludeSynonyms(false);
565
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
566
        if (list.isEmpty()){
567
//            logger.warn("Taxon not found for media: " + taxonNameStr);
568
            taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString());
569
            config.setTaxonNameTitle(taxonNameStr);
570
            list = app.getTaxonService().findTaxaByName(config);
571
            if (list.isEmpty()){
572
                return null;
573
            }else if (list.size() > 1){
574
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
575
            }
576
        }
577
        if (list.size()>1){
578
            Iterator<TaxonBase> it = list.iterator();
579
            while (it.hasNext()){
580
                Taxon next = (Taxon)it.next();
581
                if (next.getTaxonNodes().isEmpty() && !next.getTaxaForMisappliedName(true).isEmpty()){
582
                    it.remove();
583
                }
584
            }
585
            if (list.size()>1){
586
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
587
                it = list.iterator();
588
                while (it.hasNext()){
589
                    Taxon next = (Taxon)it.next();
590
                    if (next.getTaxonNodes().isEmpty()){
591
                        it.remove();
592
                    }
593
                }
594
                if (list.size()>1){
595
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
596
                }else if (list.size() < 1){
597
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
598
                    return null;
599
                }
600
            }else if (list.size() < 1){
601
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
602
                return null;
603
            }
604
        }
605
        TaxonBase<?> taxonBase = list.get(0);
606
        Taxon result;
607
        if (taxonBase.isInstanceOf(Synonym.class)){
608
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
609
        }else{
610
            result = CdmBase.deproxy(taxonBase, Taxon.class);
611
        }
612
        return result;
613
    }
614

    
615
    private String adaptName(String taxonNameStr) {
616
//        if (taxonNameStr.equals("Hypericum cerastoides")){
617
//            taxonNameStr = "Hypericum cerastioides";
618
//        }
619
        return taxonNameStr;
620
    }
621

    
622
	private void test(){
623
	    File f = new File(path);
624
	    String[] list = f.list();
625
	    List<String> fullFileNames = new ArrayList<>();
626
	    for (String fileName : list){
627
	        fullFileNames.add(path + fileName);
628
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
629
	            System.out.println(fileName);
630
	        }
631
	    }
632
	}
633

    
634
	private void updateMetadata(ICdmDataSource cdmDestination){
635
        CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
636
        TransactionStatus tx = app.startTransaction();
637

    
638
        deduplicationHelper = (ImportDeduplicationHelper)ImportDeduplicationHelper.NewInstance(app, null);
639

    
640
        List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
641
        for (Media media : list){
642
            String fileName = getUrlStringForMedia(media);
643
            if (fileName.startsWith(newUrlPath)){
644
                //TODO not yet adapted to new image server URLs
645
                fileName = fileName.replace(newUrlPath, "");
646
                if (fileName.equals("Acinos_exiguus_C1.jpg")){  //for debugging only
647
//                  System.out.println(fileName);
648
                    makeMetaData(media, fileName, true);
649
                    makeTitle(media, fileName, true);
650
                }
651
            }else{
652
                logger.warn("Filename does not start with standard url path: " + fileName);
653
            }
654
        }
655

    
656
        if (testOnly){
657
            tx.setRollbackOnly();
658
        }
659
        app.commitTransaction(tx);
660
	}
661

    
662
    private String getUrlStringForMedia(Media media) {
663
        String result = null;
664
        for (MediaRepresentation rep : media.getRepresentations()){
665
            for (MediaRepresentationPart part : rep.getParts()){
666
                URI uri = part.getUri();
667
                if (uri != null){
668
                    if (result != null){
669
                        //TODO this still needs to be adapted to the 3 representations of media
670
                        logger.warn("More than 1 uri exists for media "+ media.getId());
671
                    }else{
672
                        result = uri.toString();
673
                    }
674
                }
675
            }
676
        }
677
        return result;
678
    }
679

    
680
	public static void main(String[] args) {
681
		CyprusImagesActivator me = new CyprusImagesActivator();
682
		if (update_notCreate){
683
		    me.updateMetadata(cdmDestination);
684
		}else{
685
		    me.doImport(cdmDestination);
686
		}
687
//		me.test();
688
		System.exit(0);
689
	}
690
}
(3-3/5)