Project

General

Profile

Download (29.3 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.app.cyprus;
11

    
12
import java.io.File;
13
import java.io.IOException;
14
import java.net.MalformedURLException;
15
import eu.etaxonomy.cdm.common.URI;
16
import java.net.URISyntaxException;
17
import java.util.ArrayList;
18
import java.util.HashMap;
19
import java.util.HashSet;
20
import java.util.Iterator;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.Set;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26

    
27
import org.apache.commons.imaging.ImageReadException;
28
import org.apache.commons.imaging.Imaging;
29
import org.apache.commons.imaging.common.GenericImageMetadata.GenericImageMetadataItem;
30
import org.apache.commons.imaging.common.ImageMetadata;
31
import org.apache.commons.imaging.common.ImageMetadata.ImageMetadataItem;
32
import org.apache.log4j.Logger;
33
import org.joda.time.DateTime;
34
import org.joda.time.format.DateTimeFormat;
35
import org.joda.time.format.DateTimeFormatter;
36
import org.springframework.transaction.TransactionStatus;
37

    
38
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
39
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
40
import eu.etaxonomy.cdm.app.common.CdmDestinations;
41
import eu.etaxonomy.cdm.common.UTF8;
42
import eu.etaxonomy.cdm.common.media.CdmImageInfo;
43
import eu.etaxonomy.cdm.database.DbSchemaValidation;
44
import eu.etaxonomy.cdm.database.ICdmDataSource;
45
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
46
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
47
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
48
import eu.etaxonomy.cdm.model.agent.AgentBase;
49
import eu.etaxonomy.cdm.model.agent.Institution;
50
import eu.etaxonomy.cdm.model.agent.Person;
51
import eu.etaxonomy.cdm.model.common.CdmBase;
52
import eu.etaxonomy.cdm.model.common.Language;
53
import eu.etaxonomy.cdm.model.common.TimePeriod;
54
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
55
import eu.etaxonomy.cdm.model.description.Feature;
56
import eu.etaxonomy.cdm.model.description.TaxonDescription;
57
import eu.etaxonomy.cdm.model.description.TextData;
58
import eu.etaxonomy.cdm.model.media.ImageFile;
59
import eu.etaxonomy.cdm.model.media.Media;
60
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
61
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
62
import eu.etaxonomy.cdm.model.media.Rights;
63
import eu.etaxonomy.cdm.model.media.RightsType;
64
import eu.etaxonomy.cdm.model.taxon.Synonym;
65
import eu.etaxonomy.cdm.model.taxon.Taxon;
66
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
67

    
68
/**
69
 * Creates CDM Media from images stored in the given path.
70
 *
71
 * Note: Currently adapted to also migrate image URLs from the Scaler IIIF API to the default Scaler API.
72
 * Note2: updateMetadata() still needs to be adapted to support 3 MediaRepresentations
73
 *
74
 * @author a.mueller
75
 * @since 05.2017
76
 */
77
public class CyprusImagesActivator {
78
	private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
79

    
80
	static final ICdmDataSource cdmDestination = CdmDestinations.local_cyprus();
81
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_cyprus();
82
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_cyprus();
83

    
84
	static boolean testOnly = false;
85
	static boolean update_notCreate = false;
86
	//if true, data will always be updated, if false, only missing data will be updated
87
	static boolean forceUpdate = false;
88

    
89
    private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
90
    private static final String oldUrlPath = "https://pictures.bgbm.org/digilib/Scaler/IIIF/Cyprus!";
91
    private static final String newUrlPath = "https://pictures.bgbm.org/digilib/Scaler?fn=Cyprus/";
92
    private static final String oldPostfix = "/full/full/0/default.jpg";
93
    private static final String newPostfix = "&mo=file";
94
    private static final String mediumPostfix ="&mo=fit&dw=400&dh=400&uvfix=1";
95
    private static final String smallPostfix ="&mo=fit&dw=200&dh=200&uvfix=1";
96

    
97
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
98

    
99
    private void doImport(ICdmDataSource cdmDestination){
100

    
101
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
102
		TransactionStatus tx = app.startTransaction();
103

    
104
		deduplicationHelper = (ImportDeduplicationHelper)ImportDeduplicationHelper.NewInstance(app);
105

    
106
        File file = new File(path);
107
        String[] fileList = file.list();
108
        Set<String> notFound = new HashSet<>();
109

    
110
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
111
        Pattern pattern = Pattern.compile(regEx);
112

    
113
        String start = "O";  //O
114
        String end = "Q";      //Q
115
        String startLetter = "";
116

    
117
        for (String fileName : fileList){
118
            if(fileName.compareToIgnoreCase(start) < 0 || fileName.compareToIgnoreCase(end) >= 0){
119
                continue;
120
            }
121
            Matcher matcher = pattern.matcher(fileName);
122
            if (matcher.matches() ){
123
//                System.out.println(fileName);
124
                if (!fileName.substring(0,3).equals(startLetter)){
125
                    startLetter = fileName.substring(0,3);
126
                    System.out.println(startLetter);
127
                }
128
                String taxonName = matcher.group(1);
129
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
130
                Taxon taxon = getAcceptedTaxon(app, taxonName);
131
                if (taxon == null){
132
                    if (!notFound.contains(taxonName)){
133
                        notFound.add(taxonName);
134
                        logger.warn("Taxon not found: " + taxonName);
135
                    }
136
                }else{
137
                    try {
138
                        handleTaxon(app, taxon, fileName);
139
                    } catch (Exception e) {
140
                        logger.error("Unhandled exception ("+e.getMessage()+") when reading file " + fileName +". File not imported: ");
141
                        e.printStackTrace();
142
                    }
143
                }
144
            }else{
145
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
146
                    logger.warn("Incorrect filename:" + fileName);
147
                }else{
148
                    System.out.println("Not clear yet: " + fileName);
149
                }
150
            }
151
        }
152

    
153
//		app.getTaxonService().saveOrUpdate(taxaToSave);
154

    
155
		if (testOnly){
156
		    tx.setRollbackOnly();
157
		}
158
		app.commitTransaction(tx);
159
	}
160

    
161
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
162
        Map<String, Media> existingUrls = getAllExistingUrls(taxon);
163
        String pathToOldImage = oldUrlPath + fileName + oldPostfix;
164

    
165
        String pathToFullImage = newUrlPath + fileName + newPostfix;
166
        String pathToMediumImage = newUrlPath + fileName + mediumPostfix;
167
        String pathToSmallImage = newUrlPath + fileName + smallPostfix;
168

    
169
        if (containsAll(existingUrls, pathToFullImage, pathToMediumImage, pathToSmallImage)){
170
            return;
171
        }else{
172
            Media media;
173
            if (containsAny(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage)){
174
                media = getExistingMedia(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage);
175
                if (media == null){
176
                    return;
177
                }else if (media.getAllTitles().isEmpty()){
178
                    media.setTitleCache(null, false);
179
                    media.putTitle(Language.LATIN(), fileName);
180
                }
181
            }else{
182
                media = Media.NewInstance();
183
                makeMetaData(media, fileName, false);
184
                makeTitle(media, fileName, false);
185
                if (!testOnly){
186
                    makeTextData(fileName, media, taxon);
187
                }
188
            }
189
            fillMediaWithAllRepresentations(media, pathToFullImage, pathToMediumImage, pathToSmallImage, pathToOldImage);
190
        }
191
    }
192

    
193
    private Media getExistingMedia(Map<String, Media> existingUrls, String pathToFullImage, String pathToMediumImage,
194
            String pathToSmallImage) {
195
        Set<Media> result = new HashSet<>();
196
        for(String existingUrl : existingUrls.keySet()){
197
            if (existingUrl.equals(pathToFullImage) || existingUrl.equals(pathToMediumImage) ||
198
                    existingUrl.equals(pathToSmallImage)){
199
                result.add(existingUrls.get(existingUrl));
200
            }
201
        }
202
        if (result.isEmpty()){
203
            logger.warn("Media for existing URL not found. This should not happen.");
204
            return null;
205
        }else if (result.size() > 1){
206
            logger.warn("Existing URLs have more than 1 Media. This should not happen.");
207
            return null;
208
        }else{
209
            return result.iterator().next();
210
        }
211
    }
212

    
213
    /**
214
     * <code>true</code> if all 3 paths exist in the URL set
215
     */
216
    private boolean containsAll(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
217
            String pathToSmallImage) {
218
        Set<String> existingUrls = existingUrlMap.keySet();
219
        return existingUrls.contains(pathToFullImage) && existingUrls.contains(pathToMediumImage)
220
                && existingUrls.contains(pathToSmallImage);
221
    }
222

    
223
    /**
224
     * <code>true</code> if any of the 3 paths exists in the URL set
225
     */
226
    private boolean containsAny(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage,
227
            String pathToSmallImage) {
228
        Set<String> existingUrls = existingUrlMap.keySet();
229
        return existingUrls.contains(pathToFullImage) || existingUrls.contains(pathToMediumImage)
230
                || existingUrls.contains(pathToSmallImage);
231
    }
232

    
233
    private void makeTitle(Media media, String fileName, boolean updateOnly) {
234
        String title = fileName.replace("_s_"," subsp. ")
235
                .replace("_"," ").replace(".jpg","").replace(".JPG","");
236
        if ( (!updateOnly) || media.getAllTitles().isEmpty()){
237
            media.putTitle(Language.LATIN(), title);
238
        }
239
    }
240

    
241
    private void makeMetaData(Media media, String fileName, boolean updateOnly) {
242

    
243
        File file = new File(path + fileName);
244
        if (!file.exists()){
245
            logger.warn("File for filename " +  fileName + " does not exist.");
246
            return;
247
        }
248

    
249
        Map<String, String> keywords = new HashMap<>();
250
        String copyright = null;
251
        String artistStr = null;
252
        String created = null;
253
        try{
254
//            IImageMetadata metadata = Sanselan.getMetadata(file);
255
            ImageMetadata metadata = Imaging.getMetadata(file);
256
            List<? extends ImageMetadataItem> items = metadata.getItems();
257
            for (ImageMetadataItem metadataItem : items){
258
//                System.out.println(item.getKeyword() +  ":    " + item.getText());
259
                if (metadataItem instanceof GenericImageMetadataItem){
260
                    GenericImageMetadataItem item = (GenericImageMetadataItem) metadataItem;
261

    
262
                    String keyword = item.getKeyword().toLowerCase();
263
                    String value =removeQuots(item.getText());
264

    
265
                    if("keywords".equals(keyword)){
266
                        String[] splits = value.split(":");
267
                        if (splits.length == 2){
268
                            keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
269
                        }else{
270
                            logger.warn("Keyword has not correct format and can not be parsed: " + value +  "  for file " + fileName);
271
                        }
272
                    }else if ("Copyright Notice".equalsIgnoreCase(keyword)){
273
                        copyright = value;
274
                    }else if ("artist".equals(keyword)){
275
                        artistStr = value;
276
                    }else if ("date time original".equalsIgnoreCase(item.getKeyword())){
277
                        created = value;
278
                    }
279
                }
280
            }
281
        } catch (ImageReadException | IOException e1) {
282
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
283
            e1.printStackTrace();
284
        }
285

    
286
        AgentBase<?> artistAgent = null;
287
        Rights right = null;
288
        DateTime createdDate = null;
289
        String locality = null;
290

    
291
        //artist
292
        if (keywords.get("photographer") != null){
293
            String artist = keywords.get("photographer");
294
            artistAgent = makePerson(artist, fileName);
295
        }
296
        if (artistStr != null){
297
            if (keywords.get("photographer") == null){
298
                artistAgent = makePerson(artistStr, fileName);
299
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
300
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
301
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
302
            }
303
        }
304

    
305
        //locality
306
        if (keywords.get("locality") != null){
307
            locality = keywords.get("locality");
308
        }
309

    
310
        //copyright
311
        if (copyright != null){
312
            AgentBase<?> agent;
313
            if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
314
                agent = Institution.NewNamedInstance(copyright);
315
            }else{
316
                agent = makePerson(copyright, fileName);
317
            }
318
            right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
319
            right.setAgent(agent);
320
            right = deduplicationHelper.getExistingCopyright(null, right);
321
        }
322

    
323
        //created
324
        if (created != null){
325
            DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
326
            try {
327
                createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
328
            } catch (Exception e) {
329
                logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
330
            }
331
        }
332

    
333
        boolean force = !updateOnly || forceUpdate;
334
        //add to media
335
        if (artistAgent != null && (force || media.getArtist() == null)){
336
            media.setArtist(artistAgent);
337
        }
338
        if (right != null && (force || media.getRights().isEmpty())){
339
            media.removeRights(right);
340
            media.addRights(right);
341
        }
342
        if (createdDate != null && (force || media.getMediaCreated() == null)){
343
            media.setMediaCreated(TimePeriod.NewInstance(createdDate));
344
        }
345
        if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
346
            media.putDescription(Language.ENGLISH(), locality);
347
        }
348
    }
349

    
350
    private Person makePerson(String artist, String fileName) {
351
        artist = artist.trim();
352
        String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
353
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
354
        Person person = Person.NewInstance();
355
        if (matcher.matches()){
356
            person.setGivenName(matcher.group(1).trim());
357
            person.setFamilyName(matcher.group(2).trim());
358
        }else{
359
            person.setTitleCache(artist, true);
360
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
361
        }
362

    
363
        person = deduplicationHelper.getExistingAuthor(null, person);
364
        return person;
365
    }
366

    
367
    private String removeQuots(String text) {
368
        if (text.startsWith("'") && text.endsWith("'")){
369
            return text.substring(1, text.length() -1);
370
        }else{
371
            return text;
372
        }
373
    }
374

    
375
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
376
        TaxonDescription imageGallery = taxon.getImageGallery(true);
377
        TextData textData = null;
378
        if (!imageGallery.getElements().isEmpty()){
379
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
380
            if (el.isInstanceOf(TextData.class)){
381
                textData = CdmBase.deproxy(el, TextData.class);
382
            }else{
383
                logger.warn("Image gallery had non-textdata description element: " +  fileStr);
384
            }
385
        }
386
        if (textData == null){
387
            textData = TextData.NewInstance();
388
            textData.setFeature(Feature.IMAGE());
389
        }
390
        imageGallery.addElement(textData);
391
        textData.addMedia(media);
392
    }
393

    
394
    private void fillMediaWithAllRepresentations(Media media, String fullPath, String mediumPath, String smallPath, String oldFullPath){
395
        Set<MediaRepresentation> existingRepresentations = new HashSet<>(media.getRepresentations());
396
        makeMediaRepresentation(oldFullPath, media, existingRepresentations, fullPath);
397
        makeMediaRepresentation(mediumPath, media, existingRepresentations, null);
398
        makeMediaRepresentation(smallPath, media, existingRepresentations, null);
399
        if(!existingRepresentations.isEmpty()){
400
            logger.warn("Media contains existing representations which are not contained in the 3 paths: " + media.getTitleCache());
401
        }
402
    }
403

    
404
    private void makeMediaRepresentation(String uriString, Media media,
405
            Set<MediaRepresentation> existingRepresentations, String replaceUri) {
406
        MediaRepresentation existingMediaRep = getExistingMediaRepresentation(uriString, existingRepresentations);
407
        boolean readMediaData = true;
408
        MediaRepresentation newMediaRep = makeMediaRepresentation(replaceUri != null? replaceUri : uriString, readMediaData);
409
        if (existingMediaRep == null){
410
            media.addRepresentation(newMediaRep);
411
        }else{
412
            existingRepresentations.remove(existingMediaRep);
413
            mergeToExistingRepresentation(existingMediaRep, newMediaRep);
414
        }
415
    }
416

    
417
    private void mergeToExistingRepresentation(MediaRepresentation existingMediaRep, MediaRepresentation newMediaRep) {
418
        existingMediaRep.setMimeType(newMediaRep.getMimeType());
419
        existingMediaRep.setSuffix(newMediaRep.getSuffix());
420
        if(!existingMediaRep.getParts().isEmpty() && !newMediaRep.getParts().isEmpty()){
421
            MediaRepresentationPart existingPart = existingMediaRep.getParts().iterator().next();
422
            ImageFile newPart = (ImageFile)newMediaRep.getParts().iterator().next();
423
            if(existingPart.isInstanceOf(ImageFile.class)){
424
                ImageFile existingImage = CdmBase.deproxy(existingPart, ImageFile.class);
425
                existingImage.setHeight(newPart.getHeight());
426
                existingImage.setWidth(newPart.getWidth());
427
            }else{
428
                logger.warn("MediaRepresentationPart was not of type ImageFile. Height and width not merged: " + existingPart.getUri());
429
            }
430
            existingPart.setSize(newPart.getSize());
431
            existingPart.setUri(newPart.getUri());
432
        }
433
    }
434

    
435
    private MediaRepresentation getExistingMediaRepresentation(String uriString,
436
            Set<MediaRepresentation> existingRepresentations) {
437
        for (MediaRepresentation rep : existingRepresentations){
438
            for (MediaRepresentationPart part : rep.getParts()){
439
                if (part.getUri() != null && part.getUri().toString().equals(uriString)){
440
                    return rep;
441
                }
442
            }
443
        }
444
        return null;
445
    }
446

    
447
    /**
448
     * Creates
449
     * @see #READ_MEDIA_DATA
450
     * @return
451
     * @throws MalformedURLException
452
     */
453
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
454
        if( uriString == null){
455
            return null;
456
        } else {
457
            uriString = uriString.replace(" ", "%20");  //replace whitespace
458
            try {
459
                MediaRepresentation representation = makeMediaRepresentation(uriString, readMediaData);
460
                Media media = Media.NewInstance();
461
                media.addRepresentation(representation);
462

    
463
                if (uriStrThumb != null){
464
                    CdmImageInfo imageInfoThumb = null;
465
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
466
                    URI uriThumb = new URI(uriStrThumb);
467
                    try {
468
                        if (readMediaData){
469
                            logger.info("Read media data from: " + uriThumb);
470
                            imageInfoThumb = CdmImageInfo.NewInstance(uriThumb, 0);
471
                        }
472
                    } catch (Exception e) {
473
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
474
                        logger.warn(message);
475
                    }
476

    
477
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
478
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
479
                    if(imageInfoThumb != null){
480
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
481
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
482
                    }
483
                    reprThumb.addRepresentationPart(imageFileFhumb);
484
                    media.addRepresentation(reprThumb);
485
                }
486

    
487
                return media;
488
            } catch (URISyntaxException e1) {
489
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
490
                logger.warn(message);
491
                return null;
492
            }
493
        }
494
    }
495

    
496
    private MediaRepresentation makeMediaRepresentation(String uriString, boolean readMediaData) {
497

    
498
        uriString = uriString.replace(" ", "%20");  //replace whitespace
499
        CdmImageInfo imageInfo = null;
500
        URI uri;
501
        try {
502
            uri = new URI(uriString);
503
        } catch (URISyntaxException e1) {
504
            logger.error("Malformed URI. Could not create media representation: " + uriString);
505
            return null;
506
        }
507
        try {
508
            if (readMediaData){
509
                logger.info("Read media data from: " + uri);
510
                imageInfo = CdmImageInfo.NewInstance(uri, 0);
511
            }
512
        } catch (Exception e) {
513
            try {
514
                //try again
515
                imageInfo = CdmImageInfo.NewInstance(uri, 0);
516
            } catch (Exception e1) {
517
                String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e1.getMessage();
518
                e1.printStackTrace();
519
                logger.warn(message);
520
            }
521
        }
522
        ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
523

    
524
        MediaRepresentation representation = MediaRepresentation.NewInstance();
525

    
526
        if(imageInfo != null){
527
            representation.setMimeType(imageInfo.getMimeType());
528
            representation.setSuffix(imageInfo.getSuffix());
529
        }
530
        representation.addRepresentationPart(imageFile);
531
        return representation;
532
    }
533

    
534
    private Map<String, Media> getAllExistingUrls(Taxon taxon) {
535
        Map<String, Media> result = new HashMap<>();
536
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
537
        for (TaxonDescription td : descriptions){
538
            if (td.isImageGallery()){
539
                for (DescriptionElementBase deb : td.getElements()){
540
                    if (deb.isInstanceOf(TextData.class)){
541
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
542
                        for (Media media :textData.getMedia()){
543
                            for (MediaRepresentation rep : media.getRepresentations()){
544
                                for (MediaRepresentationPart part : rep.getParts()){
545
                                    URI uri = part.getUri();
546
                                    if (uri != null){
547
                                        String uriStr = uri.toString();
548
                                        result.put(uriStr, media);
549
                                    }
550
                                }
551
                            }
552
                        }
553
                    }
554
                }
555
            }
556
        }
557
        return result;
558
    }
559

    
560
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
561

    
562
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
563
        taxonNameStr = adaptName(taxonNameStr);
564
        config.setTaxonNameTitle(taxonNameStr);
565
        config.setIncludeSynonyms(false);
566
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
567
        if (list.isEmpty()){
568
//            logger.warn("Taxon not found for media: " + taxonNameStr);
569
            taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString());
570
            config.setTaxonNameTitle(taxonNameStr);
571
            list = app.getTaxonService().findTaxaByName(config);
572
            if (list.isEmpty()){
573
                return null;
574
            }else if (list.size() > 1){
575
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
576
            }
577
        }
578
        if (list.size()>1){
579
            Iterator<TaxonBase> it = list.iterator();
580
            while (it.hasNext()){
581
                Taxon next = (Taxon)it.next();
582
                if (next.getTaxonNodes().isEmpty() && !next.getTaxaForMisappliedName(true).isEmpty()){
583
                    it.remove();
584
                }
585
            }
586
            if (list.size()>1){
587
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
588
                it = list.iterator();
589
                while (it.hasNext()){
590
                    Taxon next = (Taxon)it.next();
591
                    if (next.getTaxonNodes().isEmpty()){
592
                        it.remove();
593
                    }
594
                }
595
                if (list.size()>1){
596
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
597
                }else if (list.size() < 1){
598
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
599
                    return null;
600
                }
601
            }else if (list.size() < 1){
602
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
603
                return null;
604
            }
605
        }
606
        TaxonBase<?> taxonBase = list.get(0);
607
        Taxon result;
608
        if (taxonBase.isInstanceOf(Synonym.class)){
609
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
610
        }else{
611
            result = CdmBase.deproxy(taxonBase, Taxon.class);
612
        }
613
        return result;
614
    }
615

    
616
    private String adaptName(String taxonNameStr) {
617
//        if (taxonNameStr.equals("Hypericum cerastoides")){
618
//            taxonNameStr = "Hypericum cerastioides";
619
//        }
620
        return taxonNameStr;
621
    }
622

    
623
	private void test(){
624
	    File f = new File(path);
625
	    String[] list = f.list();
626
	    List<String> fullFileNames = new ArrayList<>();
627
	    for (String fileName : list){
628
	        fullFileNames.add(path + fileName);
629
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
630
	            System.out.println(fileName);
631
	        }
632
	    }
633
	}
634

    
635
	private void updateMetadata(ICdmDataSource cdmDestination){
636
        CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
637
        TransactionStatus tx = app.startTransaction();
638

    
639
        deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(app);
640

    
641
        List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
642
        for (Media media : list){
643
            String fileName = getUrlStringForMedia(media);
644
            if (fileName.startsWith(newUrlPath)){
645
                //TODO not yet adapted to new image server URLs
646
                fileName = fileName.replace(newUrlPath, "");
647
                if (fileName.equals("Acinos_exiguus_C1.jpg")){  //for debugging only
648
//                  System.out.println(fileName);
649
                    makeMetaData(media, fileName, true);
650
                    makeTitle(media, fileName, true);
651
                }
652
            }else{
653
                logger.warn("Filename does not start with standard url path: " + fileName);
654
            }
655
        }
656

    
657
        if (testOnly){
658
            tx.setRollbackOnly();
659
        }
660
        app.commitTransaction(tx);
661
	}
662

    
663
    private String getUrlStringForMedia(Media media) {
664
        String result = null;
665
        for (MediaRepresentation rep : media.getRepresentations()){
666
            for (MediaRepresentationPart part : rep.getParts()){
667
                URI uri = part.getUri();
668
                if (uri != null){
669
                    if (result != null){
670
                        //TODO this still needs to be adapted to the 3 representations of media
671
                        logger.warn("More than 1 uri exists for media "+ media.getId());
672
                    }else{
673
                        result = uri.toString();
674
                    }
675
                }
676
            }
677
        }
678
        return result;
679
    }
680

    
681
	public static void main(String[] args) {
682
		CyprusImagesActivator me = new CyprusImagesActivator();
683
		if (update_notCreate){
684
		    me.updateMetadata(cdmDestination);
685
		}else{
686
		    me.doImport(cdmDestination);
687
		}
688
//		me.test();
689
		System.exit(0);
690
	}
691
}
(3-3/4)