Project

General

Profile

Download (22.3 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.app.cyprus;
11

    
12
import java.io.File;
13
import java.io.IOException;
14
import java.net.MalformedURLException;
15
import java.net.URI;
16
import java.net.URISyntaxException;
17
import java.util.ArrayList;
18
import java.util.HashMap;
19
import java.util.HashSet;
20
import java.util.Iterator;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.Set;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26

    
27
import org.apache.commons.imaging.ImageReadException;
28
import org.apache.commons.imaging.Imaging;
29
import org.apache.commons.imaging.common.GenericImageMetadata.GenericImageMetadataItem;
30
import org.apache.commons.imaging.common.ImageMetadata;
31
import org.apache.commons.imaging.common.ImageMetadata.ImageMetadataItem;
32
import org.apache.log4j.Logger;
33
import org.joda.time.DateTime;
34
import org.joda.time.format.DateTimeFormat;
35
import org.joda.time.format.DateTimeFormatter;
36
import org.springframework.transaction.TransactionStatus;
37

    
38
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
39
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
40
import eu.etaxonomy.cdm.app.common.CdmDestinations;
41
import eu.etaxonomy.cdm.common.UTF8;
42
import eu.etaxonomy.cdm.common.media.ImageInfo;
43
import eu.etaxonomy.cdm.database.DbSchemaValidation;
44
import eu.etaxonomy.cdm.database.ICdmDataSource;
45
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
46
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
47
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
48
import eu.etaxonomy.cdm.model.agent.AgentBase;
49
import eu.etaxonomy.cdm.model.agent.Institution;
50
import eu.etaxonomy.cdm.model.agent.Person;
51
import eu.etaxonomy.cdm.model.common.CdmBase;
52
import eu.etaxonomy.cdm.model.common.Language;
53
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
54
import eu.etaxonomy.cdm.model.description.Feature;
55
import eu.etaxonomy.cdm.model.description.TaxonDescription;
56
import eu.etaxonomy.cdm.model.description.TextData;
57
import eu.etaxonomy.cdm.model.media.ImageFile;
58
import eu.etaxonomy.cdm.model.media.Media;
59
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
60
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
61
import eu.etaxonomy.cdm.model.media.Rights;
62
import eu.etaxonomy.cdm.model.media.RightsType;
63
import eu.etaxonomy.cdm.model.taxon.Synonym;
64
import eu.etaxonomy.cdm.model.taxon.Taxon;
65
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
66

    
67
/**
68
 * @author a.mueller
69
 * @since 05.2017
70
 */
71
public class CyprusImagesActivator {
72
	private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
73

    
74

    
75
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_dev();
76
	//data source handed to doImport()/updateMetadata() by main(); currently the production destination
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_production();
77

    
78
	//if true, the transaction is marked rollback-only before it is committed
	//and newly created media are not attached to the taxon image gallery
	static boolean testOnly = false;
79
	//if true, main() runs updateMetadata() instead of doImport()
	static boolean update_notCreate = false;
80
	//if true, data will always be updated, if false, only missing data will be updated
81
	static boolean forceUpdate = false;
82

    
83
    //directory which is listed for image files and from which the image metadata is read
    private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
84
    //prefix which is prepended to a file name to build the uri of a media
    private static final String urlPath = "http://media.bgbm.org/erez/erez?src=EditWP6/zypern/photos/";
85

    
86
    //set in doImport()/updateMetadata(); used by makePerson() (getExistingAuthor)
    //and makeMetaData() (getExistingCopyright)
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
87

    
88

    
89
	private void doImport(ICdmDataSource cdmDestination){
90

    
91
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
92
		TransactionStatus tx = app.startTransaction();
93

    
94
		deduplicationHelper = (ImportDeduplicationHelper)ImportDeduplicationHelper.NewInstance(app);
95

    
96
        File file = new File(path);
97
        String[] fileList = file.list();
98
        Set<String> notFound = new HashSet<>();
99

    
100
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
101
        Pattern pattern = Pattern.compile(regEx);
102

    
103
        for (String fileName : fileList){
104

    
105
            Matcher matcher = pattern.matcher(fileName);
106
            if (matcher.matches()){
107
//                System.out.println(fileName);
108
                String taxonName = matcher.group(1);
109
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
110
                Taxon taxon = getAcceptedTaxon(app, taxonName);
111
                if (taxon == null){
112
                    if (!notFound.contains(taxonName)){
113
                        notFound.add(taxonName);
114
                        logger.warn("Taxon not found: " + taxonName);
115
                    }
116
                }else{
117
                    handleTaxon(app, taxon, fileName);
118
                }
119
            }else{
120
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
121
                    logger.warn("Incorrect filename:" + fileName);
122
                }
123
            }
124
        }
125

    
126
//		app.getTaxonService().saveOrUpdate(taxaToSave);
127

    
128
		if (testOnly){
129
		    tx.setRollbackOnly();
130
		}
131
		app.commitTransaction(tx);
132
	}
133

    
134
    /**
135
     * @param app
136
     * @param taxon
137
     * @param fileName
138
     */
139
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
140
        Set<String> urlStr = getAllExistingUrls(taxon);
141
        String fullName = urlPath + fileName;
142
        if (urlStr.contains(fullName)){
143
            return;
144
        }else{
145
            addMedia(app, taxon, fileName);
146
        }
147
    }
148

    
149
    /**
150
     * @param app
151
     * @param taxon
152
     * @param fileName
153
     */
154
    private void addMedia(CdmApplicationController app, Taxon taxon, String fileName) {
155
        try {
156
            String fullName = urlPath + fileName;
157
            Media media = getImageMedia(fullName, null, true);
158
            makeMetaData(media, fileName, false);
159
            makeTitle(media, fileName, false);
160
            if (!testOnly){
161
                makeTextData(fileName, media, taxon);
162
            }
163

    
164
        } catch (Exception e) {
165
            e.printStackTrace();
166
            return;
167
        }
168
    }
169

    
170
    /**
171
     * @param media
172
     * @param fileName
173
     * @param b
174
     */
175
    private void makeTitle(Media media, String fileName, boolean updateOnly) {
176
        String title = fileName.replace("_s_"," subsp. ")
177
                .replace("_"," ").replace(".jpg","").replace(".JPG","");
178
        if ( (!updateOnly) || media.getAllTitles().isEmpty()){
179
            media.putTitle(Language.LATIN(), title);
180
        }
181
    }
182

    
183
    /**
184
     * @param media
185
     */
186
    private void makeMetaData(Media media, String fileName, boolean updateOnly) {
187

    
188
        File file = new File(path + fileName);
189
        if (!file.exists()){
190
            logger.warn("File for filename " +  fileName + " does not exist.");
191
            return;
192
        }
193

    
194
        Map<String, String> keywords = new HashMap<>();
195
        String copyright = null;
196
        String artistStr = null;
197
        String created = null;
198
        try{
199
//            IImageMetadata metadata = Sanselan.getMetadata(file);
200
            ImageMetadata metadata = Imaging.getMetadata(file);
201
            List<? extends ImageMetadataItem> items = metadata.getItems();
202
            for (Object object : items){
203
                ImageMetadataItem metadataItem = (ImageMetadataItem) object;
204
//                System.out.println(item.getKeyword() +  ":    " + item.getText());
205
                if (metadataItem instanceof GenericImageMetadataItem){
206
                    GenericImageMetadataItem item = (GenericImageMetadataItem) metadataItem;
207

    
208
                    String keyword = item.getKeyword().toLowerCase();
209
                    String value =removeQuots(item.getText());
210

    
211
                    if("keywords".equals(keyword)){
212
                        String[] splits = value.split(":");
213
                        if (splits.length == 2){
214
                            keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
215
                        }else{
216
                            logger.warn("Keyword has not correct format and can not be parsed: " + value +  "  for file " + fileName);
217
                        }
218
                    }else if ("Copyright Notice".equalsIgnoreCase(keyword)){
219
                        copyright = value;
220
                    }else if ("artist".equals(keyword)){
221
                        artistStr = value;
222
                    }else if ("date time original".equalsIgnoreCase(item.getKeyword())){
223
                        created = value;
224
                    }
225
                }
226
            }
227
        } catch (ImageReadException | IOException e1) {
228
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
229
            e1.printStackTrace();
230
        }
231

    
232

    
233
        AgentBase<?> artistAgent = null;
234
        Rights right = null;
235
        DateTime createdDate = null;
236
        String locality = null;
237

    
238
        //artist
239
        if (keywords.get("photographer") != null){
240
            String artist = keywords.get("photographer");
241
            artistAgent = makePerson(artist, fileName);
242
        }
243
        if (artistStr != null){
244
            if (keywords.get("photographer") == null){
245
                artistAgent = makePerson(artistStr, fileName);
246
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
247
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
248
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
249
            }
250
        }
251

    
252
        //locality
253
        if (keywords.get("locality") != null){
254
            locality = keywords.get("locality");
255
        }
256

    
257
        //copyright
258
        if (copyright != null){
259
            AgentBase<?> agent;
260
            if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
261
                agent = Institution.NewNamedInstance(copyright);
262
            }else{
263
                agent = makePerson(copyright, fileName);
264
            }
265
            right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
266
            right.setAgent(agent);
267
            right = deduplicationHelper.getExistingCopyright(null, right);
268
        }
269

    
270
        //created
271
        if (created != null){
272
            DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
273
            try {
274
                createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
275
            } catch (Exception e) {
276
                logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
277
            }
278
        }
279

    
280
        boolean force = !updateOnly || forceUpdate;
281
        //add to media
282
        if (artistAgent != null && (force || media.getArtist() == null)){
283
            media.setArtist(artistAgent);
284
        }
285
        if (right != null && (force || media.getRights().isEmpty())){
286
            media.removeRights(right);
287
            media.addRights(right);
288
        }
289
        if (createdDate != null && (force || media.getMediaCreated() == null)){
290
            media.setMediaCreated(createdDate);
291
        }
292
        if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
293
            media.putDescription(Language.ENGLISH(), locality);
294
        }
295
    }
296

    
297
    /**
298
     * @param artist
299
     * @return
300
     */
301
    private Person makePerson(String artist, String fileName) {
302
        artist = artist.trim();
303
        String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
304
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
305
        Person person = Person.NewInstance();
306
        if (matcher.matches()){
307
            person.setGivenName(matcher.group(1).trim());
308
            person.setFamilyName(matcher.group(2).trim());
309
        }else{
310
            person.setTitleCache(artist, true);
311
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
312
        }
313

    
314
        person = (Person)deduplicationHelper.getExistingAuthor(null, person);
315
        return person;
316
    }
317

    
318
    private String removeQuots(String text) {
319
        if (text.startsWith("'") && text.endsWith("'")){
320
            return text.substring(1, text.length() -1);
321
        }else{
322
            return text;
323
        }
324
    }
325

    
326
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
327
        TaxonDescription imageGallery = taxon.getImageGallery(true);
328
        TextData textData = null;
329
        if (!imageGallery.getElements().isEmpty()){
330
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
331
            if (el.isInstanceOf(TextData.class)){
332
                textData = CdmBase.deproxy(el, TextData.class);
333
            }else{
334
                logger.warn("Image gallery had non-textdata description element: " +  fileStr);
335
            }
336
        }
337
        if (textData == null){
338
            textData = TextData.NewInstance();
339
            textData.setFeature(Feature.IMAGE());
340
        }
341
        imageGallery.addElement(textData);
342
        textData.addMedia(media);
343
    }
344

    
345
    /**
346
     * Creates
347
     * @param uriString
348
     * @param readDataFromUrl
349
     * @see #READ_MEDIA_DATA
350
     * @return
351
     * @throws MalformedURLException
352
     */
353
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
354
        if( uriString == null){
355
            return null;
356
        } else {
357
            uriString = uriString.replace(" ", "%20");  //replace whitespace
358
            try {
359
                ImageInfo imageInfo = null;
360
                URI uri = new URI(uriString);
361

    
362
                try {
363
                    if (readMediaData){
364
                        logger.info("Read media data from: " + uri);
365
                        imageInfo = ImageInfo.NewInstance(uri, 0);
366
                    }
367
                } catch (Exception e) {
368
                    String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e.getMessage();
369
                    logger.warn(message);
370
                }
371
                ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
372

    
373
                MediaRepresentation representation = MediaRepresentation.NewInstance();
374

    
375
                if(imageInfo != null){
376
                    representation.setMimeType(imageInfo.getMimeType());
377
                    representation.setSuffix(imageInfo.getSuffix());
378
                }
379
                representation.addRepresentationPart(imageFile);
380
                Media media = Media.NewInstance();
381
                media.addRepresentation(representation);
382

    
383
                if (uriStrThumb != null){
384
                    ImageInfo imageInfoThumb = null;
385
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
386
                    URI uriThumb = new URI(uriStrThumb);
387
                    try {
388
                        if (readMediaData){
389
                            logger.info("Read media data from: " + uriThumb);
390
                            imageInfoThumb = ImageInfo.NewInstance(uriThumb, 0);
391
                        }
392
                    } catch (Exception e) {
393
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
394
                        logger.warn(message);
395
                    }
396

    
397
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
398
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
399
                    if(imageInfoThumb != null){
400
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
401
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
402
                    }
403
                    reprThumb.addRepresentationPart(imageFileFhumb);
404
                    media.addRepresentation(reprThumb);
405
                }
406

    
407
                return media;
408
            } catch (URISyntaxException e1) {
409
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
410
                logger.warn(message);
411
                return null;
412
            }
413
        }
414
    }
415

    
416
    /**
417
     * @param taxon
418
     * @return
419
     */
420
    private Set<String> getAllExistingUrls(Taxon taxon) {
421
        Set<String> result = new HashSet<>();
422
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
423
        for (TaxonDescription td : descriptions){
424
            if (td.isImageGallery()){
425
                for (DescriptionElementBase deb : td.getElements()){
426
                    if (deb.isInstanceOf(TextData.class)){
427
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
428
                        for (Media media :textData.getMedia()){
429
                            for (MediaRepresentation rep : media.getRepresentations()){
430
                                for (MediaRepresentationPart part : rep.getParts()){
431
                                    URI uri = part.getUri();
432
                                    if (uri != null){
433
                                        String uriStr = uri.toString();
434
                                        result.add(uriStr);
435
                                    }
436
                                }
437
                            }
438
                        }
439

    
440
                    }
441
                }
442
            }
443
        }
444
        return result;
445
    }
446

    
447
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
448

    
449
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
450
        taxonNameStr = adaptName(taxonNameStr);
451
        config.setTaxonNameTitle(taxonNameStr);
452
        config.setIncludeSynonyms(false);
453
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
454
        if (list.isEmpty()){
455
//            logger.warn("Taxon not found for media: " + taxonNameStr);
456
            taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString());
457
            config.setTaxonNameTitle(taxonNameStr);
458
            list = app.getTaxonService().findTaxaByName(config);
459
            if (list.isEmpty()){
460
                return null;
461
            }else if (list.size() > 1){
462
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
463
            }
464
        }
465
        if (list.size()>1){
466
            Iterator<TaxonBase> it = list.iterator();
467
            while (it.hasNext()){
468
                Taxon next = (Taxon)it.next();
469
                if (next.getTaxonNodes().isEmpty() && !next.getTaxaForMisappliedName(true).isEmpty()){
470
                    it.remove();
471
                }
472
            }
473
            if (list.size()>1){
474
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
475
                it = list.iterator();
476
                while (it.hasNext()){
477
                    Taxon next = (Taxon)it.next();
478
                    if (next.getTaxonNodes().isEmpty()){
479
                        it.remove();
480
                    }
481
                }
482
                if (list.size()>1){
483
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
484
                }else if (list.size() < 1){
485
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
486
                    return null;
487
                }
488
            }else if (list.size() < 1){
489
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
490
                return null;
491
            }
492
        }
493
        TaxonBase<?> taxonBase = list.get(0);
494
        Taxon result;
495
        if (taxonBase.isInstanceOf(Synonym.class)){
496
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
497
        }else{
498
            result = CdmBase.deproxy(taxonBase, Taxon.class);
499
        }
500
        return result;
501
    }
502

    
503
    /**
504
     * @param taxonNameStr
505
     * @return
506
     */
507
    private String adaptName(String taxonNameStr) {
508
//        if (taxonNameStr.equals("Hypericum cerastoides")){
509
//            taxonNameStr = "Hypericum cerastioides";
510
//        }
511
        return taxonNameStr;
512
    }
513

    
514
	private void test(){
515
	    File f = new File(path);
516
	    String[] list = f.list();
517
	    List<String> fullFileNames = new ArrayList<>();
518
	    for (String fileName : list){
519
	        fullFileNames.add(path + fileName);
520
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
521
	            System.out.println(fileName);
522
	        }
523
	    }
524
	}
525

    
526
	private void updateMetadata(ICdmDataSource cdmDestination){
527
        CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
528
        TransactionStatus tx = app.startTransaction();
529

    
530
        deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(app);
531

    
532
        List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
533
        for (Media media : list){
534
            String fileName = getUrlStringForMedia(media);
535
            if (fileName.startsWith(urlPath)){
536
                fileName = fileName.replace(urlPath, "");
537
                if (fileName.equals("Acinos_exiguus_C1.jpg")){  //for debugging only
538
//                  System.out.println(fileName);
539
                    makeMetaData(media, fileName, true);
540
                    makeTitle(media, fileName, true);
541
                }
542
            }else{
543
                logger.warn("Filename does not start with standard url path: " + fileName);
544
            }
545
        }
546

    
547
        if (testOnly){
548
            tx.setRollbackOnly();
549
        }
550
        app.commitTransaction(tx);
551

    
552
	}
553

    
554
	/**
555
     * @param media
556
	 * @return
557
     */
558
    private String getUrlStringForMedia(Media media) {
559
        String result = null;
560
        for (MediaRepresentation rep : media.getRepresentations()){
561
            for (MediaRepresentationPart part : rep.getParts()){
562
                URI uri = part.getUri();
563
                if (uri != null){
564
                    if (result != null){
565
                        logger.warn("More than 1 uri exists for media "+ media.getId());
566
                    }else{
567
                        result = uri.toString();
568
                    }
569
                }
570
            }
571
        }
572
        return result;
573
    }
574

    
575
    /**
576
	 * @param args
577
	 */
578
	public static void main(String[] args) {
579
		CyprusImagesActivator me = new CyprusImagesActivator();
580
		if (update_notCreate){
581
		    me.updateMetadata(cdmDestination);
582
		}else{
583
		    me.doImport(cdmDestination);
584
		}
585
//		me.test();
586
		System.exit(0);
587
	}
588

    
589
}
(3-3/4)