Project

General

Profile

Download (22.4 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.app.cyprus;
11

    
12
import java.io.File;
13
import java.io.IOException;
14
import java.net.MalformedURLException;
15
import java.net.URI;
16
import java.net.URISyntaxException;
17
import java.util.ArrayList;
18
import java.util.HashMap;
19
import java.util.HashSet;
20
import java.util.Iterator;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.Set;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26

    
27
import org.apache.commons.imaging.ImageReadException;
28
import org.apache.commons.imaging.Imaging;
29
import org.apache.commons.imaging.common.GenericImageMetadata.GenericImageMetadataItem;
30
import org.apache.commons.imaging.common.ImageMetadata;
31
import org.apache.commons.imaging.common.ImageMetadata.ImageMetadataItem;
32
import org.apache.log4j.Logger;
33
import org.joda.time.DateTime;
34
import org.joda.time.format.DateTimeFormat;
35
import org.joda.time.format.DateTimeFormatter;
36
import org.springframework.transaction.TransactionStatus;
37

    
38
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
39
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
40
import eu.etaxonomy.cdm.app.common.CdmDestinations;
41
import eu.etaxonomy.cdm.common.UTF8;
42
import eu.etaxonomy.cdm.common.media.ImageInfo;
43
import eu.etaxonomy.cdm.database.DbSchemaValidation;
44
import eu.etaxonomy.cdm.database.ICdmDataSource;
45
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
46
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
47
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
48
import eu.etaxonomy.cdm.model.agent.AgentBase;
49
import eu.etaxonomy.cdm.model.agent.Institution;
50
import eu.etaxonomy.cdm.model.agent.Person;
51
import eu.etaxonomy.cdm.model.common.CdmBase;
52
import eu.etaxonomy.cdm.model.common.Language;
53
import eu.etaxonomy.cdm.model.common.TimePeriod;
54
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
55
import eu.etaxonomy.cdm.model.description.Feature;
56
import eu.etaxonomy.cdm.model.description.TaxonDescription;
57
import eu.etaxonomy.cdm.model.description.TextData;
58
import eu.etaxonomy.cdm.model.media.ImageFile;
59
import eu.etaxonomy.cdm.model.media.Media;
60
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
61
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
62
import eu.etaxonomy.cdm.model.media.Rights;
63
import eu.etaxonomy.cdm.model.media.RightsType;
64
import eu.etaxonomy.cdm.model.taxon.Synonym;
65
import eu.etaxonomy.cdm.model.taxon.Taxon;
66
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
67

    
68
/**
69
 * @author a.mueller
70
 * @since 05.2017
71
 */
72
public class CyprusImagesActivator {
73
	// Log4j logger shared by all methods of this activator.
	private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
74

    
75

    
76
	// Database that main() passes to doImport() / updateMetadata().
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_dev();
77
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_cyprus();
78

    
79
	// If true the transaction is marked rollback-only before it is committed
	// and addMedia() does not attach new media to the taxon's image gallery.
	static boolean testOnly = false;
80
	// Selects the mode in main(): true runs updateMetadata(), false runs doImport().
	static boolean update_notCreate = false;
81
	//if true, data will always be updated, if false, only missing data will be updated
82
	static boolean forceUpdate = false;
83

    
84
    // Local directory whose entries are listed and whose image files are read for metadata.
    private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
85
    // Prefix that is prepended to a file name to build the URI stored in the media representation.
    private static final String urlPath = "http://media.bgbm.org/erez/erez?src=EditWP6/zypern/photos/";
86

    
87
    // Created at the start of doImport() / updateMetadata(); used to look up existing authors and copyrights.
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
88

    
89

    
90
	private void doImport(ICdmDataSource cdmDestination){
91

    
92
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
93
		TransactionStatus tx = app.startTransaction();
94

    
95
		deduplicationHelper = (ImportDeduplicationHelper)ImportDeduplicationHelper.NewInstance(app);
96

    
97
        File file = new File(path);
98
        String[] fileList = file.list();
99
        Set<String> notFound = new HashSet<>();
100

    
101
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
102
        Pattern pattern = Pattern.compile(regEx);
103

    
104
        for (String fileName : fileList){
105

    
106
            Matcher matcher = pattern.matcher(fileName);
107
            if (matcher.matches()){
108
//                System.out.println(fileName);
109
                String taxonName = matcher.group(1);
110
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
111
                Taxon taxon = getAcceptedTaxon(app, taxonName);
112
                if (taxon == null){
113
                    if (!notFound.contains(taxonName)){
114
                        notFound.add(taxonName);
115
                        logger.warn("Taxon not found: " + taxonName);
116
                    }
117
                }else{
118
                    handleTaxon(app, taxon, fileName);
119
                }
120
            }else{
121
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
122
                    logger.warn("Incorrect filename:" + fileName);
123
                }
124
            }
125
        }
126

    
127
//		app.getTaxonService().saveOrUpdate(taxaToSave);
128

    
129
		if (testOnly){
130
		    tx.setRollbackOnly();
131
		}
132
		app.commitTransaction(tx);
133
	}
134

    
135
    /**
136
     * @param app
137
     * @param taxon
138
     * @param fileName
139
     */
140
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
141
        Set<String> urlStr = getAllExistingUrls(taxon);
142
        String fullName = urlPath + fileName;
143
        if (urlStr.contains(fullName)){
144
            return;
145
        }else{
146
            addMedia(app, taxon, fileName);
147
        }
148
    }
149

    
150
    /**
151
     * @param app
152
     * @param taxon
153
     * @param fileName
154
     */
155
    private void addMedia(CdmApplicationController app, Taxon taxon, String fileName) {
156
        try {
157
            String fullName = urlPath + fileName;
158
            Media media = getImageMedia(fullName, null, true);
159
            makeMetaData(media, fileName, false);
160
            makeTitle(media, fileName, false);
161
            if (!testOnly){
162
                makeTextData(fileName, media, taxon);
163
            }
164

    
165
        } catch (Exception e) {
166
            e.printStackTrace();
167
            return;
168
        }
169
    }
170

    
171
    /**
172
     * @param media
173
     * @param fileName
174
     * @param b
175
     */
176
    private void makeTitle(Media media, String fileName, boolean updateOnly) {
177
        String title = fileName.replace("_s_"," subsp. ")
178
                .replace("_"," ").replace(".jpg","").replace(".JPG","");
179
        if ( (!updateOnly) || media.getAllTitles().isEmpty()){
180
            media.putTitle(Language.LATIN(), title);
181
        }
182
    }
183

    
184
    /**
185
     * @param media
186
     */
187
    private void makeMetaData(Media media, String fileName, boolean updateOnly) {
188

    
189
        File file = new File(path + fileName);
190
        if (!file.exists()){
191
            logger.warn("File for filename " +  fileName + " does not exist.");
192
            return;
193
        }
194

    
195
        Map<String, String> keywords = new HashMap<>();
196
        String copyright = null;
197
        String artistStr = null;
198
        String created = null;
199
        try{
200
//            IImageMetadata metadata = Sanselan.getMetadata(file);
201
            ImageMetadata metadata = Imaging.getMetadata(file);
202
            List<? extends ImageMetadataItem> items = metadata.getItems();
203
            for (Object object : items){
204
                ImageMetadataItem metadataItem = (ImageMetadataItem) object;
205
//                System.out.println(item.getKeyword() +  ":    " + item.getText());
206
                if (metadataItem instanceof GenericImageMetadataItem){
207
                    GenericImageMetadataItem item = (GenericImageMetadataItem) metadataItem;
208

    
209
                    String keyword = item.getKeyword().toLowerCase();
210
                    String value =removeQuots(item.getText());
211

    
212
                    if("keywords".equals(keyword)){
213
                        String[] splits = value.split(":");
214
                        if (splits.length == 2){
215
                            keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
216
                        }else{
217
                            logger.warn("Keyword has not correct format and can not be parsed: " + value +  "  for file " + fileName);
218
                        }
219
                    }else if ("Copyright Notice".equalsIgnoreCase(keyword)){
220
                        copyright = value;
221
                    }else if ("artist".equals(keyword)){
222
                        artistStr = value;
223
                    }else if ("date time original".equalsIgnoreCase(item.getKeyword())){
224
                        created = value;
225
                    }
226
                }
227
            }
228
        } catch (ImageReadException | IOException e1) {
229
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
230
            e1.printStackTrace();
231
        }
232

    
233

    
234
        AgentBase<?> artistAgent = null;
235
        Rights right = null;
236
        DateTime createdDate = null;
237
        String locality = null;
238

    
239
        //artist
240
        if (keywords.get("photographer") != null){
241
            String artist = keywords.get("photographer");
242
            artistAgent = makePerson(artist, fileName);
243
        }
244
        if (artistStr != null){
245
            if (keywords.get("photographer") == null){
246
                artistAgent = makePerson(artistStr, fileName);
247
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
248
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
249
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
250
            }
251
        }
252

    
253
        //locality
254
        if (keywords.get("locality") != null){
255
            locality = keywords.get("locality");
256
        }
257

    
258
        //copyright
259
        if (copyright != null){
260
            AgentBase<?> agent;
261
            if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
262
                agent = Institution.NewNamedInstance(copyright);
263
            }else{
264
                agent = makePerson(copyright, fileName);
265
            }
266
            right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
267
            right.setAgent(agent);
268
            right = deduplicationHelper.getExistingCopyright(null, right);
269
        }
270

    
271
        //created
272
        if (created != null){
273
            DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
274
            try {
275
                createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
276
            } catch (Exception e) {
277
                logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
278
            }
279
        }
280

    
281
        boolean force = !updateOnly || forceUpdate;
282
        //add to media
283
        if (artistAgent != null && (force || media.getArtist() == null)){
284
            media.setArtist(artistAgent);
285
        }
286
        if (right != null && (force || media.getRights().isEmpty())){
287
            media.removeRights(right);
288
            media.addRights(right);
289
        }
290
        if (createdDate != null && (force || media.getMediaCreated() == null)){
291
            media.setMediaCreated(TimePeriod.NewInstance(createdDate));
292
        }
293
        if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
294
            media.putDescription(Language.ENGLISH(), locality);
295
        }
296
    }
297

    
298
    /**
299
     * @param artist
300
     * @return
301
     */
302
    private Person makePerson(String artist, String fileName) {
303
        artist = artist.trim();
304
        String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
305
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
306
        Person person = Person.NewInstance();
307
        if (matcher.matches()){
308
            person.setGivenName(matcher.group(1).trim());
309
            person.setFamilyName(matcher.group(2).trim());
310
        }else{
311
            person.setTitleCache(artist, true);
312
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
313
        }
314

    
315
        person = deduplicationHelper.getExistingAuthor(null, person);
316
        return person;
317
    }
318

    
319
    private String removeQuots(String text) {
320
        if (text.startsWith("'") && text.endsWith("'")){
321
            return text.substring(1, text.length() -1);
322
        }else{
323
            return text;
324
        }
325
    }
326

    
327
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
328
        TaxonDescription imageGallery = taxon.getImageGallery(true);
329
        TextData textData = null;
330
        if (!imageGallery.getElements().isEmpty()){
331
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
332
            if (el.isInstanceOf(TextData.class)){
333
                textData = CdmBase.deproxy(el, TextData.class);
334
            }else{
335
                logger.warn("Image gallery had non-textdata description element: " +  fileStr);
336
            }
337
        }
338
        if (textData == null){
339
            textData = TextData.NewInstance();
340
            textData.setFeature(Feature.IMAGE());
341
        }
342
        imageGallery.addElement(textData);
343
        textData.addMedia(media);
344
    }
345

    
346
    /**
347
     * Creates
348
     * @param uriString
349
     * @param readDataFromUrl
350
     * @see #READ_MEDIA_DATA
351
     * @return
352
     * @throws MalformedURLException
353
     */
354
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
355
        if( uriString == null){
356
            return null;
357
        } else {
358
            uriString = uriString.replace(" ", "%20");  //replace whitespace
359
            try {
360
                ImageInfo imageInfo = null;
361
                URI uri = new URI(uriString);
362

    
363
                try {
364
                    if (readMediaData){
365
                        logger.info("Read media data from: " + uri);
366
                        imageInfo = ImageInfo.NewInstance(uri, 0);
367
                    }
368
                } catch (Exception e) {
369
                    String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e.getMessage();
370
                    logger.warn(message);
371
                }
372
                ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
373

    
374
                MediaRepresentation representation = MediaRepresentation.NewInstance();
375

    
376
                if(imageInfo != null){
377
                    representation.setMimeType(imageInfo.getMimeType());
378
                    representation.setSuffix(imageInfo.getSuffix());
379
                }
380
                representation.addRepresentationPart(imageFile);
381
                Media media = Media.NewInstance();
382
                media.addRepresentation(representation);
383

    
384
                if (uriStrThumb != null){
385
                    ImageInfo imageInfoThumb = null;
386
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
387
                    URI uriThumb = new URI(uriStrThumb);
388
                    try {
389
                        if (readMediaData){
390
                            logger.info("Read media data from: " + uriThumb);
391
                            imageInfoThumb = ImageInfo.NewInstance(uriThumb, 0);
392
                        }
393
                    } catch (Exception e) {
394
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
395
                        logger.warn(message);
396
                    }
397

    
398
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
399
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
400
                    if(imageInfoThumb != null){
401
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
402
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
403
                    }
404
                    reprThumb.addRepresentationPart(imageFileFhumb);
405
                    media.addRepresentation(reprThumb);
406
                }
407

    
408
                return media;
409
            } catch (URISyntaxException e1) {
410
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
411
                logger.warn(message);
412
                return null;
413
            }
414
        }
415
    }
416

    
417
    /**
418
     * @param taxon
419
     * @return
420
     */
421
    private Set<String> getAllExistingUrls(Taxon taxon) {
422
        Set<String> result = new HashSet<>();
423
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
424
        for (TaxonDescription td : descriptions){
425
            if (td.isImageGallery()){
426
                for (DescriptionElementBase deb : td.getElements()){
427
                    if (deb.isInstanceOf(TextData.class)){
428
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
429
                        for (Media media :textData.getMedia()){
430
                            for (MediaRepresentation rep : media.getRepresentations()){
431
                                for (MediaRepresentationPart part : rep.getParts()){
432
                                    URI uri = part.getUri();
433
                                    if (uri != null){
434
                                        String uriStr = uri.toString();
435
                                        result.add(uriStr);
436
                                    }
437
                                }
438
                            }
439
                        }
440

    
441
                    }
442
                }
443
            }
444
        }
445
        return result;
446
    }
447

    
448
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
449

    
450
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
451
        taxonNameStr = adaptName(taxonNameStr);
452
        config.setTaxonNameTitle(taxonNameStr);
453
        config.setIncludeSynonyms(false);
454
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
455
        if (list.isEmpty()){
456
//            logger.warn("Taxon not found for media: " + taxonNameStr);
457
            taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString());
458
            config.setTaxonNameTitle(taxonNameStr);
459
            list = app.getTaxonService().findTaxaByName(config);
460
            if (list.isEmpty()){
461
                return null;
462
            }else if (list.size() > 1){
463
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
464
            }
465
        }
466
        if (list.size()>1){
467
            Iterator<TaxonBase> it = list.iterator();
468
            while (it.hasNext()){
469
                Taxon next = (Taxon)it.next();
470
                if (next.getTaxonNodes().isEmpty() && !next.getTaxaForMisappliedName(true).isEmpty()){
471
                    it.remove();
472
                }
473
            }
474
            if (list.size()>1){
475
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
476
                it = list.iterator();
477
                while (it.hasNext()){
478
                    Taxon next = (Taxon)it.next();
479
                    if (next.getTaxonNodes().isEmpty()){
480
                        it.remove();
481
                    }
482
                }
483
                if (list.size()>1){
484
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
485
                }else if (list.size() < 1){
486
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
487
                    return null;
488
                }
489
            }else if (list.size() < 1){
490
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
491
                return null;
492
            }
493
        }
494
        TaxonBase<?> taxonBase = list.get(0);
495
        Taxon result;
496
        if (taxonBase.isInstanceOf(Synonym.class)){
497
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
498
        }else{
499
            result = CdmBase.deproxy(taxonBase, Taxon.class);
500
        }
501
        return result;
502
    }
503

    
504
    /**
505
     * @param taxonNameStr
506
     * @return
507
     */
508
    private String adaptName(String taxonNameStr) {
509
//        if (taxonNameStr.equals("Hypericum cerastoides")){
510
//            taxonNameStr = "Hypericum cerastioides";
511
//        }
512
        return taxonNameStr;
513
    }
514

    
515
	private void test(){
516
	    File f = new File(path);
517
	    String[] list = f.list();
518
	    List<String> fullFileNames = new ArrayList<>();
519
	    for (String fileName : list){
520
	        fullFileNames.add(path + fileName);
521
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
522
	            System.out.println(fileName);
523
	        }
524
	    }
525
	}
526

    
527
	private void updateMetadata(ICdmDataSource cdmDestination){
528
        CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
529
        TransactionStatus tx = app.startTransaction();
530

    
531
        deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(app);
532

    
533
        List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
534
        for (Media media : list){
535
            String fileName = getUrlStringForMedia(media);
536
            if (fileName.startsWith(urlPath)){
537
                fileName = fileName.replace(urlPath, "");
538
                if (fileName.equals("Acinos_exiguus_C1.jpg")){  //for debugging only
539
//                  System.out.println(fileName);
540
                    makeMetaData(media, fileName, true);
541
                    makeTitle(media, fileName, true);
542
                }
543
            }else{
544
                logger.warn("Filename does not start with standard url path: " + fileName);
545
            }
546
        }
547

    
548
        if (testOnly){
549
            tx.setRollbackOnly();
550
        }
551
        app.commitTransaction(tx);
552

    
553
	}
554

    
555
	/**
556
     * @param media
557
	 * @return
558
     */
559
    private String getUrlStringForMedia(Media media) {
560
        String result = null;
561
        for (MediaRepresentation rep : media.getRepresentations()){
562
            for (MediaRepresentationPart part : rep.getParts()){
563
                URI uri = part.getUri();
564
                if (uri != null){
565
                    if (result != null){
566
                        logger.warn("More than 1 uri exists for media "+ media.getId());
567
                    }else{
568
                        result = uri.toString();
569
                    }
570
                }
571
            }
572
        }
573
        return result;
574
    }
575

    
576
    /**
577
	 * @param args
578
	 */
579
	public static void main(String[] args) {
580
		CyprusImagesActivator me = new CyprusImagesActivator();
581
		if (update_notCreate){
582
		    me.updateMetadata(cdmDestination);
583
		}else{
584
		    me.doImport(cdmDestination);
585
		}
586
//		me.test();
587
		System.exit(0);
588
	}
589

    
590
}
(3-3/4)