Project

General

Profile

Download (21.8 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.app.cyprus;
11

    
12
import java.io.File;
13
import java.io.IOException;
14
import java.net.MalformedURLException;
15
import java.net.URI;
16
import java.net.URISyntaxException;
17
import java.util.ArrayList;
18
import java.util.HashMap;
19
import java.util.HashSet;
20
import java.util.Iterator;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.Set;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26

    
27
import org.apache.log4j.Logger;
28
import org.apache.sanselan.ImageReadException;
29
import org.apache.sanselan.Sanselan;
30
import org.apache.sanselan.common.IImageMetadata;
31
import org.apache.sanselan.common.ImageMetadata.Item;
32
import org.joda.time.DateTime;
33
import org.joda.time.format.DateTimeFormat;
34
import org.joda.time.format.DateTimeFormatter;
35
import org.springframework.transaction.TransactionStatus;
36

    
37
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
38
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
39
import eu.etaxonomy.cdm.app.common.CdmDestinations;
40
import eu.etaxonomy.cdm.common.UTF8;
41
import eu.etaxonomy.cdm.common.media.ImageInfo;
42
import eu.etaxonomy.cdm.database.DbSchemaValidation;
43
import eu.etaxonomy.cdm.database.ICdmDataSource;
44
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
45
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
46
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
47
import eu.etaxonomy.cdm.model.agent.AgentBase;
48
import eu.etaxonomy.cdm.model.agent.Institution;
49
import eu.etaxonomy.cdm.model.agent.Person;
50
import eu.etaxonomy.cdm.model.common.CdmBase;
51
import eu.etaxonomy.cdm.model.common.Language;
52
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53
import eu.etaxonomy.cdm.model.description.Feature;
54
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55
import eu.etaxonomy.cdm.model.description.TextData;
56
import eu.etaxonomy.cdm.model.media.ImageFile;
57
import eu.etaxonomy.cdm.model.media.Media;
58
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
59
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
60
import eu.etaxonomy.cdm.model.media.Rights;
61
import eu.etaxonomy.cdm.model.media.RightsType;
62
import eu.etaxonomy.cdm.model.taxon.Synonym;
63
import eu.etaxonomy.cdm.model.taxon.Taxon;
64
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
65

    
66
/**
67
 * @author a.mueller
68
 * @created 05.2017
69
 */
70
public class CyprusImagesActivator {
71
	private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
72

    
73

    
74
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_dev();
75
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_production();
76

    
77
	static boolean testOnly = true;
78
	static boolean update_notCreate = true;
79
	//if true, data will always be updated, if false, only missing data will be updated
80
	static boolean forceUpdate = false;
81

    
82
    private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
83
    private static final String urlPath = "http://media.bgbm.org/erez/erez?src=EditWP6/zypern/photos/";
84

    
85
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
86

    
87

    
88
	private void doImport(ICdmDataSource cdmDestination){
89

    
90
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
91
		TransactionStatus tx = app.startTransaction();
92

    
93
		deduplicationHelper = (ImportDeduplicationHelper)ImportDeduplicationHelper.NewInstance(app);
94

    
95
        File file = new File(path);
96
        String[] fileList = file.list();
97
        Set<String> notFound = new HashSet<>();
98

    
99
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
100
        Pattern pattern = Pattern.compile(regEx);
101

    
102
        for (String fileName : fileList){
103

    
104
            Matcher matcher = pattern.matcher(fileName);
105
            if (matcher.matches()){
106
//                System.out.println(fileName);
107
                String taxonName = matcher.group(1);
108
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
109
                Taxon taxon = getAcceptedTaxon(app, taxonName);
110
                if (taxon == null){
111
                    if (!notFound.contains(taxonName)){
112
                        notFound.add(taxonName);
113
                        logger.warn("Taxon not found: " + taxonName);
114
                    }
115
                }else{
116
                    handleTaxon(app, taxon, fileName);
117
                }
118
            }else{
119
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
120
                    logger.warn("Incorrect filename:" + fileName);
121
                }
122
            }
123
        }
124

    
125
//		app.getTaxonService().saveOrUpdate(taxaToSave);
126

    
127
		if (testOnly){
128
		    tx.setRollbackOnly();
129
		}
130
		app.commitTransaction(tx);
131
	}
132

    
133
    /**
134
     * @param app
135
     * @param taxon
136
     * @param fileName
137
     */
138
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
139
        Set<String> urlStr = getAllExistingUrls(taxon);
140
        String fullName = urlPath + fileName;
141
        if (urlStr.contains(fullName)){
142
            return;
143
        }else{
144
            addMedia(app, taxon, fileName);
145
        }
146
    }
147

    
148
    /**
149
     * @param app
150
     * @param taxon
151
     * @param fileName
152
     */
153
    private void addMedia(CdmApplicationController app, Taxon taxon, String fileName) {
154
        try {
155
            String fullName = urlPath + fileName;
156
            Media media = getImageMedia(fullName, null, true);
157
            makeMetaData(media, fileName, false);
158
            makeTitle(media, fileName, false);
159
            if (!testOnly){
160
                makeTextData(fileName, media, taxon);
161
            }
162

    
163
        } catch (Exception e) {
164
            e.printStackTrace();
165
            return;
166
        }
167
    }
168

    
169
    /**
170
     * @param media
171
     * @param fileName
172
     * @param b
173
     */
174
    private void makeTitle(Media media, String fileName, boolean updateOnly) {
175
        String title = fileName.replace("_s_"," subsp. ")
176
                .replace("_"," ").replace(".jpg","").replace(".JPG","");
177
        if ( (!updateOnly) || media.getAllTitles().isEmpty()){
178
            media.putTitle(Language.LATIN(), title);
179
        }
180
    }
181

    
182
    /**
183
     * @param media
184
     */
185
    private void makeMetaData(Media media, String fileName, boolean updateOnly) {
186

    
187
        File file = new File(path + fileName);
188
        if (!file.exists()){
189
            logger.warn("File for filename " +  fileName + " does not exist.");
190
            return;
191
        }
192

    
193
        Map<String, String> keywords = new HashMap<>();
194
        String copyright = null;
195
        String artistStr = null;
196
        String created = null;
197
        try{
198
            IImageMetadata metadata = Sanselan.getMetadata(file);
199
            ArrayList<?> items = metadata.getItems();
200
            for (Object object : items){
201
                Item item = (Item) object;
202
//                System.out.println(item.getKeyword() +  ":    " + item.getText());
203
                String keyword = item.getKeyword().toLowerCase();
204
                String value =removeQuots(item.getText());
205

    
206
                if("keywords".equals(keyword)){
207
                    String[] splits = value.split(":");
208
                    if (splits.length == 2){
209
                        keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
210
                    }else{
211
                        logger.warn("Keyword has not correct format and can not be parsed: " + value +  "  for file " + fileName);
212
                    }
213
                }else if ("Copyright Notice".equalsIgnoreCase(keyword)){
214
                    copyright = value;
215
                }else if ("artist".equals(keyword)){
216
                    artistStr = value;
217
                }else if ("date time original".equalsIgnoreCase(item.getKeyword())){
218
                    created = value;
219
                }
220
            }
221
        } catch (ImageReadException | IOException e1) {
222
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
223
        }
224

    
225

    
226
        AgentBase<?> artistAgent = null;
227
        Rights right = null;
228
        DateTime createdDate = null;
229
        String locality = null;
230

    
231
        //artist
232
        if (keywords.get("photographer") != null){
233
            String artist = keywords.get("photographer");
234
            artistAgent = makePerson(artist, fileName);
235
        }
236
        if (artistStr != null){
237
            if (keywords.get("photographer") == null){
238
                artistAgent = makePerson(artistStr, fileName);
239
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
240
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
241
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
242
            }
243
        }
244

    
245
        //locality
246
        if (keywords.get("locality") != null){
247
            locality = keywords.get("locality");
248
        }
249

    
250
        //copyright
251
        if (copyright != null){
252
            AgentBase<?> agent;
253
            if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
254
                agent = Institution.NewNamedInstance(copyright);
255
            }else{
256
                agent = makePerson(copyright, fileName);
257
            }
258
            right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
259
            right.setAgent(agent);
260
            right = deduplicationHelper.getExistingCopyright(null, right);
261
        }
262

    
263
        //created
264
        if (created != null){
265
            DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
266
            try {
267
                createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
268
            } catch (Exception e) {
269
                logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
270
            }
271
        }
272

    
273
        boolean force = !updateOnly || forceUpdate;
274
        //add to media
275
        if (artistAgent != null && (force || media.getArtist() == null)){
276
            media.setArtist(artistAgent);
277
        }
278
        if (right != null && (force || media.getRights().isEmpty())){
279
            media.removeRights(right);
280
            media.addRights(right);
281
        }
282
        if (createdDate != null && (force || media.getMediaCreated() == null)){
283
            media.setMediaCreated(createdDate);
284
        }
285
        if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
286
            media.putDescription(Language.ENGLISH(), locality);
287
        }
288
    }
289

    
290
    /**
291
     * @param artist
292
     * @return
293
     */
294
    private Person makePerson(String artist, String fileName) {
295
        artist = artist.trim();
296
        String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
297
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
298
        Person person = Person.NewInstance();
299
        if (matcher.matches()){
300
            person.setFirstname(matcher.group(1).trim());
301
            person.setLastname(matcher.group(2).trim());
302
        }else{
303
            person.setTitleCache(artist, true);
304
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
305
        }
306

    
307
        person = (Person)deduplicationHelper.getExistingAuthor(null, person);
308
        return person;
309
    }
310

    
311
    private String removeQuots(String text) {
312
        if (text.startsWith("'") && text.endsWith("'")){
313
            return text.substring(1, text.length() -1);
314
        }else{
315
            return text;
316
        }
317
    }
318

    
319
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
320
        TaxonDescription imageGallery = taxon.getImageGallery(true);
321
        TextData textData = null;
322
        if (!imageGallery.getElements().isEmpty()){
323
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
324
            if (el.isInstanceOf(TextData.class)){
325
                textData = CdmBase.deproxy(el, TextData.class);
326
            }else{
327
                logger.warn("Image gallery had non-textdata description elmenet: " +  fileStr);
328
            }
329
        }
330
        if (textData == null){
331
            textData = TextData.NewInstance();
332
            textData.setFeature(Feature.IMAGE());
333
        }
334
        imageGallery.addElement(textData);
335
        textData.addMedia(media);
336
    }
337

    
338
    /**
339
     * Creates
340
     * @param uriString
341
     * @param readDataFromUrl
342
     * @see #READ_MEDIA_DATA
343
     * @return
344
     * @throws MalformedURLException
345
     */
346
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
347
        if( uriString == null){
348
            return null;
349
        } else {
350
            uriString = uriString.replace(" ", "%20");  //replace whitespace
351
            try {
352
                ImageInfo imageInfo = null;
353
                URI uri = new URI(uriString);
354

    
355
                try {
356
                    if (readMediaData){
357
                        logger.info("Read media data from: " + uri);
358
                        imageInfo = ImageInfo.NewInstance(uri, 0);
359
                    }
360
                } catch (Exception e) {
361
                    String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e.getMessage();
362
                    logger.warn(message);
363
                }
364
                ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
365

    
366
                MediaRepresentation representation = MediaRepresentation.NewInstance();
367

    
368
                if(imageInfo != null){
369
                    representation.setMimeType(imageInfo.getMimeType());
370
                    representation.setSuffix(imageInfo.getSuffix());
371
                }
372
                representation.addRepresentationPart(imageFile);
373
                Media media = Media.NewInstance();
374
                media.addRepresentation(representation);
375

    
376
                if (uriStrThumb != null){
377
                    ImageInfo imageInfoThumb = null;
378
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
379
                    URI uriThumb = new URI(uriStrThumb);
380
                    try {
381
                        if (readMediaData){
382
                            logger.info("Read media data from: " + uriThumb);
383
                            imageInfoThumb = ImageInfo.NewInstance(uriThumb, 0);
384
                        }
385
                    } catch (Exception e) {
386
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
387
                        logger.warn(message);
388
                    }
389

    
390
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
391
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
392
                    if(imageInfoThumb != null){
393
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
394
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
395
                    }
396
                    reprThumb.addRepresentationPart(imageFileFhumb);
397
                    media.addRepresentation(reprThumb);
398
                }
399

    
400
                return media;
401
            } catch (URISyntaxException e1) {
402
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
403
                logger.warn(message);
404
                return null;
405
            }
406
        }
407
    }
408

    
409
    /**
410
     * @param taxon
411
     * @return
412
     */
413
    private Set<String> getAllExistingUrls(Taxon taxon) {
414
        Set<String> result = new HashSet<>();
415
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
416
        for (TaxonDescription td : descriptions){
417
            if (td.isImageGallery()){
418
                for (DescriptionElementBase deb : td.getElements()){
419
                    if (deb.isInstanceOf(TextData.class)){
420
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
421
                        for (Media media :textData.getMedia()){
422
                            for (MediaRepresentation rep : media.getRepresentations()){
423
                                for (MediaRepresentationPart part : rep.getParts()){
424
                                    URI uri = part.getUri();
425
                                    if (uri != null){
426
                                        String uriStr = uri.toString();
427
                                        result.add(uriStr);
428
                                    }
429
                                }
430
                            }
431
                        }
432

    
433
                    }
434
                }
435
            }
436
        }
437
        return result;
438
    }
439

    
440
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
441

    
442
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
443
        taxonNameStr = adaptName(taxonNameStr);
444
        config.setTaxonNameTitle(taxonNameStr);
445
        config.setIncludeSynonyms(false);
446
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
447
        if (list.isEmpty()){
448
//            logger.warn("Taxon not found for media: " + taxonNameStr);
449
            taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString());
450
            config.setTaxonNameTitle(taxonNameStr);
451
            list = app.getTaxonService().findTaxaByName(config);
452
            if (list.isEmpty()){
453
                return null;
454
            }else if (list.size() > 1){
455
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
456
            }
457
        }
458
        if (list.size()>1){
459
            Iterator<TaxonBase> it = list.iterator();
460
            while (it.hasNext()){
461
                Taxon next = (Taxon)it.next();
462
                if (next.getTaxonNodes().isEmpty() && !next.getTaxonForMisappliedName().isEmpty()){
463
                    it.remove();
464
                }
465
            }
466
            if (list.size()>1){
467
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
468
                it = list.iterator();
469
                while (it.hasNext()){
470
                    Taxon next = (Taxon)it.next();
471
                    if (next.getTaxonNodes().isEmpty()){
472
                        it.remove();
473
                    }
474
                }
475
                if (list.size()>1){
476
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
477
                }else if (list.size() < 1){
478
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
479
                    return null;
480
                }
481
            }else if (list.size() < 1){
482
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
483
                return null;
484
            }
485
        }
486
        TaxonBase<?> taxonBase = list.get(0);
487
        Taxon result;
488
        if (taxonBase.isInstanceOf(Synonym.class)){
489
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
490
        }else{
491
            result = CdmBase.deproxy(taxonBase, Taxon.class);
492
        }
493
        return result;
494
    }
495

    
496
    /**
497
     * @param taxonNameStr
498
     * @return
499
     */
500
    private String adaptName(String taxonNameStr) {
501
//        if (taxonNameStr.equals("Hypericum cerastoides")){
502
//            taxonNameStr = "Hypericum cerastioides";
503
//        }
504
        return taxonNameStr;
505
    }
506

    
507
	private void test(){
508
	    File f = new File(path);
509
	    String[] list = f.list();
510
	    List<String> fullFileNames = new ArrayList<>();
511
	    for (String fileName : list){
512
	        fullFileNames.add(path + fileName);
513
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
514
	            System.out.println(fileName);
515
	        }
516
	    }
517
	}
518

    
519
	private void updateMetadata(ICdmDataSource cdmDestination){
520
        CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE);
521
        TransactionStatus tx = app.startTransaction();
522

    
523
        deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(app);
524

    
525
        List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
526
        for (Media media : list){
527
            String fileName = getUrlStringForMedia(media);
528
            if (fileName.startsWith(urlPath)){
529
                fileName = fileName.replace(urlPath, "");
530
                if (fileName.equals("Acinos_exiguus_C1.jpg")){  //for debugging only
531
//                  System.out.println(fileName);
532
                    makeMetaData(media, fileName, true);
533
                    makeTitle(media, fileName, true);
534
                }
535
            }else{
536
                logger.warn("Filename does not start with standard url path: " + fileName);
537
            }
538
        }
539

    
540
        if (testOnly){
541
            tx.setRollbackOnly();
542
        }
543
        app.commitTransaction(tx);
544

    
545
	}
546

    
547
	/**
548
     * @param media
549
	 * @return
550
     */
551
    private String getUrlStringForMedia(Media media) {
552
        String result = null;
553
        for (MediaRepresentation rep : media.getRepresentations()){
554
            for (MediaRepresentationPart part : rep.getParts()){
555
                URI uri = part.getUri();
556
                if (uri != null){
557
                    if (result != null){
558
                        logger.warn("More than 1 uri exists for media "+ media.getId());
559
                    }else{
560
                        result = uri.toString();
561
                    }
562
                }
563
            }
564
        }
565
        return result;
566
    }
567

    
568
    /**
569
	 * @param args
570
	 */
571
	public static void main(String[] args) {
572
		CyprusImagesActivator me = new CyprusImagesActivator();
573
		if (update_notCreate){
574
		    me.updateMetadata(cdmDestination);
575
		}else{
576
		    me.doImport(cdmDestination);
577
		}
578
//		me.test();
579
		System.exit(0);
580
	}
581

    
582
}
(3-3/4)