Project

General

Profile

Download (21.7 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.app.cyprus;
11

    
12
import java.io.File;
13
import java.io.IOException;
14
import java.net.MalformedURLException;
15
import java.net.URI;
16
import java.net.URISyntaxException;
17
import java.util.ArrayList;
18
import java.util.HashMap;
19
import java.util.HashSet;
20
import java.util.Iterator;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.Set;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26

    
27
import org.apache.log4j.Logger;
28
import org.apache.sanselan.ImageReadException;
29
import org.apache.sanselan.Sanselan;
30
import org.apache.sanselan.common.IImageMetadata;
31
import org.apache.sanselan.common.ImageMetadata.Item;
32
import org.joda.time.DateTime;
33
import org.joda.time.format.DateTimeFormat;
34
import org.joda.time.format.DateTimeFormatter;
35
import org.springframework.transaction.TransactionStatus;
36

    
37
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
38
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
39
import eu.etaxonomy.cdm.app.common.CdmDestinations;
40
import eu.etaxonomy.cdm.common.UTF8;
41
import eu.etaxonomy.cdm.common.media.ImageInfo;
42
import eu.etaxonomy.cdm.database.DbSchemaValidation;
43
import eu.etaxonomy.cdm.database.ICdmDataSource;
44
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
45
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
46
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
47
import eu.etaxonomy.cdm.model.agent.AgentBase;
48
import eu.etaxonomy.cdm.model.agent.Institution;
49
import eu.etaxonomy.cdm.model.agent.Person;
50
import eu.etaxonomy.cdm.model.common.CdmBase;
51
import eu.etaxonomy.cdm.model.common.Language;
52
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53
import eu.etaxonomy.cdm.model.description.Feature;
54
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55
import eu.etaxonomy.cdm.model.description.TextData;
56
import eu.etaxonomy.cdm.model.media.ImageFile;
57
import eu.etaxonomy.cdm.model.media.Media;
58
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
59
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
60
import eu.etaxonomy.cdm.model.media.Rights;
61
import eu.etaxonomy.cdm.model.media.RightsType;
62
import eu.etaxonomy.cdm.model.taxon.Synonym;
63
import eu.etaxonomy.cdm.model.taxon.Taxon;
64
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
65

    
66
/**
67
 * @author a.mueller
68
 * @created 16.12.2010
69
 */
70
public class CyprusImagesActivator {
71
	private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
72

    
73
	//database validation status (create, update, validate ...)
74
    static DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE;
75

    
76
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_dev();
77
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_production();
78

    
79
	static boolean testOnly = false;
80
	static boolean update_notCreate = true;
81
	static boolean forceUpdate = false;
82

    
83
    private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
84
    private static final String urlPath = "http://media.bgbm.org/erez/erez?src=EditWP6/zypern/photos/";
85

    
86
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
87

    
88

    
89
	private void doImport(ICdmDataSource cdmDestination){
90

    
91
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, hbm2dll);
92
		TransactionStatus tx = app.startTransaction();
93

    
94
		deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(app);
95

    
96
        File file = new File(path);
97
        String[] fileList = file.list();
98
        Set<String> notFound = new HashSet<>();
99

    
100
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
101
        Pattern pattern = Pattern.compile(regEx);
102

    
103
        for (String fileName : fileList){
104

    
105
            Matcher matcher = pattern.matcher(fileName);
106
            if (matcher.matches()){
107
//                System.out.println(fileName);
108
                String taxonName = matcher.group(1);
109
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
110
                Taxon taxon = getAcceptedTaxon(app, taxonName);
111
                if (taxon == null){
112
                    if (!notFound.contains(taxonName)){
113
                        notFound.add(taxonName);
114
                        logger.warn("Taxon not found: " + taxonName);
115
                    }
116
                }else{
117
                    handleTaxon(app, taxon, fileName);
118
                }
119
            }else{
120
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
121
                    logger.warn("Incorrect filename:" + fileName);
122
                }
123
            }
124
        }
125

    
126
//		app.getTaxonService().saveOrUpdate(taxaToSave);
127

    
128
		if (testOnly){
129
		    tx.setRollbackOnly();
130
		}
131
		app.commitTransaction(tx);
132
	}
133

    
134
    /**
135
     * @param app
136
     * @param taxon
137
     * @param fileName
138
     */
139
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
140
        Set<String> urlStr = getAllExistingUrls(taxon);
141
        String fullName = urlPath + fileName;
142
        if (urlStr.contains(fullName)){
143
            return;
144
        }else{
145
            addMedia(app, taxon, fileName);
146
        }
147
    }
148

    
149
    /**
150
     * @param app
151
     * @param taxon
152
     * @param fileName
153
     */
154
    private void addMedia(CdmApplicationController app, Taxon taxon, String fileName) {
155
        try {
156
            String fullName = urlPath + fileName;
157
            Media media = getImageMedia(fullName, null, true);
158
            makeMetaData(media, fileName, false);
159
            makeTitle(media, fileName, false);
160
            if (!testOnly){
161
                makeTextData(fileName, media, taxon);
162
            }
163

    
164
        } catch (Exception e) {
165
            e.printStackTrace();
166
            return;
167
        }
168
    }
169

    
170
    /**
171
     * @param media
172
     * @param fileName
173
     * @param b
174
     */
175
    private void makeTitle(Media media, String fileName, boolean updateOnly) {
176
        String title = fileName.replace("_s_"," subsp. ")
177
                .replace("_"," ").replace(".jpg","").replace(".JPG","");
178
        if ( (!updateOnly) || media.getAllTitles().isEmpty()){
179
            media.putTitle(Language.LATIN(), title);
180
        }
181
    }
182

    
183
    /**
184
     * @param media
185
     */
186
    private void makeMetaData(Media media, String fileName, boolean updateOnly) {
187

    
188
        File file = new File(path + fileName);
189
        if (!file.exists()){
190
            logger.warn("File for filename " +  fileName + " does not exist.");
191
            return;
192
        }
193

    
194
        Map<String, String> keywords = new HashMap<>();
195
        String copyright = null;
196
        String artistStr = null;
197
        String created = null;
198
        try{
199
            IImageMetadata metadata = Sanselan.getMetadata(file);
200
            ArrayList<?> items = metadata.getItems();
201
            for (Object object : items){
202
                Item item = (Item) object;
203
//                System.out.println(item.getKeyword() +  ":    " + item.getText());
204
                String keyword = item.getKeyword().toLowerCase();
205
                String value =removeQuots(item.getText());
206

    
207
                if("keywords".equals(keyword)){
208
                    String[] splits = value.split(":");
209
                    if (splits.length == 2){
210
                        keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
211
                    }else{
212
                        logger.warn("Keyword has not correct format and can not be parsed: " + value +  "  for file " + fileName);
213
                    }
214
                }else if ("Copyright Notice".equalsIgnoreCase(keyword)){
215
                    copyright = value;
216
                }else if ("artist".equals(keyword)){
217
                    artistStr = value;
218
                }else if ("date time original".equalsIgnoreCase(item.getKeyword())){
219
                    created = value;
220
                }
221
            }
222
        } catch (ImageReadException | IOException e1) {
223
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
224
        }
225

    
226

    
227
        AgentBase<?> artistAgent = null;
228
        Rights right = null;
229
        DateTime createdDate = null;
230
        String locality = null;
231

    
232
        //artist
233
        if (keywords.get("photographer") != null){
234
            String artist = keywords.get("photographer");
235
            artistAgent = makePerson(artist, fileName);
236
        }
237
        if (artistStr != null){
238
            if (keywords.get("photographer") == null){
239
                artistAgent = makePerson(artistStr, fileName);
240
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
241
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
242
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
243
            }
244
        }
245

    
246
        //locality
247
        if (keywords.get("locality") != null){
248
            locality = keywords.get("locality");
249
        }
250

    
251
        //copyright
252
        if (copyright != null){
253
            AgentBase<?> agent;
254
            if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
255
                agent = Institution.NewNamedInstance(copyright);
256
            }else{
257
                agent = makePerson(copyright, fileName);
258
            }
259
            right = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
260
            right.setAgent(agent);
261
            right = deduplicationHelper.getExistingCopyright(null, right);
262
        }
263

    
264
        //created
265
        if (created != null){
266
            DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
267
            try {
268
                createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created);
269
            } catch (Exception e) {
270
                logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName);
271
            }
272
        }
273

    
274
        boolean force = !updateOnly || forceUpdate;
275
        //add to media
276
        if (artistAgent != null && (force || media.getArtist() == null)){
277
            media.setArtist(artistAgent);
278
        }
279
        if (right != null && (force || media.getRights().isEmpty())){
280
            media.removeRights(right);
281
            media.addRights(right);
282
        }
283
        if (createdDate != null && (force || media.getMediaCreated() == null)){
284
            media.setMediaCreated(createdDate);
285
        }
286
        if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){
287
            media.putDescription(Language.ENGLISH(), locality);
288
        }
289
    }
290

    
291
    /**
292
     * @param artist
293
     * @return
294
     */
295
    private Person makePerson(String artist, String fileName) {
296
        artist = artist.trim();
297
        String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
298
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
299
        Person person = Person.NewInstance();
300
        if (matcher.matches()){
301
            person.setFirstname(matcher.group(1).trim());
302
            person.setLastname(matcher.group(2).trim());
303
        }else{
304
            person.setTitleCache(artist, true);
305
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
306
        }
307

    
308
        person = (Person)deduplicationHelper.getExistingAuthor(null, person);
309
        return person;
310
    }
311

    
312
    private String removeQuots(String text) {
313
        if (text.startsWith("'") && text.endsWith("'")){
314
            return text.substring(1, text.length() -1);
315
        }else{
316
            return text;
317
        }
318
    }
319

    
320
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
321
        TaxonDescription imageGallery = taxon.getImageGallery(true);
322
        TextData textData = null;
323
        if (!imageGallery.getElements().isEmpty()){
324
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
325
            if (el.isInstanceOf(TextData.class)){
326
                textData = CdmBase.deproxy(el, TextData.class);
327
            }else{
328
                logger.warn("Image gallery had non-textdata description elmenet: " +  fileStr);
329
            }
330
        }
331
        if (textData == null){
332
            textData = TextData.NewInstance();
333
            textData.setFeature(Feature.IMAGE());
334
        }
335
        imageGallery.addElement(textData);
336
        textData.addMedia(media);
337
    }
338

    
339
    /**
340
     * Creates
341
     * @param uriString
342
     * @param readDataFromUrl
343
     * @see #READ_MEDIA_DATA
344
     * @return
345
     * @throws MalformedURLException
346
     */
347
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
348
        if( uriString == null){
349
            return null;
350
        } else {
351
            uriString = uriString.replace(" ", "%20");  //replace whitespace
352
            try {
353
                ImageInfo imageInfo = null;
354
                URI uri = new URI(uriString);
355

    
356
                try {
357
                    if (readMediaData){
358
                        logger.info("Read media data from: " + uri);
359
                        imageInfo = ImageInfo.NewInstance(uri, 0);
360
                    }
361
                } catch (Exception e) {
362
                    String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e.getMessage();
363
                    logger.warn(message);
364
                }
365
                ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
366

    
367
                MediaRepresentation representation = MediaRepresentation.NewInstance();
368

    
369
                if(imageInfo != null){
370
                    representation.setMimeType(imageInfo.getMimeType());
371
                    representation.setSuffix(imageInfo.getSuffix());
372
                }
373
                representation.addRepresentationPart(imageFile);
374
                Media media = Media.NewInstance();
375
                media.addRepresentation(representation);
376

    
377
                if (uriStrThumb != null){
378
                    ImageInfo imageInfoThumb = null;
379
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
380
                    URI uriThumb = new URI(uriStrThumb);
381
                    try {
382
                        if (readMediaData){
383
                            logger.info("Read media data from: " + uriThumb);
384
                            imageInfoThumb = ImageInfo.NewInstance(uriThumb, 0);
385
                        }
386
                    } catch (Exception e) {
387
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
388
                        logger.warn(message);
389
                    }
390

    
391
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
392
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
393
                    if(imageInfoThumb != null){
394
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
395
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
396
                    }
397
                    reprThumb.addRepresentationPart(imageFileFhumb);
398
                    media.addRepresentation(reprThumb);
399
                }
400

    
401
                return media;
402
            } catch (URISyntaxException e1) {
403
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
404
                logger.warn(message);
405
                return null;
406
            }
407
        }
408
    }
409

    
410
    /**
411
     * @param taxon
412
     * @return
413
     */
414
    private Set<String> getAllExistingUrls(Taxon taxon) {
415
        Set<String> result = new HashSet<>();
416
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
417
        for (TaxonDescription td : descriptions){
418
            if (td.isImageGallery()){
419
                for (DescriptionElementBase deb : td.getElements()){
420
                    if (deb.isInstanceOf(TextData.class)){
421
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
422
                        for (Media media :textData.getMedia()){
423
                            for (MediaRepresentation rep : media.getRepresentations()){
424
                                for (MediaRepresentationPart part : rep.getParts()){
425
                                    URI uri = part.getUri();
426
                                    if (uri != null){
427
                                        String uriStr = uri.toString();
428
                                        result.add(uriStr);
429
                                    }
430
                                }
431
                            }
432
                        }
433

    
434
                    }
435
                }
436
            }
437
        }
438
        return result;
439
    }
440

    
441
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
442

    
443
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
444
        taxonNameStr = adaptName(taxonNameStr);
445
        config.setTaxonNameTitle(taxonNameStr);
446
        config.setIncludeSynonyms(false);
447
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
448
        if (list.isEmpty()){
449
//            logger.warn("Taxon not found for media: " + taxonNameStr);
450
            taxonNameStr = taxonNameStr.replaceFirst(" ", UTF8.HYBRID.toString() + " ");
451
            list = app.getTaxonService().findTaxaByName(config);
452
            if (list.isEmpty()){
453
                return null;
454
            }else if (list.size() > 1){
455
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
456
            }
457
        }
458
        if (list.size()>1){
459
            Iterator<TaxonBase> it = list.iterator();
460
            while (it.hasNext()){
461
                Taxon next = (Taxon)it.next();
462
                if (next.getTaxonNodes().isEmpty() && !next.getTaxonForMisappliedName().isEmpty()){
463
                    it.remove();
464
                }
465
            }
466
            if (list.size()>1){
467
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
468
                it = list.iterator();
469
                while (it.hasNext()){
470
                    Taxon next = (Taxon)it.next();
471
                    if (next.getTaxonNodes().isEmpty()){
472
                        it.remove();
473
                    }
474
                }
475
                if (list.size()>1){
476
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
477
                }else if (list.size() < 1){
478
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
479
                    return null;
480
                }
481
            }else if (list.size() < 1){
482
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
483
                return null;
484
            }
485
        }
486
        TaxonBase<?> taxonBase = list.get(0);
487
        Taxon result;
488
        if (taxonBase.isInstanceOf(Synonym.class)){
489
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
490
        }else{
491
            result = CdmBase.deproxy(taxonBase, Taxon.class);
492
        }
493
        return result;
494
    }
495

    
496
    /**
497
     * @param taxonNameStr
498
     * @return
499
     */
500
    private String adaptName(String taxonNameStr) {
501
//        if (taxonNameStr.equals("Hypericum cerastoides")){
502
//            taxonNameStr = "Hypericum cerastioides";
503
//        }
504
        return taxonNameStr;
505
    }
506

    
507
	private void test(){
508
	    File f = new File(path);
509
	    String[] list = f.list();
510
	    List<String> fullFileNames = new ArrayList<>();
511
	    for (String fileName : list){
512
	        fullFileNames.add(path + fileName);
513
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
514
	            System.out.println(fileName);
515
	        }
516
	    }
517
	}
518

    
519
	private void updateMetadata(ICdmDataSource cdmDestination){
520
        CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, hbm2dll);
521
        TransactionStatus tx = app.startTransaction();
522

    
523
        deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(app);
524

    
525
        List<Media> list = app.getMediaService().list(Media.class, null, null, null, null);
526
        for (Media media : list){
527
            String fileName = getUrlStringForMedia(media);
528
            if (fileName.startsWith(urlPath)){
529
                fileName = fileName.replace(urlPath, "");
530
//                System.out.println(fileName);
531
//                makeMetaData(media, fileName, true);
532
                makeTitle(media, fileName, true);
533
            }else{
534
                logger.warn("Filename does not start with standard url path: " + fileName);
535
            }
536
        }
537

    
538
        if (testOnly){
539
            tx.setRollbackOnly();
540
        }
541
        app.commitTransaction(tx);
542

    
543
	}
544

    
545
	/**
546
     * @param media
547
	 * @return
548
     */
549
    private String getUrlStringForMedia(Media media) {
550
        String result = null;
551
        for (MediaRepresentation rep : media.getRepresentations()){
552
            for (MediaRepresentationPart part : rep.getParts()){
553
                URI uri = part.getUri();
554
                if (uri != null){
555
                    if (result != null){
556
                        logger.warn("More than 1 uri exists for media "+ media.getId());
557
                    }else{
558
                        result = uri.toString();
559
                    }
560
                }
561
            }
562
        }
563
        return result;
564
    }
565

    
566
    /**
567
	 * @param args
568
	 */
569
	public static void main(String[] args) {
570
		CyprusImagesActivator me = new CyprusImagesActivator();
571
		if (update_notCreate){
572
		    me.updateMetadata(cdmDestination);
573
		}else{
574
		    me.doImport(cdmDestination);
575
		}
576
//		me.test();
577
		System.exit(0);
578
	}
579

    
580
}
(3-3/4)