Project

General

Profile

Download (19 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.app.cyprus;
11

    
12
import java.io.File;
13
import java.io.IOException;
14
import java.net.MalformedURLException;
15
import java.net.URI;
16
import java.net.URISyntaxException;
17
import java.util.ArrayList;
18
import java.util.HashMap;
19
import java.util.HashSet;
20
import java.util.Iterator;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.Set;
24
import java.util.regex.Matcher;
25
import java.util.regex.Pattern;
26

    
27
import org.apache.log4j.Logger;
28
import org.apache.sanselan.ImageReadException;
29
import org.apache.sanselan.Sanselan;
30
import org.apache.sanselan.common.IImageMetadata;
31
import org.apache.sanselan.common.ImageMetadata.Item;
32
import org.joda.time.DateTime;
33
import org.joda.time.format.DateTimeFormat;
34
import org.joda.time.format.DateTimeFormatter;
35
import org.springframework.transaction.TransactionStatus;
36

    
37
import eu.etaxonomy.cdm.api.application.CdmApplicationController;
38
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
39
import eu.etaxonomy.cdm.app.common.CdmDestinations;
40
import eu.etaxonomy.cdm.common.UTF8;
41
import eu.etaxonomy.cdm.common.media.ImageInfo;
42
import eu.etaxonomy.cdm.database.DbSchemaValidation;
43
import eu.etaxonomy.cdm.database.ICdmDataSource;
44
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
45
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
46
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
47
import eu.etaxonomy.cdm.model.agent.AgentBase;
48
import eu.etaxonomy.cdm.model.agent.Institution;
49
import eu.etaxonomy.cdm.model.agent.Person;
50
import eu.etaxonomy.cdm.model.common.CdmBase;
51
import eu.etaxonomy.cdm.model.common.Language;
52
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
53
import eu.etaxonomy.cdm.model.description.Feature;
54
import eu.etaxonomy.cdm.model.description.TaxonDescription;
55
import eu.etaxonomy.cdm.model.description.TextData;
56
import eu.etaxonomy.cdm.model.media.ImageFile;
57
import eu.etaxonomy.cdm.model.media.Media;
58
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
59
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart;
60
import eu.etaxonomy.cdm.model.media.Rights;
61
import eu.etaxonomy.cdm.model.media.RightsType;
62
import eu.etaxonomy.cdm.model.taxon.Synonym;
63
import eu.etaxonomy.cdm.model.taxon.Taxon;
64
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
65

    
66
/**
67
 * @author a.mueller
68
 * @created 16.12.2010
69
 */
70
public class CyprusImagesActivator {
71
	private static final Logger logger = Logger.getLogger(CyprusImagesActivator.class);
72

    
73
	//database validation status (create, update, validate ...)
74
    static DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE;
75

    
76
//	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_dev();
77
	static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_production();
78

    
79
	boolean testOnly = false;
80

    
81
    private static final String path = "//media/digitalimages/EditWP6/Zypern/photos/";
82
    private static final String urlPath = "http://media.bgbm.org/erez/erez?src=EditWP6/zypern/photos/";
83

    
84
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
85

    
86

    
87
	private void doImport(ICdmDataSource cdmDestination){
88

    
89
		CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, hbm2dll);
90
		TransactionStatus tx = app.startTransaction();
91

    
92
		deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(app);
93

    
94
        File file = new File(path);
95
        String[] fileList = file.list();
96
        Set<String> notFound = new HashSet<>();
97

    
98
        String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)";
99
        Pattern pattern = Pattern.compile(regEx);
100

    
101
        for (String fileName : fileList){
102
            Matcher matcher = pattern.matcher(fileName);
103
            if (matcher.matches()){
104
//                System.out.println(fileName);
105
                String taxonName = matcher.group(1);
106
                taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " ");
107
                Taxon taxon = getAcceptedTaxon(app, taxonName);
108
                if (taxon == null){
109
                    if (!notFound.contains(taxonName)){
110
                        notFound.add(taxonName);
111
                        logger.warn("Taxon not found: " + taxonName);
112
                    }
113
                }else{
114
                    handleTaxon(app, taxon, fileName);
115
                }
116
            }else{
117
                if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){
118
                    logger.warn("Incorrect filename:" + fileName);
119
                }
120
            }
121
        }
122

    
123
//		app.getTaxonService().saveOrUpdate(taxaToSave);
124

    
125
		if (testOnly){
126
		    tx.setRollbackOnly();
127
		}
128
		app.commitTransaction(tx);
129
	}
130

    
131
    /**
132
     * @param app
133
     * @param taxon
134
     * @param fileName
135
     */
136
    private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) {
137
        Set<String> urlStr = getAllExistingUrls(taxon);
138
        String fullName = urlPath + fileName;
139
        if (urlStr.contains(fullName)){
140
            return;
141
        }else{
142
            addMedia(app, taxon, fileName);
143
        }
144
    }
145

    
146
    /**
147
     * @param app
148
     * @param taxon
149
     * @param fileName
150
     */
151
    private void addMedia(CdmApplicationController app, Taxon taxon, String fileName) {
152
        try {
153
            String fullName = urlPath + fileName;
154
            Media media = getImageMedia(fullName, null, true);
155
            makeMetaData(media, fileName);
156
            String title = fileName.replace("_s_"," subsp. ")
157
                    .replace("_"," ").replace(".jpg","").replace(".JPG","");
158
            media.putTitle(Language.LATIN(), title);
159
            if (!testOnly){
160
                makeTextData(fileName, media, taxon);
161
            }
162

    
163
        } catch (Exception e) {
164
            e.printStackTrace();
165
            return;
166
        }
167
    }
168

    
169
    /**
170
     * @param media
171
     */
172
    private void makeMetaData(Media media, String fileName) {
173
        //image metadata
174

    
175
        Map<String, String> keywords = new HashMap<>();
176
        File file = new File(path + fileName);
177
        String copyright = null;
178
        String artistStr = null;
179
        try{
180
            IImageMetadata metadata = Sanselan.getMetadata(file);
181
            ArrayList<?> items = metadata.getItems();
182
            for (Object object : items){
183
                Item item = (Item) object;
184
//                System.out.println(item.getKeyword() +  ":    " + item.getText());
185
                String keyword = item.getKeyword().toLowerCase();
186
                String value =removeQuots(item.getText());
187

    
188
                if("keywords".equals(keyword)){
189
                    String[] splits = value.split(":");
190
                    if (splits.length == 2){
191
                        keywords.put(splits[0].trim().toLowerCase(), splits[1].trim());
192
                    }else{
193
                        logger.warn("Keyword has not correct format and can not be parsed: " + value +  "  for file " + fileName);
194
                    }
195
                }else if ("Copyright Notice".equalsIgnoreCase(keyword)){
196
                    copyright = value;
197
                }else if ("artist".equals(keyword)){
198
                    artistStr = value;
199
                }else if ("date time original".equalsIgnoreCase(item.getKeyword())){
200
                    DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
201
                    DateTime created;
202
                    try {
203
                        created = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(value);
204
                        media.setMediaCreated(created);
205
                    } catch (Exception e) {
206
                        logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + value + " for file " + fileName);
207
                    }
208
                }
209
            }
210
        } catch (ImageReadException | IOException e1) {
211
            logger.warn("       Problem (" + e1.getMessage() + ") when reading metadata from file: " + fileName);
212
        }
213
        if (keywords.get("photographer") != null){
214
            String artist = keywords.get("photographer");
215
            Person person = makePerson(artist, fileName);
216
            media.setArtist(person);
217
        }
218
        if (artistStr != null){
219
            if (keywords.get("photographer") == null){
220
                Person person = makePerson(artistStr, fileName);
221
                media.setArtist(person);
222
            }else if (!keywords.get("photographer").toLowerCase().replace(" ", "")
223
                    .contains(artistStr.toLowerCase().replace(" ", ""))){
224
                logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName);
225
            }
226
        }
227
        if (keywords.get("locality") != null){
228
            String locality = keywords.get("locality");
229
            media.putDescription(Language.ENGLISH(), locality);
230
        }
231
        if (copyright != null){
232
            if (rightsMap.get(copyright)!= null){
233
                media.addRights(rightsMap.get(copyright));
234
            }else{
235
                AgentBase<?> agent;
236
                if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){
237
                    agent = Institution.NewNamedInstance(copyright);
238
                }else{
239
                    agent = makePerson(copyright, fileName);
240
                }
241
                Rights r = Rights.NewInstance(null, null, RightsType.COPYRIGHT());
242
                r.setAgent(agent);
243
                media.addRights(r);
244
                rightsMap.put(copyright, r);
245
            }
246
        }
247
    }
248

    
249
    private static Map<String, Rights> rightsMap = new HashMap<>();
250

    
251
    /**
252
     * @param artist
253
     * @return
254
     */
255
    private Person makePerson(String artist, String fileName) {
256
        artist = artist.trim();
257
        String regEx = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
258
        Matcher matcher = Pattern.compile(regEx).matcher(artist);
259
        Person person = Person.NewInstance();
260
        if (matcher.matches()){
261
            person.setFirstname(matcher.group(1).trim());
262
            person.setLastname(matcher.group(2).trim());
263
        }else{
264
            person.setTitleCache(artist, true);
265
            logger.warn("Person could not be parsed: " + artist + " for file " + fileName);
266
        }
267

    
268
        person = (Person)deduplicationHelper.getExistingAuthor(null, person);
269
        return person;
270
    }
271

    
272
    private String removeQuots(String text) {
273
        if (text.startsWith("'") && text.endsWith("'")){
274
            return text.substring(1, text.length() -1);
275
        }else{
276
            return text;
277
        }
278
    }
279

    
280
    private void makeTextData(String fileStr, Media media, Taxon taxon) {
281
        TaxonDescription imageGallery = taxon.getImageGallery(true);
282
        TextData textData = null;
283
        if (!imageGallery.getElements().isEmpty()){
284
            DescriptionElementBase el = imageGallery.getElements().iterator().next();
285
            if (el.isInstanceOf(TextData.class)){
286
                textData = CdmBase.deproxy(el, TextData.class);
287
            }else{
288
                logger.warn("Image gallery had non-textdata description elmenet: " +  fileStr);
289
            }
290
        }
291
        if (textData == null){
292
            textData = TextData.NewInstance();
293
            textData.setFeature(Feature.IMAGE());
294
        }
295
        imageGallery.addElement(textData);
296
        textData.addMedia(media);
297
    }
298

    
299
    /**
300
     * Creates
301
     * @param uriString
302
     * @param readDataFromUrl
303
     * @see #READ_MEDIA_DATA
304
     * @return
305
     * @throws MalformedURLException
306
     */
307
    protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException {
308
        if( uriString == null){
309
            return null;
310
        } else {
311
            uriString = uriString.replace(" ", "%20");  //replace whitespace
312
            try {
313
                ImageInfo imageInfo = null;
314
                URI uri = new URI(uriString);
315

    
316
                try {
317
                    if (readMediaData){
318
                        logger.info("Read media data from: " + uri);
319
                        imageInfo = ImageInfo.NewInstance(uri, 0);
320
                    }
321
                } catch (Exception e) {
322
                    String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " +  e.getMessage();
323
                    logger.warn(message);
324
                }
325
                ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
326

    
327
                MediaRepresentation representation = MediaRepresentation.NewInstance();
328

    
329
                if(imageInfo != null){
330
                    representation.setMimeType(imageInfo.getMimeType());
331
                    representation.setSuffix(imageInfo.getSuffix());
332
                }
333
                representation.addRepresentationPart(imageFile);
334
                Media media = Media.NewInstance();
335
                media.addRepresentation(representation);
336

    
337
                if (uriStrThumb != null){
338
                    ImageInfo imageInfoThumb = null;
339
                    uriStrThumb = uriStrThumb.replace(" ", "%20");  //replace whitespace
340
                    URI uriThumb = new URI(uriStrThumb);
341
                    try {
342
                        if (readMediaData){
343
                            logger.info("Read media data from: " + uriThumb);
344
                            imageInfoThumb = ImageInfo.NewInstance(uriThumb, 0);
345
                        }
346
                    } catch (Exception e) {
347
                        String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " +  e.getMessage();
348
                        logger.warn(message);
349
                    }
350

    
351
                    ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb);
352
                    MediaRepresentation reprThumb = MediaRepresentation.NewInstance();
353
                    if(imageInfoThumb != null){
354
                        reprThumb.setMimeType(imageInfoThumb.getMimeType());
355
                        reprThumb.setSuffix(imageInfoThumb.getSuffix());
356
                    }
357
                    reprThumb.addRepresentationPart(imageFileFhumb);
358
                    media.addRepresentation(reprThumb);
359
                }
360

    
361
                return media;
362
            } catch (URISyntaxException e1) {
363
                String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " +  uriString;
364
                logger.warn(message);
365
                return null;
366
            }
367
        }
368
    }
369

    
370
    /**
371
     * @param taxon
372
     * @return
373
     */
374
    private Set<String> getAllExistingUrls(Taxon taxon) {
375
        Set<String> result = new HashSet<>();
376
        Set<TaxonDescription> descriptions = taxon.getDescriptions();
377
        for (TaxonDescription td : descriptions){
378
            if (td.isImageGallery()){
379
                for (DescriptionElementBase deb : td.getElements()){
380
                    if (deb.isInstanceOf(TextData.class)){
381
                        TextData textData = CdmBase.deproxy(deb, TextData.class);
382
                        for (Media media :textData.getMedia()){
383
                            for (MediaRepresentation rep : media.getRepresentations()){
384
                                for (MediaRepresentationPart part : rep.getParts()){
385
                                    URI uri = part.getUri();
386
                                    if (uri != null){
387
                                        String uriStr = uri.toString();
388
                                        result.add(uriStr);
389
                                    }
390
                                }
391
                            }
392
                        }
393

    
394
                    }
395
                }
396
            }
397
        }
398
        return result;
399
    }
400

    
401
    private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) {
402

    
403
        MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
404
        taxonNameStr = adaptName(taxonNameStr);
405
        config.setTaxonNameTitle(taxonNameStr);
406
        config.setIncludeSynonyms(false);
407
        List<TaxonBase> list = app.getTaxonService().findTaxaByName(config);
408
        if (list.isEmpty()){
409
//            logger.warn("Taxon not found for media: " + taxonNameStr);
410
            taxonNameStr = taxonNameStr.replaceFirst(" ", UTF8.HYBRID.toString() + " ");
411
            list = app.getTaxonService().findTaxaByName(config);
412
            if (list.isEmpty()){
413
                return null;
414
            }else if (list.size() > 1){
415
                logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr);
416
            }
417
        }
418
        if (list.size()>1){
419
            Iterator<TaxonBase> it = list.iterator();
420
            while (it.hasNext()){
421
                Taxon next = (Taxon)it.next();
422
                if (next.getTaxonNodes().isEmpty() && !next.getTaxonForMisappliedName().isEmpty()){
423
                    it.remove();
424
                }
425
            }
426
            if (list.size()>1){
427
                logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node.");
428
                it = list.iterator();
429
                while (it.hasNext()){
430
                    Taxon next = (Taxon)it.next();
431
                    if (next.getTaxonNodes().isEmpty()){
432
                        it.remove();
433
                    }
434
                }
435
                if (list.size()>1){
436
                    logger.warn("Still more than 1 taxon found for media: " + taxonNameStr);
437
                }else if (list.size() < 1){
438
                    logger.warn("After removing nodeless taxa no taxon was left: " +  taxonNameStr);
439
                    return null;
440
                }
441
            }else if (list.size() < 1){
442
                logger.warn("After removing misapplications no taxon was left: " +  taxonNameStr);
443
                return null;
444
            }
445
        }
446
        TaxonBase<?> taxonBase = list.get(0);
447
        Taxon result;
448
        if (taxonBase.isInstanceOf(Synonym.class)){
449
            result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
450
        }else{
451
            result = CdmBase.deproxy(taxonBase, Taxon.class);
452
        }
453
        return result;
454
    }
455

    
456
    /**
457
     * @param taxonNameStr
458
     * @return
459
     */
460
    private String adaptName(String taxonNameStr) {
461
//        if (taxonNameStr.equals("Hypericum cerastoides")){
462
//            taxonNameStr = "Hypericum cerastioides";
463
//        }
464
        return taxonNameStr;
465
    }
466

    
467
	public void test(){
468
	    File f = new File(path);
469
	    String[] list = f.list();
470
	    List<String> fullFileNames = new ArrayList<>();
471
	    for (String fileName : list){
472
	        fullFileNames.add(path + fileName);
473
	        if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){
474
	            System.out.println(fileName);
475
	        }
476
	    }
477
	}
478

    
479
	/**
480
	 * @param args
481
	 */
482
	public static void main(String[] args) {
483
		CyprusImagesActivator me = new CyprusImagesActivator();
484
		me.doImport(cdmDestination);
485
//		me.test();
486
		System.exit(0);
487
	}
488

    
489
}
(3-3/4)