/**
* Copyright (C) 2007 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/

package eu.etaxonomy.cdm.io;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.sanselan.ImageInfo;
import org.apache.sanselan.ImageReadException;
import org.apache.sanselan.Sanselan;
import org.apache.sanselan.common.IImageMetadata;
import org.apache.sanselan.common.ImageMetadata.Item;
import org.apache.sanselan.formats.jpeg.JpegImageMetadata;
import org.springframework.stereotype.Component;

import eu.etaxonomy.cdm.app.images.AbstractImageImporter;
import eu.etaxonomy.cdm.app.images.ImageImportState;
import eu.etaxonomy.cdm.model.agent.AgentBase;
import eu.etaxonomy.cdm.model.agent.Person;
import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
import eu.etaxonomy.cdm.model.description.Feature;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.description.TextData;
import eu.etaxonomy.cdm.model.media.ImageFile;
import eu.etaxonomy.cdm.model.media.Media;
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
import eu.etaxonomy.cdm.model.media.Rights;
import eu.etaxonomy.cdm.model.media.RightsType;
import eu.etaxonomy.cdm.model.reference.Reference;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy;
import eu.etaxonomy.cdm.strategy.match.IMatchStrategyEqual;

/**
51
 * TODO not working at the moment
52 1454af38 Andreas Müller
 *
53 91138b6c Andreas Müller
 * @author n.hoffmann
54 a13538c8 Andreas Müller
 * @since 18.11.2008
55 91138b6c Andreas Müller
 */
56
@Component
57
public class PalmaeImageImport extends AbstractImageImporter {
58 1454af38 Andreas Müller
59
    private static final long serialVersionUID = 1226643507245147417L;
60
61
    private static final Logger logger = Logger.getLogger(PalmaeImageImport.class);
62
63 91138b6c Andreas Müller
	enum MetaData{
64
		NAME,
65
		ARTIST,
66
		COPYRIGHT,
67
		COPYRIGHTNOTICE,
68
		OBJECTNAME
69
	}
70 1454af38 Andreas Müller
71 91138b6c Andreas Müller
	private static int modCount = 300;
72
73
	private static String pluralString = "images";
74 1454af38 Andreas Müller
75 91138b6c Andreas Müller
	/**
76
	 * Rudimetary implementation using apache sanselan. This implementation depends
77
	 * on the metadata standards used in the palmae images. The IPTC field ObjectName
78 1454af38 Andreas Müller
	 * contains a string like this: "Arecaceae; Eugeissona utilis". The string
79 91138b6c Andreas Müller
	 * in front of the semicolon is the family name and the one behind, the taxon name.
80 1454af38 Andreas Müller
	 * So we basically assume, that if the string gets split by ";" the element at
81 91138b6c Andreas Müller
	 * index 1 should be the taxon name.
82
	 * If this format changes this method breaks!
83 1454af38 Andreas Müller
	 *
84
	 * TODO The ImageMetaData class of the commons package should provide
85 91138b6c Andreas Müller
	 * convenient access to the metadata of an image as well as all the error handling
86 1454af38 Andreas Müller
	 *
87 91138b6c Andreas Müller
	 * @param imageFile
88
	 * @return the name of the taxon as stored in ObjectName IPTC tag
89
	 */
90
	public String retrieveTaxonNameFromImageMetadata(File imageFile){
91
		String name = null;
92 1454af38 Andreas Müller
93 91138b6c Andreas Müller
		IImageMetadata metadata = null;
94 1454af38 Andreas Müller
95 91138b6c Andreas Müller
		try {
96
			metadata = Sanselan.getMetadata(imageFile);
97
		} catch (ImageReadException e) {
98
			logger.error("Error reading image" + " in " + imageFile.getName(), e);
99
		} catch (IOException e) {
100
			logger.error("Error reading file"  + " in " + imageFile.getName(), e);
101
		}
102 1454af38 Andreas Müller
103 91138b6c Andreas Müller
		if(metadata instanceof JpegImageMetadata){
104
			JpegImageMetadata jpegMetadata = (JpegImageMetadata) metadata;
105
106
			for (Object object : jpegMetadata.getItems()){
107 1454af38 Andreas Müller
108 91138b6c Andreas Müller
				Item item = (Item) object;
109 1454af38 Andreas Müller
110 91138b6c Andreas Müller
				if(item.getKeyword().equals("ObjectName")){
111
					logger.debug("File: " + imageFile.getName() + ". ObjectName string is: " + item.getText());
112
					String[] objectNameSplit = item.getText().split(";");
113 1454af38 Andreas Müller
114 91138b6c Andreas Müller
					try {
115
						name = objectNameSplit[1].trim();
116
					} catch (ArrayIndexOutOfBoundsException e) {
117
						logger.warn("ObjectNameSplit has no second part: " + item.getText() + " in " + imageFile.getName());
118
						//throw e;
119
					}
120
				}
121
			}
122
		}
123 1454af38 Andreas Müller
124
125 91138b6c Andreas Müller
		return name;
126
	}
127 1454af38 Andreas Müller
128
	private Map<MetaData, String> getMetaData(File imageFile, List<MetaData> metaData){
129
		HashMap<MetaData, String> result = new HashMap<>();
130
131 91138b6c Andreas Müller
		IImageMetadata metadata = null;
132 1454af38 Andreas Müller
		List<String> metaDataStrings = new ArrayList<>();
133
134 91138b6c Andreas Müller
		for (MetaData data: metaData){
135
			metaDataStrings.add(data.name().toLowerCase());
136
		}
137 1454af38 Andreas Müller
138
139 91138b6c Andreas Müller
		try {
140
			metadata = Sanselan.getMetadata(imageFile);
141
		} catch (ImageReadException e) {
142
			logger.error("Error reading image" + " in " + imageFile.getName(), e);
143
		} catch (IOException e) {
144
			logger.error("Error reading file"  + " in " + imageFile.getName(), e);
145
		}
146 1454af38 Andreas Müller
147
148
149 91138b6c Andreas Müller
		if(metadata instanceof JpegImageMetadata){
150
			JpegImageMetadata jpegMetadata = (JpegImageMetadata) metadata;
151 1454af38 Andreas Müller
152 91138b6c Andreas Müller
			for (Object object : jpegMetadata.getItems()){
153
				Item item = (Item) object;
154 1454af38 Andreas Müller
155 91138b6c Andreas Müller
				if(metaDataStrings.contains(item.getKeyword().toLowerCase())){
156
					logger.debug("File: " + imageFile.getName() + ". "+ item.getKeyword() +"string is: " + item.getText());
157
					result.put(MetaData.valueOf(item.getKeyword().toUpperCase()), item.getText());
158
				}
159
			}
160
		}
161 1454af38 Andreas Müller
162 91138b6c Andreas Müller
		return result;
163
	}
164
165 1454af38 Andreas Müller
166
167
	@Override
168
    protected void invokeImageImport (ImageImportState state){
169
170 c59ecc4a Andreas Müller
		logger.info("Importing images from directory: " + state.getConfig().getSourceNameString());
171 1454af38 Andreas Müller
172 c59ecc4a Andreas Müller
		File sourceFolder = new File(state.getConfig().getSource());
173 91138b6c Andreas Müller
		String taxonName;
174
		if(sourceFolder.isDirectory()){
175
			int count = 0;
176
			for( File file : sourceFolder.listFiles()){
177
				if(file.isFile()){
178
					doCount(count++, modCount, pluralString);
179 1454af38 Andreas Müller
180 91138b6c Andreas Müller
					taxonName= retrieveTaxonNameFromImageMetadata(file);
181
					logger.debug("Looking up taxa with taxon name: " + taxonName);
182 1454af38 Andreas Müller
183 91138b6c Andreas Müller
					//TODO:
184 1454af38 Andreas Müller
					ArrayList<MetaData> metaDataList = new ArrayList<>();
185 91138b6c Andreas Müller
					metaDataList.add (MetaData.ARTIST);
186
					metaDataList.add (MetaData.COPYRIGHT);
187
					metaDataList.add (MetaData.COPYRIGHTNOTICE);
188
					metaDataList.add (MetaData.OBJECTNAME);
189
					//metaDataList.add (MetaData.NAME);
190 1454af38 Andreas Müller
191 91138b6c Andreas Müller
					Map<MetaData, String> metaData = getMetaData(file, metaDataList);
192 1454af38 Andreas Müller
193
194
195 c59ecc4a Andreas Müller
					Reference sec = referenceService.find(state.getConfig().getSecUuid());
196 91138b6c Andreas Müller
197 1454af38 Andreas Müller
					List<TaxonBase> taxa = new ArrayList<>();
198 91138b6c Andreas Müller
					if (taxonName != null){
199 ab2f9bc5 Andreas Müller
						taxa = taxonService.searchByName(taxonName, true, sec);
200 91138b6c Andreas Müller
					}else{
201
						logger.error("TaxonName is null "  + " in " + file.getName());
202
					}
203
					if(taxa.size() == 0){
204
						logger.warn("no taxon with this name found: " + taxonName + " in " + file.getName());
205
					}else if(taxa.size() > 1){
206
						logger.error(taxa);
207
						logger.error("multiple taxa with this name found: " + taxonName + " in " + file.getName());
208
					}else{
209
						Taxon taxon = (Taxon) taxa.get(0);
210 1454af38 Andreas Müller
211 91138b6c Andreas Müller
						taxonService.saveOrUpdate(taxon);
212 1454af38 Andreas Müller
213 91138b6c Andreas Müller
						//MetaDataFactory metaDataFactory = MetaDataFactory.getInstance();
214
						//ImageMetaData imageMetaData = (ImageMetaData) metaDataFactory.readMediaData(file.toURI(), MimeType.IMAGE);
215
						try{
216
						ImageInfo imageinfo = Sanselan.getImageInfo(file);
217 1454af38 Andreas Müller
218 91138b6c Andreas Müller
						String mimeType = imageinfo.getMimeType();
219
						String suffix = "jpg";
220 1454af38 Andreas Müller
221
222 91138b6c Andreas Müller
						// URL for this image
223
						URL url = null;
224
						try {
225 c59ecc4a Andreas Müller
							url = new URL(state.getConfig().getMediaUrlString() + file.getName());
226 91138b6c Andreas Müller
						} catch (MalformedURLException e) {
227
							logger.warn("URL is malformed: "+ url);
228
						}
229 1454af38 Andreas Müller
230
231 401fe405 Andreas Müller
						ImageFile imageFile = ImageFile.NewInstance(url.toURI(),null, imageinfo.getHeight(), imageinfo.getWidth());
232 1454af38 Andreas Müller
233
234 91138b6c Andreas Müller
						MediaRepresentation representation = MediaRepresentation.NewInstance(mimeType, suffix);
235
						representation.addRepresentationPart(imageFile);
236 1454af38 Andreas Müller
237 91138b6c Andreas Müller
						Media media = Media.NewInstance();
238
						media.addRepresentation(representation);
239
						if (metaData.containsKey(MetaData.OBJECTNAME)){
240
							media.setTitleCache(metaData.get(MetaData.OBJECTNAME).replace("'", ""), true);
241
						}
242
						//TODO: add the rights and the author:
243
						Person artist = null;
244
						if (metaData.containsKey(MetaData.ARTIST)){
245
							//TODO search for the person first and then create the object...
246
							artist = Person.NewTitledInstance(metaData.get(MetaData.ARTIST).replace("'", ""));
247 4e1b6f7d Andreas Müller
							artist.setGivenName(getGivenName(metaData.get(MetaData.ARTIST)).replace("'", ""));
248
							artist.setFamilyName(getFamilyName(metaData.get(MetaData.ARTIST)).replace("'", ""));
249 1454af38 Andreas Müller
250 aaeb1aaa Andreas Müller
							IMatchStrategyEqual matchStrategy = DefaultMatchStrategy.NewInstance(AgentBase.class);
251 91138b6c Andreas Müller
							try{
252
								List<Person> agents = commonService.findMatching(artist, matchStrategy);
253 1454af38 Andreas Müller
254 91138b6c Andreas Müller
								if (agents.size()!= 0){
255
									artist = agents.get(0);
256
								}
257
							}catch(eu.etaxonomy.cdm.strategy.match.MatchException e){
258
								logger.warn("MatchException occurred");
259
							}
260 1454af38 Andreas Müller
261 91138b6c Andreas Müller
							media.setArtist(artist);
262
						}
263 1454af38 Andreas Müller
264 91138b6c Andreas Müller
						if (metaData.containsKey(MetaData.COPYRIGHT)){
265 1454af38 Andreas Müller
							//TODO: maybe search for the identic right...
266 91138b6c Andreas Müller
							Rights copyright = Rights.NewInstance();
267 bb38665e Andreas Müller
							copyright.setType(RightsType.COPYRIGHT());
268 91138b6c Andreas Müller
							Person copyrightOwner;
269 4e1b6f7d Andreas Müller
							if (artist != null && !artist.getFamilyName().equalsIgnoreCase(getFamilyName(metaData.get(MetaData.COPYRIGHT)))){
270 91138b6c Andreas Müller
								copyrightOwner = Person.NewInstance();
271 1454af38 Andreas Müller
272 4e1b6f7d Andreas Müller
								copyrightOwner.setGivenName(getGivenName(metaData.get(MetaData.COPYRIGHT)));
273
								copyrightOwner.setFamilyName(getFamilyName(metaData.get(MetaData.COPYRIGHT)));
274 91138b6c Andreas Müller
							}else
275
							{
276
								copyrightOwner = artist;
277
							}
278
							copyright.setAgent(copyrightOwner);
279
							//IMatchStrategy matchStrategy = DefaultMatchStrategy.NewInstance(Rights.class);
280
							media.addRights(copyright);
281
						}
282 1454af38 Andreas Müller
283 c59ecc4a Andreas Müller
						Reference sourceRef = state.getConfig().getSourceReference();
284 91138b6c Andreas Müller
						TaxonDescription description = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
285 1454af38 Andreas Müller
286
287 91138b6c Andreas Müller
						TextData textData = null;
288
						for (DescriptionElementBase element : description.getElements()){
289
							if (element.isInstanceOf(TextData.class)){
290
								textData = CdmBase.deproxy(element, TextData.class);
291
							}
292
						}
293
						if (textData == null){
294
							textData = TextData.NewInstance();
295
						}
296 1454af38 Andreas Müller
297
298 91138b6c Andreas Müller
						textData.addMedia(media);
299 1454af38 Andreas Müller
300 91138b6c Andreas Müller
						textData.setFeature(Feature.IMAGE());
301 1454af38 Andreas Müller
302 91138b6c Andreas Müller
						description.addElement(textData);
303 1454af38 Andreas Müller
304 91138b6c Andreas Müller
						taxonService.saveOrUpdate(taxon);
305
						}catch(Exception e) {
306
							e.printStackTrace();
307
						}
308
					}
309
				}
310
			}
311
		}else{
312
			logger.error("given source folder is not a directory");
313
		}
314 c59ecc4a Andreas Müller
		return;
315 91138b6c Andreas Müller
	}
316 1454af38 Andreas Müller
317 4e1b6f7d Andreas Müller
	private String getGivenName(String artist){
318 91138b6c Andreas Müller
		if (artist == null){
319
			return "";
320
		}
321
		if (!artist.contains(" ")) {
322
			return "";
323
		}
324
		if (artist.contains(",")){
325
			String [] artistSplits = artist.split(",");
326
			artist = artistSplits[0];
327 1454af38 Andreas Müller
328 91138b6c Andreas Müller
		}
329 1454af38 Andreas Müller
330 91138b6c Andreas Müller
		try{
331
		return artist.substring(0, artist.lastIndexOf(' ')).replace("'", "");
332
		}catch (Exception e){
333
			return "";
334
		}
335
	}
336 1454af38 Andreas Müller
337 4e1b6f7d Andreas Müller
	private String getFamilyName(String artist){
338 1454af38 Andreas Müller
339 91138b6c Andreas Müller
		if (artist.contains(",")){
340
			String [] artistSplits = artist.split(",");
341
			artist = artistSplits[0];
342 1454af38 Andreas Müller
343 91138b6c Andreas Müller
		}
344
		if (!artist.contains(" ")) {
345 1454af38 Andreas Müller
346 91138b6c Andreas Müller
			return artist;
347
		}
348
		try{
349
		return artist.substring(artist.lastIndexOf(' ')).replace(" ", "");
350
		}
351
		catch(Exception e){
352
			return "";
353
		}
354
	}
355 1454af38 Andreas Müller
356 91138b6c Andreas Müller
	protected void doCount(int count, int modCount, String pluralString){
357
		if ((count % modCount ) == 0 && count!= 0 ){ logger.info(pluralString + " handled: " + (count));}
358
	}
359
360
}