AT Specimen imports updated (now with Identifications)
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / PalmaeImageImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io;
11
12 import java.io.File;
13 import java.io.IOException;
14 import java.net.MalformedURLException;
15 import java.net.URL;
16 import java.util.ArrayList;
17 import java.util.HashMap;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Map.Entry;
21 import java.util.Set;
22
23 import org.apache.log4j.Logger;
24 import org.apache.sanselan.ImageInfo;
25 import org.apache.sanselan.ImageReadException;
26 import org.apache.sanselan.Sanselan;
27 import org.apache.sanselan.common.IImageMetadata;
28 import org.apache.sanselan.common.ImageMetadata.Item;
29 import org.apache.sanselan.formats.jpeg.JpegImageMetadata;
30 import org.springframework.stereotype.Component;
31
32 import eu.etaxonomy.cdm.app.images.AbstractImageImporter;
33 import eu.etaxonomy.cdm.app.images.ImageImportState;
34 import eu.etaxonomy.cdm.model.agent.AgentBase;
35 import eu.etaxonomy.cdm.model.agent.Person;
36 import eu.etaxonomy.cdm.model.common.CdmBase;
37 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
38 import eu.etaxonomy.cdm.model.description.Feature;
39 import eu.etaxonomy.cdm.model.description.TaxonDescription;
40 import eu.etaxonomy.cdm.model.description.TextData;
41 import eu.etaxonomy.cdm.model.media.ImageFile;
42 import eu.etaxonomy.cdm.model.media.Media;
43 import eu.etaxonomy.cdm.model.media.MediaRepresentation;
44 import eu.etaxonomy.cdm.model.media.Rights;
45 import eu.etaxonomy.cdm.model.media.RightsTerm;
46 import eu.etaxonomy.cdm.model.reference.Reference;
47 import eu.etaxonomy.cdm.model.taxon.Taxon;
48 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
49 import eu.etaxonomy.cdm.strategy.match.DefaultMatchStrategy;
50 import eu.etaxonomy.cdm.strategy.match.IMatchStrategy;
51
52 /**
53 * TODO not working at the moment
54 *
55 * @author n.hoffmann
56 * @created 18.11.2008
57 * @version 1.0
58 */
59 @Component
60 public class PalmaeImageImport extends AbstractImageImporter {
61 private static final Logger logger = Logger.getLogger(PalmaeImageImport.class);
62
63 enum MetaData{
64 NAME,
65 ARTIST,
66 COPYRIGHT,
67 COPYRIGHTNOTICE,
68 OBJECTNAME
69 }
70
71 private static int modCount = 300;
72
73 private static String pluralString = "images";
74
75 /**
76 * Rudimetary implementation using apache sanselan. This implementation depends
77 * on the metadata standards used in the palmae images. The IPTC field ObjectName
78 * contains a string like this: "Arecaceae; Eugeissona utilis". The string
79 * in front of the semicolon is the family name and the one behind, the taxon name.
80 * So we basically assume, that if the string gets split by ";" the element at
81 * index 1 should be the taxon name.
82 * If this format changes this method breaks!
83 *
84 * TODO The ImageMetaData class of the commons package should provide
85 * convenient access to the metadata of an image as well as all the error handling
86 *
87 * @param imageFile
88 * @return the name of the taxon as stored in ObjectName IPTC tag
89 */
90 public String retrieveTaxonNameFromImageMetadata(File imageFile){
91 String name = null;
92
93 IImageMetadata metadata = null;
94
95 try {
96 metadata = Sanselan.getMetadata(imageFile);
97 } catch (ImageReadException e) {
98 logger.error("Error reading image" + " in " + imageFile.getName(), e);
99 } catch (IOException e) {
100 logger.error("Error reading file" + " in " + imageFile.getName(), e);
101 }
102
103 if(metadata instanceof JpegImageMetadata){
104 JpegImageMetadata jpegMetadata = (JpegImageMetadata) metadata;
105
106 for (Object object : jpegMetadata.getItems()){
107
108 Item item = (Item) object;
109
110 if(item.getKeyword().equals("ObjectName")){
111 logger.debug("File: " + imageFile.getName() + ". ObjectName string is: " + item.getText());
112 String[] objectNameSplit = item.getText().split(";");
113
114 try {
115 name = objectNameSplit[1].trim();
116 } catch (ArrayIndexOutOfBoundsException e) {
117 logger.warn("ObjectNameSplit has no second part: " + item.getText() + " in " + imageFile.getName());
118 //throw e;
119 }
120 }
121 }
122 }
123
124
125 return name;
126 }
127
128 public Map<MetaData, String> getMetaData(File imageFile, List<MetaData> metaData){
129 HashMap<MetaData, String> result = new HashMap<MetaData, String>();
130
131 IImageMetadata metadata = null;
132 List<String> metaDataStrings = new ArrayList<String>();
133
134 for (MetaData data: metaData){
135 metaDataStrings.add(data.name().toLowerCase());
136 }
137
138
139 try {
140 metadata = Sanselan.getMetadata(imageFile);
141 } catch (ImageReadException e) {
142 logger.error("Error reading image" + " in " + imageFile.getName(), e);
143 } catch (IOException e) {
144 logger.error("Error reading file" + " in " + imageFile.getName(), e);
145 }
146
147
148
149 if(metadata instanceof JpegImageMetadata){
150 JpegImageMetadata jpegMetadata = (JpegImageMetadata) metadata;
151
152 for (Object object : jpegMetadata.getItems()){
153 Item item = (Item) object;
154
155 if(metaDataStrings.contains(item.getKeyword().toLowerCase())){
156 logger.debug("File: " + imageFile.getName() + ". "+ item.getKeyword() +"string is: " + item.getText());
157 result.put(MetaData.valueOf(item.getKeyword().toUpperCase()), item.getText());
158 Set<Entry<MetaData, String>> resultSet = result.entrySet();
159 }
160 }
161 }
162
163 return result;
164 }
165
166
167
168 protected void invokeImageImport (ImageImportState state){
169
170 logger.info("Importing images from directory: " + state.getConfig().getSourceNameString());
171
172 File sourceFolder = new File(state.getConfig().getSource());
173 String taxonName;
174 if(sourceFolder.isDirectory()){
175 int count = 0;
176 for( File file : sourceFolder.listFiles()){
177 if(file.isFile()){
178 doCount(count++, modCount, pluralString);
179
180 taxonName= retrieveTaxonNameFromImageMetadata(file);
181 logger.debug("Looking up taxa with taxon name: " + taxonName);
182
183 //TODO:
184 ArrayList<MetaData> metaDataList = new ArrayList<MetaData>();
185 metaDataList.add (MetaData.ARTIST);
186 metaDataList.add (MetaData.COPYRIGHT);
187 metaDataList.add (MetaData.COPYRIGHTNOTICE);
188 metaDataList.add (MetaData.OBJECTNAME);
189 //metaDataList.add (MetaData.NAME);
190
191 Map<MetaData, String> metaData = getMetaData(file, metaDataList);
192
193
194
195 Reference sec = referenceService.find(state.getConfig().getSecUuid());
196
197 List<TaxonBase> taxa = new ArrayList<TaxonBase>();
198 if (taxonName != null){
199 taxa = taxonService.searchTaxaByName(taxonName, sec);
200 }else{
201 logger.error("TaxonName is null " + " in " + file.getName());
202 }
203 if(taxa.size() == 0){
204 logger.warn("no taxon with this name found: " + taxonName + " in " + file.getName());
205 }else if(taxa.size() > 1){
206 logger.error(taxa);
207 logger.error("multiple taxa with this name found: " + taxonName + " in " + file.getName());
208 }else{
209 Taxon taxon = (Taxon) taxa.get(0);
210
211 taxonService.saveOrUpdate(taxon);
212
213 //MetaDataFactory metaDataFactory = MetaDataFactory.getInstance();
214 //ImageMetaData imageMetaData = (ImageMetaData) metaDataFactory.readMediaData(file.toURI(), MimeType.IMAGE);
215 try{
216 ImageInfo imageinfo = Sanselan.getImageInfo(file);
217
218 String mimeType = imageinfo.getMimeType();
219 String suffix = "jpg";
220
221
222 // URL for this image
223 URL url = null;
224 try {
225 url = new URL(state.getConfig().getMediaUrlString() + file.getName());
226 } catch (MalformedURLException e) {
227 logger.warn("URL is malformed: "+ url);
228 }
229
230
231 ImageFile imageFile = ImageFile.NewInstance(url.toURI(),null, imageinfo.getHeight(), imageinfo.getWidth());
232
233
234 MediaRepresentation representation = MediaRepresentation.NewInstance(mimeType, suffix);
235 representation.addRepresentationPart(imageFile);
236
237 Media media = Media.NewInstance();
238 media.addRepresentation(representation);
239 if (metaData.containsKey(MetaData.OBJECTNAME)){
240 media.setTitleCache(metaData.get(MetaData.OBJECTNAME).replace("'", ""), true);
241 }
242 //TODO: add the rights and the author:
243 Person artist = null;
244 if (metaData.containsKey(MetaData.ARTIST)){
245 //TODO search for the person first and then create the object...
246 artist = Person.NewTitledInstance(metaData.get(MetaData.ARTIST).replace("'", ""));
247 artist.setFirstname(getFirstName(metaData.get(MetaData.ARTIST)).replace("'", ""));
248 artist.setLastname(getLastName(metaData.get(MetaData.ARTIST)).replace("'", ""));
249
250 IMatchStrategy matchStrategy = DefaultMatchStrategy.NewInstance(AgentBase.class);
251 try{
252 List<Person> agents = commonService.findMatching(artist, matchStrategy);
253
254 if (agents.size()!= 0){
255 artist = agents.get(0);
256 }
257 }catch(eu.etaxonomy.cdm.strategy.match.MatchException e){
258 logger.warn("MatchException occurred");
259 }
260
261 media.setArtist(artist);
262 }
263
264 if (metaData.containsKey(MetaData.COPYRIGHT)){
265 //TODO: maybe search for the identic right...
266 Rights copyright = Rights.NewInstance();
267 copyright.setType(RightsTerm.COPYRIGHT());
268 Person copyrightOwner;
269 if (artist != null && !artist.getLastname().equalsIgnoreCase(getLastName(metaData.get(MetaData.COPYRIGHT)))){
270 copyrightOwner = Person.NewInstance();
271
272 copyrightOwner.setFirstname(getFirstName(metaData.get(MetaData.COPYRIGHT)));
273 copyrightOwner.setLastname(getLastName(metaData.get(MetaData.COPYRIGHT)));
274 }else
275 {
276 copyrightOwner = artist;
277 }
278 copyright.setAgent(copyrightOwner);
279 //IMatchStrategy matchStrategy = DefaultMatchStrategy.NewInstance(Rights.class);
280 media.addRights(copyright);
281 }
282
283 Reference sourceRef = state.getConfig().getSourceReference();
284 TaxonDescription description = taxon.getOrCreateImageGallery(sourceRef == null ? null :sourceRef.getTitleCache());
285
286
287 TextData textData = null;
288 for (DescriptionElementBase element : description.getElements()){
289 if (element.isInstanceOf(TextData.class)){
290 textData = CdmBase.deproxy(element, TextData.class);
291 }
292 }
293 if (textData == null){
294 textData = TextData.NewInstance();
295 }
296
297
298 textData.addMedia(media);
299
300 textData.setFeature(Feature.IMAGE());
301
302 description.addElement(textData);
303
304 taxonService.saveOrUpdate(taxon);
305 }catch(Exception e) {
306 e.printStackTrace();
307 }
308 }
309 }
310 }
311 }else{
312 logger.error("given source folder is not a directory");
313 }
314 return;
315 }
316
317 private String getFirstName(String artist){
318 if (artist == null){
319 return "";
320 }
321 if (!artist.contains(" ")) {
322 return "";
323 }
324 if (artist.contains(",")){
325 String [] artistSplits = artist.split(",");
326 artist = artistSplits[0];
327
328 }
329
330 try{
331 return artist.substring(0, artist.lastIndexOf(' ')).replace("'", "");
332 }catch (Exception e){
333 return "";
334 }
335 }
336
337 private String getLastName(String artist){
338
339 if (artist.contains(",")){
340 String [] artistSplits = artist.split(",");
341 artist = artistSplits[0];
342
343 }
344 if (!artist.contains(" ")) {
345
346 return artist;
347 }
348 try{
349 return artist.substring(artist.lastIndexOf(' ')).replace(" ", "");
350 }
351 catch(Exception e){
352 return "";
353 }
354 }
355
356 protected void doCount(int count, int modCount, String pluralString){
357 if ((count % modCount ) == 0 && count!= 0 ){ logger.info(pluralString + " handled: " + (count));}
358 }
359
360 }