2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
;
13 import java
.io
.IOException
;
14 import java
.net
.MalformedURLException
;
16 import java
.util
.ArrayList
;
17 import java
.util
.HashMap
;
18 import java
.util
.List
;
20 import java
.util
.Map
.Entry
;
23 import org
.apache
.log4j
.Logger
;
24 import org
.apache
.sanselan
.ImageInfo
;
25 import org
.apache
.sanselan
.ImageReadException
;
26 import org
.apache
.sanselan
.Sanselan
;
27 import org
.apache
.sanselan
.common
.IImageMetadata
;
28 import org
.apache
.sanselan
.common
.ImageMetadata
.Item
;
29 import org
.apache
.sanselan
.formats
.jpeg
.JpegImageMetadata
;
30 import org
.springframework
.stereotype
.Component
;
32 import eu
.etaxonomy
.cdm
.app
.images
.AbstractImageImporter
;
33 import eu
.etaxonomy
.cdm
.app
.images
.ImageImportState
;
34 import eu
.etaxonomy
.cdm
.model
.agent
.AgentBase
;
35 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
36 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
37 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
38 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
39 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
40 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
41 import eu
.etaxonomy
.cdm
.model
.media
.ImageFile
;
42 import eu
.etaxonomy
.cdm
.model
.media
.Media
;
43 import eu
.etaxonomy
.cdm
.model
.media
.MediaRepresentation
;
44 import eu
.etaxonomy
.cdm
.model
.media
.Rights
;
45 import eu
.etaxonomy
.cdm
.model
.media
.RightsType
;
46 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
47 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
48 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
49 import eu
.etaxonomy
.cdm
.strategy
.match
.DefaultMatchStrategy
;
50 import eu
.etaxonomy
.cdm
.strategy
.match
.IMatchStrategy
;
53 * TODO not working at the moment
60 public class PalmaeImageImport
extends AbstractImageImporter
{
// log4j logger used by all methods of this importer.
61 private static final Logger logger
= Logger
.getLogger(PalmaeImageImport
.class);
// Progress interval: invokeImageImport hands this value to doCount, which writes a
// log line whenever the running count is a non-zero multiple of it.
71 private static int modCount
= 300;
// Noun placed in front of " handled: <count>" in the progress message written by doCount.
73 private static String pluralString
= "images";
76 * Rudimentary implementation using Apache Sanselan. This implementation depends
77 * on the metadata standards used in the palmae images. The IPTC field ObjectName
78 * contains a string like this: "Arecaceae; Eugeissona utilis". The string
79 * in front of the semicolon is the family name and the one behind it is the taxon name.
80 * So we basically assume that if the string gets split by ";", the element at
81 * index 1 should be the taxon name.
82 * If this format changes, this method breaks!
84 * TODO The ImageMetaData class of the commons package should provide
85 * convenient access to the metadata of an image as well as all the error handling
88 * @return the name of the taxon as stored in ObjectName IPTC tag
// Reads the metadata of the given image file with Sanselan and, for JPEG metadata,
// takes the text of the item whose keyword is "ObjectName", splits it on ";" and
// uses the trimmed element at index 1 as the taxon name.
// NOTE(review): several lines of this method are not visible in this listing (the
// declaration of `name`, the `try` openers, the closing braces and the return
// statement); the comments below describe only the statements that are shown.
90 public String
retrieveTaxonNameFromImageMetadata(File imageFile
){
// Stays null if Sanselan cannot read the file; the instanceof test further down is
// then false and the item scan is skipped.
93 IImageMetadata metadata
= null;
// Read failures are logged together with the file name.
96 metadata
= Sanselan
.getMetadata(imageFile
);
97 } catch (ImageReadException e
) {
98 logger
.error("Error reading image" + " in " + imageFile
.getName(), e
);
99 } catch (IOException e
) {
100 logger
.error("Error reading file" + " in " + imageFile
.getName(), e
);
// Only JPEG metadata is inspected; any other metadata type (or null) is ignored.
103 if(metadata
instanceof JpegImageMetadata
){
104 JpegImageMetadata jpegMetadata
= (JpegImageMetadata
) metadata
;
// getItems() is iterated as plain Objects, hence the cast to Item below.
106 for (Object object
: jpegMetadata
.getItems()){
108 Item item
= (Item
) object
;
// Exact, case-sensitive keyword match.
110 if(item
.getKeyword().equals("ObjectName")){
111 logger
.debug("File: " + imageFile
.getName() + ". ObjectName string is: " + item
.getText());
// Expected text format is "<family>; <taxon name>".
112 String
[] objectNameSplit
= item
.getText().split(";");
// Index 1 does not exist when the text contains no ";"; the resulting
// ArrayIndexOutOfBoundsException is what the catch clause below reports as a warning.
115 name
= objectNameSplit
[1].trim();
116 } catch (ArrayIndexOutOfBoundsException e
) {
117 logger
.warn("ObjectNameSplit has no second part: " + item
.getText() + " in " + imageFile
.getName());
// Collects, for the requested MetaData keys, the text of the matching metadata items
// of a JPEG image into a map keyed by MetaData constant.
// MetaData is declared outside this listing; it is used here through name() and
// valueOf(), i.e. like an enum.
// NOTE(review): the `try` opener, the end of the method and its return statement are
// not visible in this listing.
128 public Map
<MetaData
, String
> getMetaData(File imageFile
, List
<MetaData
> metaData
){
129 HashMap
<MetaData
, String
> result
= new HashMap
<MetaData
, String
>();
// Stays null if Sanselan cannot read the file; the instanceof test below then fails.
131 IImageMetadata metadata
= null;
// Lower-cased names of the requested keys, so item keywords can be compared
// without regard to case.
132 List
<String
> metaDataStrings
= new ArrayList
<String
>();
// NOTE(review): toLowerCase()/toUpperCase() are called without a Locale here and
// below, so the comparison depends on the JVM default locale.
134 for (MetaData data
: metaData
){
135 metaDataStrings
.add(data
.name().toLowerCase());
// Read failures are logged together with the file name.
140 metadata
= Sanselan
.getMetadata(imageFile
);
141 } catch (ImageReadException e
) {
142 logger
.error("Error reading image" + " in " + imageFile
.getName(), e
);
143 } catch (IOException e
) {
144 logger
.error("Error reading file" + " in " + imageFile
.getName(), e
);
// Only JPEG metadata is inspected.
149 if(metadata
instanceof JpegImageMetadata
){
150 JpegImageMetadata jpegMetadata
= (JpegImageMetadata
) metadata
;
152 for (Object object
: jpegMetadata
.getItems()){
153 Item item
= (Item
) object
;
// Keep the item only if its keyword is one of the requested keys (case-insensitive).
155 if(metaDataStrings
.contains(item
.getKeyword().toLowerCase())){
156 logger
.debug("File: " + imageFile
.getName() + ". "+ item
.getKeyword() +"string is: " + item
.getText());
// The keyword is upper-cased to look up the MetaData constant; this assumes the
// constant names are all upper-case (the ones referenced in this file are) - TODO confirm.
// A later item with the same keyword overwrites the earlier map entry.
157 result
.put(MetaData
.valueOf(item
.getKeyword().toUpperCase()), item
.getText());
// NOTE(review): no use of resultSet is visible in this listing.
158 Set
<Entry
<MetaData
, String
>> resultSet
= result
.entrySet();
// Imports every file of the configured source folder as an image of a taxon:
// the taxon name is read from the image metadata, the taxon is looked up by that name,
// a Media object (representation, title, artist, copyright) is built and added to a
// TextData element of the taxon's image gallery, and the taxon is saved.
// NOTE(review): many lines of this method are not visible in this listing (among them
// the declarations of `count`, `taxonName` and `url`, `else` lines, `try` openers,
// catch bodies and closing braces); comments describe only the statements shown.
168 protected void invokeImageImport (ImageImportState state
){
170 logger
.info("Importing images from directory: " + state
.getConfig().getSourceNameString());
172 File sourceFolder
= new File(state
.getConfig().getSource());
// Anything that is not a directory is reported by the error message at the end of the method.
174 if(sourceFolder
.isDirectory()){
// NOTE(review): File.listFiles() may return null on an I/O error, which would make
// this loop throw a NullPointerException.
176 for( File file
: sourceFolder
.listFiles()){
// Progress logging; the counter is incremented after its current value is passed on.
178 doCount(count
++, modCount
, pluralString
);
// May be null when the file has no usable ObjectName metadata (checked further down).
180 taxonName
= retrieveTaxonNameFromImageMetadata(file
);
181 logger
.debug("Looking up taxa with taxon name: " + taxonName
);
// Metadata keys to read from the image in addition to the taxon name.
184 ArrayList
<MetaData
> metaDataList
= new ArrayList
<MetaData
>();
185 metaDataList
.add (MetaData
.ARTIST
);
186 metaDataList
.add (MetaData
.COPYRIGHT
);
187 metaDataList
.add (MetaData
.COPYRIGHTNOTICE
);
188 metaDataList
.add (MetaData
.OBJECTNAME
);
189 //metaDataList.add (MetaData.NAME);
191 Map
<MetaData
, String
> metaData
= getMetaData(file
, metaDataList
);
// NOTE(review): this lookup depends only on the configuration but appears to run
// once per file - confirm whether it can be hoisted out of the loop.
195 Reference sec
= referenceService
.find(state
.getConfig().getSecUuid());
// Starts empty so the size checks below also work when no search is performed.
197 List
<TaxonBase
> taxa
= new ArrayList
<TaxonBase
>();
198 if (taxonName
!= null){
199 taxa
= taxonService
.searchTaxaByName(taxonName
, sec
);
// Presumably the else branch of the null check above (the `else` line is not visible).
201 logger
.error("TaxonName is null " + " in " + file
.getName());
// Only an unambiguous match is imported: zero hits give a warning, several hits an error.
203 if(taxa
.size() == 0){
204 logger
.warn("no taxon with this name found: " + taxonName
+ " in " + file
.getName());
205 }else if(taxa
.size() > 1){
207 logger
.error("multiple taxa with this name found: " + taxonName
+ " in " + file
.getName());
// Presumably the remaining else branch, i.e. exactly one taxon was found.
209 Taxon taxon
= (Taxon
) taxa
.get(0);
// The taxon is saved here and once more after the media has been attached.
211 taxonService
.saveOrUpdate(taxon
);
213 //MetaDataFactory metaDataFactory = MetaDataFactory.getInstance();
214 //ImageMetaData imageMetaData = (ImageMetaData) metaDataFactory.readMediaData(file.toURI(), MimeType.IMAGE);
// Supplies the mime type, height and width used below.
216 ImageInfo imageinfo
= Sanselan
.getImageInfo(file
);
218 String mimeType
= imageinfo
.getMimeType();
// NOTE(review): the suffix is fixed to "jpg" regardless of the detected mime type.
219 String suffix
= "jpg";
222 // URL for this image
// The public URL is the configured media URL prefix followed by the plain file name.
225 url
= new URL(state
.getConfig().getMediaUrlString() + file
.getName());
226 } catch (MalformedURLException e
) {
// NOTE(review): when the constructor above throws, `url` has not been assigned by it,
// so this message presumably prints the variable's previous value, not the bad string.
227 logger
.warn("URL is malformed: "+ url
);
// Height is passed before width. NOTE(review): no guard for an unset `url` is visible here.
231 ImageFile imageFile
= ImageFile
.NewInstance(url
.toURI(),null, imageinfo
.getHeight(), imageinfo
.getWidth());
234 MediaRepresentation representation
= MediaRepresentation
.NewInstance(mimeType
, suffix
);
235 representation
.addRepresentationPart(imageFile
);
237 Media media
= Media
.NewInstance();
238 media
.addRepresentation(representation
);
// The title is the raw ObjectName text with apostrophes removed.
239 if (metaData
.containsKey(MetaData
.OBJECTNAME
)){
240 media
.setTitleCache(metaData
.get(MetaData
.OBJECTNAME
).replace("'", ""), true);
242 //TODO: add the rights and the author:
// Remains null when the image carries no ARTIST metadata.
243 Person artist
= null;
244 if (metaData
.containsKey(MetaData
.ARTIST
)){
245 //TODO search for the person first and then create the object...
// Build a candidate Person from the ARTIST string (apostrophes stripped), with first
// and last name derived by the helper methods of this class.
246 artist
= Person
.NewTitledInstance(metaData
.get(MetaData
.ARTIST
).replace("'", ""));
247 artist
.setFirstname(getFirstName(metaData
.get(MetaData
.ARTIST
)).replace("'", ""));
248 artist
.setLastname(getLastName(metaData
.get(MetaData
.ARTIST
)).replace("'", ""));
// Look for an already existing matching person and reuse the first hit instead of the candidate.
250 IMatchStrategy matchStrategy
= DefaultMatchStrategy
.NewInstance(AgentBase
.class);
252 List
<Person
> agents
= commonService
.findMatching(artist
, matchStrategy
);
254 if (agents
.size()!= 0){
255 artist
= agents
.get(0);
// A matching failure is only logged; the freshly built candidate stays in `artist`.
257 }catch(eu
.etaxonomy
.cdm
.strategy
.match
.MatchException e
){
258 logger
.warn("MatchException occurred");
261 media
.setArtist(artist
);
264 if (metaData
.containsKey(MetaData
.COPYRIGHT
)){
265 //TODO: maybe search for the identic right...
266 Rights copyright
= Rights
.NewInstance();
267 copyright
.setType(RightsType
.COPYRIGHT());
268 Person copyrightOwner
;
// A separate owner is created only if an artist exists and its last name differs
// (ignoring case) from the last name parsed out of the COPYRIGHT string.
// NOTE(review): artist.getLastname() is dereferenced without a null check.
269 if (artist
!= null && !artist
.getLastname().equalsIgnoreCase(getLastName(metaData
.get(MetaData
.COPYRIGHT
)))){
270 copyrightOwner
= Person
.NewInstance();
272 copyrightOwner
.setFirstname(getFirstName(metaData
.get(MetaData
.COPYRIGHT
)));
273 copyrightOwner
.setLastname(getLastName(metaData
.get(MetaData
.COPYRIGHT
)));
// Presumably the else branch (the `else` line is not visible): the artist is the owner.
// If so, a null artist makes the copyright agent null as well - TODO confirm.
276 copyrightOwner
= artist
;
278 copyright
.setAgent(copyrightOwner
);
279 //IMatchStrategy matchStrategy = DefaultMatchStrategy.NewInstance(Rights.class);
280 media
.addRights(copyright
);
// The gallery is requested with the title of the configured source reference, or null if none is set.
283 Reference sourceRef
= state
.getConfig().getSourceReference();
284 TaxonDescription description
= taxon
.getOrCreateImageGallery(sourceRef
== null ?
null :sourceRef
.getTitleCache());
// Reuse a TextData element already present in the gallery; create one only if none is found.
287 TextData textData
= null;
288 for (DescriptionElementBase element
: description
.getElements()){
289 if (element
.isInstanceOf(TextData
.class)){
290 textData
= CdmBase
.deproxy(element
, TextData
.class);
293 if (textData
== null){
294 textData
= TextData
.NewInstance();
298 textData
.addMedia(media
);
300 textData
.setFeature(Feature
.IMAGE());
302 description
.addElement(textData
);
304 taxonService
.saveOrUpdate(taxon
);
// NOTE(review): the body of this catch-all handler is not visible in this listing.
305 }catch(Exception e
) {
// Reached when the configured source is not a directory.
312 logger
.error("given source folder is not a directory");
// Derives the first-name part of an artist/copyright string: if the string contains a
// comma only the part before the first comma is kept, and of that everything before
// the last space is returned with apostrophes removed.
// NOTE(review): the lines before the first check, the results of the early branch and
// of the catch clause, and the `try` opener are not visible in this listing.
317 private String
getFirstName(String artist
){
// A string without any space is handled by a separate branch (its body is not visible).
321 if (!artist
.contains(" ")) {
// Drop everything from the first comma onwards.
324 if (artist
.contains(",")){
325 String
[] artistSplits
= artist
.split(",");
326 artist
= artistSplits
[0];
// NOTE(review): the space check above runs before the comma split, so for input such
// as "Smith, John" the remaining "Smith" has no space, lastIndexOf returns -1 and
// substring(0, -1) throws; presumably that is what the catch clause below absorbs.
331 return artist
.substring(0, artist
.lastIndexOf(' ')).replace("'", "");
332 }catch (Exception e
){
// Derives the last-name part of an artist/copyright string: if the string contains a
// comma only the part before the first comma is kept, and of that the text after the
// last space is returned.
// NOTE(review): the result of the no-space branch and the end of the method are not
// visible in this listing.
337 private String
getLastName(String artist
){
// Drop everything from the first comma onwards.
339 if (artist
.contains(",")){
340 String
[] artistSplits
= artist
.split(",");
341 artist
= artistSplits
[0];
// Unlike in getFirstName, this check runs after the comma split, so it sees the
// shortened string.
344 if (!artist
.contains(" ")) {
// substring starts at the last space itself; the replace call removes that leading space.
349 return artist
.substring(artist
.lastIndexOf(' ')).replace(" ", "");
356 protected void doCount(int count
, int modCount
, String pluralString
){
357 if ((count
% modCount
) == 0 && count
!= 0 ){ logger
.info(pluralString
+ " handled: " + (count
));}