Revision 2f9e52e3
Added by Andreas Müller over 2 years ago
app-import/src/main/java/eu/etaxonomy/cdm/app/wp6/cichorieae/CichorieaImageUpdateActivator.java | ||
---|---|---|
1 |
/** |
|
2 |
* Copyright (C) 2007 EDIT |
|
3 |
* European Distributed Institute of Taxonomy |
|
4 |
* http://www.e-taxonomy.eu |
|
5 |
* |
|
6 |
* The contents of this file are subject to the Mozilla Public License Version 1.1 |
|
7 |
* See LICENSE.TXT at the top of this package for the full license terms. |
|
8 |
*/ |
|
9 |
package eu.etaxonomy.cdm.app.wp6.cichorieae; |
|
10 |
|
|
11 |
import java.io.File; |
|
12 |
import java.net.MalformedURLException; |
|
13 |
import java.net.URISyntaxException; |
|
14 |
import java.util.ArrayList; |
|
15 |
import java.util.HashMap; |
|
16 |
import java.util.HashSet; |
|
17 |
import java.util.Iterator; |
|
18 |
import java.util.List; |
|
19 |
import java.util.Map; |
|
20 |
import java.util.Set; |
|
21 |
import java.util.regex.Matcher; |
|
22 |
import java.util.regex.Pattern; |
|
23 |
|
|
24 |
import org.apache.log4j.Logger; |
|
25 |
import org.joda.time.DateTime; |
|
26 |
import org.joda.time.format.DateTimeFormat; |
|
27 |
import org.joda.time.format.DateTimeFormatter; |
|
28 |
import org.springframework.transaction.TransactionStatus; |
|
29 |
|
|
30 |
import eu.etaxonomy.cdm.api.application.CdmApplicationController; |
|
31 |
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator; |
|
32 |
import eu.etaxonomy.cdm.api.service.media.MediaInfoFactory; |
|
33 |
import eu.etaxonomy.cdm.api.service.media.MediaInfoFileReader; |
|
34 |
import eu.etaxonomy.cdm.app.common.CdmDestinations; |
|
35 |
import eu.etaxonomy.cdm.common.URI; |
|
36 |
import eu.etaxonomy.cdm.common.UTF8; |
|
37 |
import eu.etaxonomy.cdm.common.media.CdmImageInfo; |
|
38 |
import eu.etaxonomy.cdm.database.DbSchemaValidation; |
|
39 |
import eu.etaxonomy.cdm.database.ICdmDataSource; |
|
40 |
import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController; |
|
41 |
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper; |
|
42 |
import eu.etaxonomy.cdm.model.agent.AgentBase; |
|
43 |
import eu.etaxonomy.cdm.model.agent.Institution; |
|
44 |
import eu.etaxonomy.cdm.model.agent.Person; |
|
45 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
|
46 |
import eu.etaxonomy.cdm.model.common.Language; |
|
47 |
import eu.etaxonomy.cdm.model.common.TimePeriod; |
|
48 |
import eu.etaxonomy.cdm.model.description.DescriptionElementBase; |
|
49 |
import eu.etaxonomy.cdm.model.description.Feature; |
|
50 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
|
51 |
import eu.etaxonomy.cdm.model.description.TextData; |
|
52 |
import eu.etaxonomy.cdm.model.media.ImageFile; |
|
53 |
import eu.etaxonomy.cdm.model.media.Media; |
|
54 |
import eu.etaxonomy.cdm.model.media.MediaRepresentation; |
|
55 |
import eu.etaxonomy.cdm.model.media.MediaRepresentationPart; |
|
56 |
import eu.etaxonomy.cdm.model.media.Rights; |
|
57 |
import eu.etaxonomy.cdm.model.media.RightsType; |
|
58 |
import eu.etaxonomy.cdm.model.taxon.Synonym; |
|
59 |
import eu.etaxonomy.cdm.model.taxon.Taxon; |
|
60 |
import eu.etaxonomy.cdm.model.taxon.TaxonBase; |
|
61 |
|
|
62 |
/** |
|
63 |
* TODO copied from cyprus |
|
64 |
* |
|
65 |
* Creates CDM Media from images stored in the given path. |
|
66 |
* |
|
67 |
* Note: Currently adapted to also change from the Scaler IIIF API to the default Scaler API. |
|
68 |
* Note2: updateMetadata() still needs to be adapted to support 3 MediaRepresentations |
|
69 |
* |
|
70 |
* @author a.mueller |
|
71 |
* @since 05.2017 |
|
72 |
*/ |
|
73 |
public class CichorieaImageUpdateActivator { |
|
74 |
|
|
75 |
private static final Logger logger = Logger.getLogger(CichorieaImageUpdateActivator.class); |
|
76 |
|
|
77 |
// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_cichorieae(); |
|
78 |
// static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_cyprus(); |
|
79 |
static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_cichorieae(); |
|
80 |
|
|
81 |
static boolean testOnly = false; |
|
82 |
static boolean update_notCreate = true; |
|
83 |
//if true, data will always be updated, if false, only missing data will be updated |
|
84 |
static boolean forceUpdate = true; |
|
85 |
static boolean sizeOnly = true; |
|
86 |
|
|
87 |
private static final String path = "//media/digitalimages/EditWP6/Cichorieae/photos/"; |
|
88 |
private static final String oldUrlPath = "https://pictures.bgbm.org/digilib/Scaler/IIIF/Cichorieae!"; |
|
89 |
private static final String newUrlPath = "https://pictures.bgbm.org/digilib/Scaler?fn=Cichorieae/"; |
|
90 |
private static final String oldPostfix = "/full/full/0/default.jpg"; |
|
91 |
private static final String newPostfix = "&mo=file"; |
|
92 |
private static final String mediumPostfix ="&mo=fit&dw=400&dh=400&uvfix=1"; |
|
93 |
private static final String smallPostfix ="&mo=fit&dw=200&dh=200&uvfix=1"; |
|
94 |
|
|
95 |
private ImportDeduplicationHelper deduplicationHelper; |
|
96 |
|
|
97 |
private void doImport(ICdmDataSource cdmDestination){ |
|
98 |
|
|
99 |
CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE); |
|
100 |
TransactionStatus tx = app.startTransaction(); |
|
101 |
|
|
102 |
deduplicationHelper = ImportDeduplicationHelper.NewInstance(app, null); |
|
103 |
|
|
104 |
File file = new File(path); |
|
105 |
String[] fileList = file.list(); |
|
106 |
Set<String> notFound = new HashSet<>(); |
|
107 |
|
|
108 |
String regEx = "([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(?:jpg|JPG)"; |
|
109 |
Pattern pattern = Pattern.compile(regEx); |
|
110 |
|
|
111 |
String start = "O"; //O |
|
112 |
String end = "Q"; //Q |
|
113 |
String startLetter = ""; |
|
114 |
|
|
115 |
for (String fileName : fileList){ |
|
116 |
if(fileName.compareToIgnoreCase(start) < 0 || fileName.compareToIgnoreCase(end) >= 0){ |
|
117 |
continue; |
|
118 |
} |
|
119 |
Matcher matcher = pattern.matcher(fileName); |
|
120 |
if (matcher.matches() ){ |
|
121 |
// System.out.println(fileName); |
|
122 |
if (!fileName.substring(0,3).equals(startLetter)){ |
|
123 |
startLetter = fileName.substring(0,3); |
|
124 |
System.out.println(startLetter); |
|
125 |
} |
|
126 |
String taxonName = matcher.group(1); |
|
127 |
taxonName = taxonName.replace("_s_", " subsp. ").replace("_", " "); |
|
128 |
Taxon taxon = getAcceptedTaxon(app, taxonName); |
|
129 |
if (taxon == null){ |
|
130 |
if (!notFound.contains(taxonName)){ |
|
131 |
notFound.add(taxonName); |
|
132 |
logger.warn("Taxon not found: " + taxonName); |
|
133 |
} |
|
134 |
}else{ |
|
135 |
try { |
|
136 |
handleTaxon(app, taxon, fileName); |
|
137 |
} catch (Exception e) { |
|
138 |
logger.error("Unhandled exception ("+e.getMessage()+") when reading file " + fileName +". File not imported: "); |
|
139 |
e.printStackTrace(); |
|
140 |
} |
|
141 |
} |
|
142 |
}else{ |
|
143 |
if (!fileName.matches("(?:\\.erez|Thumbs\\.db.*|zypern_.*|__Keywords_template\\.txt)")){ |
|
144 |
logger.warn("Incorrect filename:" + fileName); |
|
145 |
}else{ |
|
146 |
System.out.println("Not clear yet: " + fileName); |
|
147 |
} |
|
148 |
} |
|
149 |
} |
|
150 |
|
|
151 |
// app.getTaxonService().saveOrUpdate(taxaToSave); |
|
152 |
|
|
153 |
if (testOnly){ |
|
154 |
tx.setRollbackOnly(); |
|
155 |
} |
|
156 |
app.commitTransaction(tx); |
|
157 |
} |
|
158 |
|
|
159 |
private void handleTaxon(CdmApplicationController app, Taxon taxon, String fileName) { |
|
160 |
Map<String, Media> existingUrls = getAllExistingUrls(taxon); |
|
161 |
String pathToOldImage = oldUrlPath + fileName + oldPostfix; |
|
162 |
|
|
163 |
String pathToFullImage = newUrlPath + fileName + newPostfix; |
|
164 |
String pathToMediumImage = newUrlPath + fileName + mediumPostfix; |
|
165 |
String pathToSmallImage = newUrlPath + fileName + smallPostfix; |
|
166 |
|
|
167 |
if (containsAll(existingUrls, pathToFullImage, pathToMediumImage, pathToSmallImage)){ |
|
168 |
return; |
|
169 |
}else{ |
|
170 |
Media media; |
|
171 |
if (containsAny(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage)){ |
|
172 |
media = getExistingMedia(existingUrls, pathToOldImage, pathToMediumImage, pathToSmallImage); |
|
173 |
if (media == null){ |
|
174 |
return; |
|
175 |
}else if (media.getAllTitles().isEmpty()){ |
|
176 |
media.setTitleCache(null, false); |
|
177 |
media.putTitle(Language.LATIN(), fileName); |
|
178 |
} |
|
179 |
}else{ |
|
180 |
media = Media.NewInstance(); |
|
181 |
makeMetaData(media, fileName, null, false, sizeOnly); |
|
182 |
|
|
183 |
makeTitle(media, fileName, false); |
|
184 |
if (!testOnly){ |
|
185 |
makeTextData(fileName, media, taxon); |
|
186 |
} |
|
187 |
} |
|
188 |
fillMediaWithAllRepresentations(media, pathToFullImage, pathToMediumImage, pathToSmallImage, pathToOldImage); |
|
189 |
} |
|
190 |
} |
|
191 |
|
|
192 |
private Media getExistingMedia(Map<String, Media> existingUrls, String pathToFullImage, String pathToMediumImage, |
|
193 |
String pathToSmallImage) { |
|
194 |
Set<Media> result = new HashSet<>(); |
|
195 |
for(String existingUrl : existingUrls.keySet()){ |
|
196 |
if (existingUrl.equals(pathToFullImage) || existingUrl.equals(pathToMediumImage) || |
|
197 |
existingUrl.equals(pathToSmallImage)){ |
|
198 |
result.add(existingUrls.get(existingUrl)); |
|
199 |
} |
|
200 |
} |
|
201 |
if (result.isEmpty()){ |
|
202 |
logger.warn("Media for existing URL not found. This should not happen."); |
|
203 |
return null; |
|
204 |
}else if (result.size() > 1){ |
|
205 |
logger.warn("Existing URLs have more than 1 Media. This should not happen."); |
|
206 |
return null; |
|
207 |
}else{ |
|
208 |
return result.iterator().next(); |
|
209 |
} |
|
210 |
} |
|
211 |
|
|
212 |
/** |
|
213 |
* <code>true</code> if all 3 paths exist in the URL set |
|
214 |
*/ |
|
215 |
private boolean containsAll(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage, |
|
216 |
String pathToSmallImage) { |
|
217 |
Set<String> existingUrls = existingUrlMap.keySet(); |
|
218 |
return existingUrls.contains(pathToFullImage) && existingUrls.contains(pathToMediumImage) |
|
219 |
&& existingUrls.contains(pathToSmallImage); |
|
220 |
} |
|
221 |
|
|
222 |
/** |
|
223 |
* <code>true</code> if any of the 3 paths exists in the URL set |
|
224 |
*/ |
|
225 |
private boolean containsAny(Map<String, Media> existingUrlMap, String pathToFullImage, String pathToMediumImage, |
|
226 |
String pathToSmallImage) { |
|
227 |
Set<String> existingUrls = existingUrlMap.keySet(); |
|
228 |
return existingUrls.contains(pathToFullImage) || existingUrls.contains(pathToMediumImage) |
|
229 |
|| existingUrls.contains(pathToSmallImage); |
|
230 |
} |
|
231 |
|
|
232 |
private void makeTitle(Media media, String fileName, boolean updateOnly) { |
|
233 |
String title = fileName.replace("_s_"," subsp. ") |
|
234 |
.replace("_"," ").replace(".jpg","").replace(".JPG",""); |
|
235 |
if ( (!updateOnly) || media.getAllTitles().isEmpty()){ |
|
236 |
media.putTitle(Language.LATIN(), title); |
|
237 |
} |
|
238 |
} |
|
239 |
|
|
240 |
private void makeMetaData(Media media, String fileName, ImageFile part, boolean updateOnly, boolean sizeOnly) { |
|
241 |
|
|
242 |
URI uri = part.getUri(); |
|
243 |
Map<String, String> keywords = new HashMap<>(); |
|
244 |
String copyright = null; |
|
245 |
String artistStr = null; |
|
246 |
String created = null; |
|
247 |
try{ |
|
248 |
MediaInfoFactory mediaFactory = new MediaInfoFactory(); |
|
249 |
CdmImageInfo imageInfo; |
|
250 |
try { |
|
251 |
imageInfo = mediaFactory.cdmImageInfo(uri, !sizeOnly); |
|
252 |
} catch (Exception e) { |
|
253 |
URI lowerCaseUri = URI.create(uri.toString().replace(".JPG", ".jpg")); |
|
254 |
try { |
|
255 |
imageInfo = mediaFactory.cdmImageInfo(lowerCaseUri, !sizeOnly); |
|
256 |
part.setUri(lowerCaseUri); //if no error arises we expect this to be the better URI |
|
257 |
} catch (Exception e1) { |
|
258 |
logger.error("Metadata not readable: " + uri.toString()); |
|
259 |
return; |
|
260 |
} |
|
261 |
} |
|
262 |
|
|
263 |
//size |
|
264 |
makeSize(part, imageInfo); |
|
265 |
if (sizeOnly){ |
|
266 |
return; |
|
267 |
} |
|
268 |
|
|
269 |
//additional metadata |
|
270 |
for (String metaDataKey : imageInfo.getMetaData().keySet()){ |
|
271 |
String value = imageInfo.getMetaData().get(metaDataKey); |
|
272 |
// System.out.println(metaDataKey + ": " + value); |
|
273 |
value = removeQuots(value); //not sure if still necessary |
|
274 |
if ("Copyright Notice".equalsIgnoreCase(metaDataKey)){ |
|
275 |
copyright = value; |
|
276 |
}else if ("artist".equals(metaDataKey)){ |
|
277 |
artistStr = value; |
|
278 |
}else if ("DateTimeOriginal".equalsIgnoreCase(metaDataKey)){ //TODO seems not to exist anymore |
|
279 |
created = value; |
|
280 |
}else{ |
|
281 |
keywords.put(metaDataKey.trim().toLowerCase(), value); |
|
282 |
} |
|
283 |
} |
|
284 |
} catch (Exception e1) { |
|
285 |
logger.warn(" Problem (" + e1.getMessage() + ") when reading metadata from uri: " + part); |
|
286 |
e1.printStackTrace(); |
|
287 |
return; |
|
288 |
} |
|
289 |
|
|
290 |
AgentBase<?> artistAgent = null; |
|
291 |
Rights right = null; |
|
292 |
DateTime createdDate = null; |
|
293 |
String locality = null; |
|
294 |
|
|
295 |
//artist |
|
296 |
if (keywords.get("photographer") != null){ |
|
297 |
String artist = keywords.get("photographer"); |
|
298 |
artistAgent = getOrCreatePerson(artist, fileName); |
|
299 |
} |
|
300 |
if (artistStr != null){ |
|
301 |
if (keywords.get("photographer") == null){ |
|
302 |
artistAgent = getOrCreatePerson(artistStr, fileName); |
|
303 |
}else if (!keywords.get("photographer").toLowerCase().replace(" ", "") |
|
304 |
.contains(artistStr.toLowerCase().replace(" ", ""))){ |
|
305 |
logger.warn("Artist '" + artistStr + "' could not be handled for " + fileName); |
|
306 |
} |
|
307 |
} |
|
308 |
|
|
309 |
//locality |
|
310 |
if (keywords.get("locality") != null){ |
|
311 |
locality = keywords.get("locality"); |
|
312 |
} |
|
313 |
|
|
314 |
//copyright |
|
315 |
if (copyright != null){ |
|
316 |
AgentBase<?> agent; |
|
317 |
if (copyright.equals("Botanic Garden and Botanical Museum Berlin-Dahlem (BGBM)")){ |
|
318 |
agent = Institution.NewNamedInstance(copyright); |
|
319 |
}else{ |
|
320 |
agent = getOrCreatePerson(copyright, fileName); |
|
321 |
} |
|
322 |
right = Rights.NewInstance(null, null, RightsType.COPYRIGHT()); |
|
323 |
right.setAgent(agent); |
|
324 |
right = deduplicationHelper.getExistingCopyright(right); |
|
325 |
} |
|
326 |
|
|
327 |
//created |
|
328 |
if (created != null){ |
|
329 |
DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss"); |
|
330 |
try { |
|
331 |
createdDate = f/*.withZone(DateTimeZone.forID("Europe/Athens"))*/.parseDateTime(created); |
|
332 |
} catch (Exception e) { |
|
333 |
logger.warn("Exception (" + e.getMessage() + ") when parsing create date " + created + " for file " + fileName); |
|
334 |
} |
|
335 |
} |
|
336 |
|
|
337 |
boolean force = !updateOnly || forceUpdate; |
|
338 |
//add to media |
|
339 |
if (artistAgent != null && (force || media.getArtist() == null)){ |
|
340 |
media.setArtist(artistAgent); |
|
341 |
} |
|
342 |
if (right != null && (force || media.getRights().isEmpty())){ |
|
343 |
media.removeRights(right); |
|
344 |
media.addRights(right); |
|
345 |
} |
|
346 |
if (createdDate != null && (force || media.getMediaCreated() == null)){ |
|
347 |
media.setMediaCreated(TimePeriod.NewInstance(createdDate)); |
|
348 |
} |
|
349 |
if (locality != null && (force || media.getDescription(Language.ENGLISH()) == null)){ |
|
350 |
media.putDescription(Language.ENGLISH(), locality); |
|
351 |
} |
|
352 |
} |
|
353 |
|
|
354 |
private void makeSize(ImageFile part, CdmImageInfo imageInfo) { |
|
355 |
//h |
|
356 |
Integer height = part.getHeight(); |
|
357 |
if (height == null || height != imageInfo.getHeight()){ |
|
358 |
part.setHeight(imageInfo.getHeight()); |
|
359 |
} |
|
360 |
//w |
|
361 |
Integer width = part.getWidth(); |
|
362 |
if (width == null || width != imageInfo.getWidth()){ |
|
363 |
part.setWidth(imageInfo.getWidth()); |
|
364 |
} |
|
365 |
//s |
|
366 |
Integer size = part.getSize(); |
|
367 |
if(size == null || size != imageInfo.getLength()){ |
|
368 |
part.setSize((int)imageInfo.getLength()); |
|
369 |
} |
|
370 |
} |
|
371 |
|
|
372 |
private Person getOrCreatePerson(String artist, String fileName) { |
|
373 |
artist = artist.trim(); |
|
374 |
String regEx = "((?:[A-Z](?:\\.|[a-z\\-\u00E4\u00F6\u00FC]+) ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)"; |
|
375 |
Matcher matcher = Pattern.compile(regEx).matcher(artist); |
|
376 |
Person person = Person.NewInstance(); |
|
377 |
if (matcher.matches()){ |
|
378 |
person.setGivenName(matcher.group(1).trim()); |
|
379 |
person.setFamilyName(matcher.group(2).trim()); |
|
380 |
}else{ |
|
381 |
person.setTitleCache(artist, true); |
|
382 |
logger.warn("Person could not be parsed: " + artist + " for file " + fileName); |
|
383 |
} |
|
384 |
|
|
385 |
person = deduplicationHelper.getExistingAuthor(person); |
|
386 |
return person; |
|
387 |
} |
|
388 |
|
|
389 |
private String removeQuots(String text) { |
|
390 |
if (text.startsWith("'") && text.endsWith("'")){ |
|
391 |
return text.substring(1, text.length() -1); |
|
392 |
}else{ |
|
393 |
return text; |
|
394 |
} |
|
395 |
} |
|
396 |
|
|
397 |
private void makeTextData(String fileStr, Media media, Taxon taxon) { |
|
398 |
TaxonDescription imageGallery = taxon.getImageGallery(true); |
|
399 |
TextData textData = null; |
|
400 |
if (!imageGallery.getElements().isEmpty()){ |
|
401 |
DescriptionElementBase el = imageGallery.getElements().iterator().next(); |
|
402 |
if (el.isInstanceOf(TextData.class)){ |
|
403 |
textData = CdmBase.deproxy(el, TextData.class); |
|
404 |
}else{ |
|
405 |
logger.warn("Image gallery had non-textdata description element: " + fileStr); |
|
406 |
} |
|
407 |
} |
|
408 |
if (textData == null){ |
|
409 |
textData = TextData.NewInstance(); |
|
410 |
textData.setFeature(Feature.IMAGE()); |
|
411 |
} |
|
412 |
imageGallery.addElement(textData); |
|
413 |
textData.addMedia(media); |
|
414 |
} |
|
415 |
|
|
416 |
private void fillMediaWithAllRepresentations(Media media, String fullPath, String mediumPath, String smallPath, String oldFullPath){ |
|
417 |
Set<MediaRepresentation> existingRepresentations = new HashSet<>(media.getRepresentations()); |
|
418 |
makeMediaRepresentation(oldFullPath, media, existingRepresentations, fullPath); |
|
419 |
makeMediaRepresentation(mediumPath, media, existingRepresentations, null); |
|
420 |
makeMediaRepresentation(smallPath, media, existingRepresentations, null); |
|
421 |
if(!existingRepresentations.isEmpty()){ |
|
422 |
logger.warn("Media contains existing representations which are not contained in the 3 paths: " + media.getTitleCache()); |
|
423 |
} |
|
424 |
} |
|
425 |
|
|
426 |
private void makeMediaRepresentation(String uriString, Media media, |
|
427 |
Set<MediaRepresentation> existingRepresentations, String replaceUri) { |
|
428 |
MediaRepresentation existingMediaRep = getExistingMediaRepresentation(uriString, existingRepresentations); |
|
429 |
boolean readMediaData = true; |
|
430 |
MediaRepresentation newMediaRep = makeMediaRepresentation(replaceUri != null? replaceUri : uriString, readMediaData); |
|
431 |
if (existingMediaRep == null){ |
|
432 |
media.addRepresentation(newMediaRep); |
|
433 |
}else{ |
|
434 |
existingRepresentations.remove(existingMediaRep); |
|
435 |
mergeToExistingRepresentation(existingMediaRep, newMediaRep); |
|
436 |
} |
|
437 |
} |
|
438 |
|
|
439 |
private void mergeToExistingRepresentation(MediaRepresentation existingMediaRep, MediaRepresentation newMediaRep) { |
|
440 |
existingMediaRep.setMimeType(newMediaRep.getMimeType()); |
|
441 |
existingMediaRep.setSuffix(newMediaRep.getSuffix()); |
|
442 |
if(!existingMediaRep.getParts().isEmpty() && !newMediaRep.getParts().isEmpty()){ |
|
443 |
MediaRepresentationPart existingPart = existingMediaRep.getParts().iterator().next(); |
|
444 |
ImageFile newPart = (ImageFile)newMediaRep.getParts().iterator().next(); |
|
445 |
if(existingPart.isInstanceOf(ImageFile.class)){ |
|
446 |
ImageFile existingImage = CdmBase.deproxy(existingPart, ImageFile.class); |
|
447 |
existingImage.setHeight(newPart.getHeight()); |
|
448 |
existingImage.setWidth(newPart.getWidth()); |
|
449 |
}else{ |
|
450 |
logger.warn("MediaRepresentationPart was not of type ImageFile. Height and width not merged: " + existingPart.getUri()); |
|
451 |
} |
|
452 |
existingPart.setSize(newPart.getSize()); |
|
453 |
existingPart.setUri(newPart.getUri()); |
|
454 |
} |
|
455 |
} |
|
456 |
|
|
457 |
private MediaRepresentation getExistingMediaRepresentation(String uriString, |
|
458 |
Set<MediaRepresentation> existingRepresentations) { |
|
459 |
for (MediaRepresentation rep : existingRepresentations){ |
|
460 |
for (MediaRepresentationPart part : rep.getParts()){ |
|
461 |
if (part.getUri() != null && part.getUri().toString().equals(uriString)){ |
|
462 |
return rep; |
|
463 |
} |
|
464 |
} |
|
465 |
} |
|
466 |
return null; |
|
467 |
} |
|
468 |
|
|
469 |
/** |
|
470 |
* Creates |
|
471 |
* @see #READ_MEDIA_DATA |
|
472 |
* @return |
|
473 |
* @throws MalformedURLException |
|
474 |
*/ |
|
475 |
protected Media getImageMedia(String uriString, String uriStrThumb, boolean readMediaData) throws MalformedURLException { |
|
476 |
if( uriString == null){ |
|
477 |
return null; |
|
478 |
} else { |
|
479 |
uriString = uriString.replace(" ", "%20"); //replace whitespace |
|
480 |
try { |
|
481 |
MediaRepresentation representation = makeMediaRepresentation(uriString, readMediaData); |
|
482 |
Media media = Media.NewInstance(); |
|
483 |
media.addRepresentation(representation); |
|
484 |
|
|
485 |
if (uriStrThumb != null){ |
|
486 |
CdmImageInfo imageInfoThumb = null; |
|
487 |
uriStrThumb = uriStrThumb.replace(" ", "%20"); //replace whitespace |
|
488 |
URI uriThumb = new URI(uriStrThumb); |
|
489 |
try { |
|
490 |
if (readMediaData){ |
|
491 |
logger.info("Read media data from: " + uriThumb); |
|
492 |
// //imageInfoThumb = CdmImageInfo.NewInstance(uriThumb, 0); |
|
493 |
imageInfoThumb = MediaInfoFileReader.legacyFactoryMethod(uriThumb) |
|
494 |
.readBaseInfo() |
|
495 |
.getCdmImageInfo(); |
|
496 |
} |
|
497 |
} catch (Exception e) { |
|
498 |
String message = "An error occurred when trying to read image meta data for " + uriThumb.toString() + ": " + e.getMessage(); |
|
499 |
logger.warn(message); |
|
500 |
} |
|
501 |
|
|
502 |
ImageFile imageFileFhumb = ImageFile.NewInstance(uriThumb, null, imageInfoThumb); |
|
503 |
MediaRepresentation reprThumb = MediaRepresentation.NewInstance(); |
|
504 |
if(imageInfoThumb != null){ |
|
505 |
reprThumb.setMimeType(imageInfoThumb.getMimeType()); |
|
506 |
reprThumb.setSuffix(imageInfoThumb.getSuffix()); |
|
507 |
} |
|
508 |
reprThumb.addRepresentationPart(imageFileFhumb); |
|
509 |
media.addRepresentation(reprThumb); |
|
510 |
} |
|
511 |
|
|
512 |
return media; |
|
513 |
} catch (URISyntaxException e1) { |
|
514 |
String message = "An URISyntaxException occurred when trying to create uri from multimedia objcet string: " + uriString; |
|
515 |
logger.warn(message); |
|
516 |
return null; |
|
517 |
} |
|
518 |
} |
|
519 |
} |
|
520 |
|
|
521 |
private MediaRepresentation makeMediaRepresentation(String uriString, boolean readMediaData) { |
|
522 |
|
|
523 |
uriString = uriString.replace(" ", "%20"); //replace whitespace |
|
524 |
CdmImageInfo imageInfo = null; |
|
525 |
URI uri; |
|
526 |
try { |
|
527 |
uri = new URI(uriString); |
|
528 |
} catch (URISyntaxException e1) { |
|
529 |
logger.error("Malformed URI. Could not create media representation: " + uriString); |
|
530 |
return null; |
|
531 |
} |
|
532 |
try { |
|
533 |
if (readMediaData){ |
|
534 |
logger.info("Read media data from: " + uri); |
|
535 |
//imageInfo = CdmImageInfo.NewInstance(uri, 0); |
|
536 |
imageInfo = MediaInfoFileReader.legacyFactoryMethod(uri) |
|
537 |
.readBaseInfo() |
|
538 |
.getCdmImageInfo(); |
|
539 |
} |
|
540 |
} catch (Exception e) { |
|
541 |
try { |
|
542 |
//try again |
|
543 |
//imageInfo = CdmImageInfo.NewInstance(uri, 0); |
|
544 |
imageInfo = MediaInfoFileReader.legacyFactoryMethod(uri) |
|
545 |
.readBaseInfo() |
|
546 |
.getCdmImageInfo(); |
|
547 |
} catch (Exception e1) { |
|
548 |
String message = "An error occurred when trying to read image meta data for " + uri.toString() + ": " + e1.getMessage(); |
|
549 |
e1.printStackTrace(); |
|
550 |
logger.warn(message); |
|
551 |
} |
|
552 |
} |
|
553 |
ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo); |
|
554 |
|
|
555 |
MediaRepresentation representation = MediaRepresentation.NewInstance(); |
|
556 |
|
|
557 |
if(imageInfo != null){ |
|
558 |
representation.setMimeType(imageInfo.getMimeType()); |
|
559 |
representation.setSuffix(imageInfo.getSuffix()); |
|
560 |
} |
|
561 |
representation.addRepresentationPart(imageFile); |
|
562 |
return representation; |
|
563 |
} |
|
564 |
|
|
565 |
private Map<String, Media> getAllExistingUrls(Taxon taxon) { |
|
566 |
Map<String, Media> result = new HashMap<>(); |
|
567 |
Set<TaxonDescription> descriptions = taxon.getDescriptions(); |
|
568 |
for (TaxonDescription td : descriptions){ |
|
569 |
if (td.isImageGallery()){ |
|
570 |
for (DescriptionElementBase deb : td.getElements()){ |
|
571 |
if (deb.isInstanceOf(TextData.class)){ |
|
572 |
TextData textData = CdmBase.deproxy(deb, TextData.class); |
|
573 |
for (Media media :textData.getMedia()){ |
|
574 |
for (MediaRepresentation rep : media.getRepresentations()){ |
|
575 |
for (MediaRepresentationPart part : rep.getParts()){ |
|
576 |
URI uri = part.getUri(); |
|
577 |
if (uri != null){ |
|
578 |
String uriStr = uri.toString(); |
|
579 |
result.put(uriStr, media); |
|
580 |
} |
|
581 |
} |
|
582 |
} |
|
583 |
} |
|
584 |
} |
|
585 |
} |
|
586 |
} |
|
587 |
} |
|
588 |
return result; |
|
589 |
} |
|
590 |
|
|
591 |
private Taxon getAcceptedTaxon(CdmApplicationController app, String taxonNameStr) { |
|
592 |
|
|
593 |
MatchingTaxonConfigurator config = new MatchingTaxonConfigurator(); |
|
594 |
taxonNameStr = adaptName(taxonNameStr); |
|
595 |
config.setTaxonNameTitle(taxonNameStr); |
|
596 |
config.setIncludeSynonyms(false); |
|
597 |
List<TaxonBase> list = app.getTaxonService().findTaxaByName(config); |
|
598 |
if (list.isEmpty()){ |
|
599 |
// logger.warn("Taxon not found for media: " + taxonNameStr); |
|
600 |
taxonNameStr = taxonNameStr.replaceFirst(" ", " " + UTF8.HYBRID.toString()); |
|
601 |
config.setTaxonNameTitle(taxonNameStr); |
|
602 |
list = app.getTaxonService().findTaxaByName(config); |
|
603 |
if (list.isEmpty()){ |
|
604 |
return null; |
|
605 |
}else if (list.size() > 1){ |
|
606 |
logger.warn("After searching for hybrids more than 1 taxon was foung: " + taxonNameStr); |
|
607 |
} |
|
608 |
} |
|
609 |
if (list.size()>1){ |
|
610 |
Iterator<TaxonBase> it = list.iterator(); |
|
611 |
while (it.hasNext()){ |
|
612 |
Taxon next = (Taxon)it.next(); |
|
613 |
if (next.getTaxonNodes().isEmpty() && !next.getTaxaForMisappliedName(true).isEmpty()){ |
|
614 |
it.remove(); |
|
615 |
} |
|
616 |
} |
|
617 |
if (list.size()>1){ |
|
618 |
logger.warn("More than 1 taxon found for media: " + taxonNameStr + " . Will now try to use only taxon with taxon node."); |
|
619 |
it = list.iterator(); |
|
620 |
while (it.hasNext()){ |
|
621 |
Taxon next = (Taxon)it.next(); |
|
622 |
if (next.getTaxonNodes().isEmpty()){ |
|
623 |
it.remove(); |
|
624 |
} |
|
625 |
} |
|
626 |
if (list.size()>1){ |
|
627 |
logger.warn("Still more than 1 taxon found for media: " + taxonNameStr); |
|
628 |
}else if (list.size() < 1){ |
|
629 |
logger.warn("After removing nodeless taxa no taxon was left: " + taxonNameStr); |
|
630 |
return null; |
|
631 |
} |
|
632 |
}else if (list.size() < 1){ |
|
633 |
logger.warn("After removing misapplications no taxon was left: " + taxonNameStr); |
|
634 |
return null; |
|
635 |
} |
|
636 |
} |
|
637 |
TaxonBase<?> taxonBase = list.get(0); |
|
638 |
Taxon result; |
|
639 |
if (taxonBase.isInstanceOf(Synonym.class)){ |
|
640 |
result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon(); |
|
641 |
}else{ |
|
642 |
result = CdmBase.deproxy(taxonBase, Taxon.class); |
|
643 |
} |
|
644 |
return result; |
|
645 |
} |
|
646 |
|
|
647 |
private String adaptName(String taxonNameStr) { |
|
648 |
// if (taxonNameStr.equals("Hypericum cerastoides")){ |
|
649 |
// taxonNameStr = "Hypericum cerastioides"; |
|
650 |
// } |
|
651 |
return taxonNameStr; |
|
652 |
} |
|
653 |
|
|
654 |
private void test(){ |
|
655 |
File f = new File(path); |
|
656 |
String[] list = f.list(); |
|
657 |
List<String> fullFileNames = new ArrayList<>(); |
|
658 |
for (String fileName : list){ |
|
659 |
fullFileNames.add(path + fileName); |
|
660 |
if (! fileName.matches("([A-Z][a-z]+_[a-z\\-]{3,}(?:_s_[a-z\\-]{3,})?)_[A-F]\\d{1,2}\\.(jpg|JPG)")){ |
|
661 |
System.out.println(fileName); |
|
662 |
} |
|
663 |
} |
|
664 |
} |
|
665 |
|
|
666 |
private void updateMetadata(ICdmDataSource cdmDestination){ |
|
667 |
CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, DbSchemaValidation.VALIDATE); |
|
668 |
TransactionStatus tx = app.startTransaction(); |
|
669 |
|
|
670 |
deduplicationHelper = ImportDeduplicationHelper.NewInstance(app, null); |
|
671 |
|
|
672 |
List<Media> list = app.getMediaService().list(Media.class, null, null, null, null); |
|
673 |
for (Media media : list){ |
|
674 |
handleSingleMediaUpdate(media); |
|
675 |
} |
|
676 |
|
|
677 |
if (testOnly){ |
|
678 |
tx.setRollbackOnly(); |
|
679 |
} |
|
680 |
app.commitTransaction(tx); |
|
681 |
} |
|
682 |
|
|
683 |
private void handleSingleMediaUpdate(Media media){ |
|
684 |
ImageFile part = getUrlStringForMedia(media); |
|
685 |
if (part == null || part.getUri() == null){ |
|
686 |
logger.warn("No uri found for media (id = " + media.getId() + ")"); |
|
687 |
return; |
|
688 |
} |
|
689 |
String url = part.getUri().toString(); |
|
690 |
if (url.startsWith(newUrlPath)){ |
|
691 |
String fileName = url.replace(newUrlPath, "").replace("&mo=file", ""); |
|
692 |
makeMetaData(media, fileName, part, true, sizeOnly); |
|
693 |
makeTitle(media, fileName, true); |
|
694 |
System.out.println(fileName); |
|
695 |
}else{ |
|
696 |
logger.warn("URL does not start with standard url path: " + url); |
|
697 |
} |
|
698 |
} |
|
699 |
|
|
700 |
private ImageFile getUrlStringForMedia(Media media) { |
|
701 |
ImageFile result = null; |
|
702 |
for (MediaRepresentation rep : media.getRepresentations()){ |
|
703 |
for (MediaRepresentationPart part : rep.getParts()){ |
|
704 |
URI uri = part.getUri(); |
|
705 |
if (uri != null){ |
|
706 |
if (result != null){ |
|
707 |
//TODO this still needs to be adapted to the 3 representations of media |
|
708 |
logger.warn("More than 1 uri exists for media "+ media.getId()); |
|
709 |
}else if (!part.isInstanceOf(ImageFile.class)){ |
|
710 |
logger.warn("MediaRepresentationPart is not an ImageFile: " + uri); |
|
711 |
}else{ |
|
712 |
result = CdmBase.deproxy(part, ImageFile.class); |
|
713 |
} |
|
714 |
} |
|
715 |
} |
|
716 |
} |
|
717 |
return result; |
|
718 |
} |
|
719 |
|
|
720 |
public static void main(String[] args) { |
|
721 |
CichorieaImageUpdateActivator me = new CichorieaImageUpdateActivator(); |
|
722 |
if (update_notCreate){ |
|
723 |
me.updateMetadata(cdmDestination); |
|
724 |
}else{ |
|
725 |
me.doImport(cdmDestination); |
|
726 |
} |
|
727 |
// me.test(); |
|
728 |
System.exit(0); |
|
729 |
} |
|
730 |
} |
Also available in: Unified diff
Add CichorieaImageUpdateActivator