/**
* Copyright (C) 2016 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
|
9
|
package eu.etaxonomy.cdm.io.greece;
|
10
|
|
11
|
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.sanselan.ImageReadException;
import org.apache.sanselan.Sanselan;
import org.apache.sanselan.common.IImageMetadata;
import org.apache.sanselan.common.ImageMetadata.Item;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.stereotype.Component;
import org.springframework.transaction.TransactionStatus;

import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
import eu.etaxonomy.cdm.io.common.CdmImportBase;
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
import eu.etaxonomy.cdm.model.agent.Person;
import eu.etaxonomy.cdm.model.common.CdmBase;
import eu.etaxonomy.cdm.model.common.Language;
import eu.etaxonomy.cdm.model.description.Feature;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.description.TextData;
import eu.etaxonomy.cdm.model.media.Media;
import eu.etaxonomy.cdm.model.media.Rights;
import eu.etaxonomy.cdm.model.media.RightsType;
import eu.etaxonomy.cdm.model.reference.Reference;
import eu.etaxonomy.cdm.model.taxon.Synonym;
import eu.etaxonomy.cdm.model.taxon.Taxon;
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
46
|
/**
|
47
|
* Import for the Flora Hellenica images.
|
48
|
*
|
49
|
* @author a.mueller
|
50
|
* @since 03.04.2017
|
51
|
*/
|
52
|
|
53
|
@Component
|
54
|
public class FloraHellenicaImageImport<CONFIG extends FloraHellenicaImportConfigurator>
|
55
|
extends CdmImportBase<CONFIG,SimpleExcelTaxonImportState<CONFIG>>{
|
56
|
|
57
|
private static final long serialVersionUID = 7118028793298922703L;
|
58
|
private static final Logger logger = Logger.getLogger(FloraHellenicaImageImport.class);
|
59
|
|
60
|
private static final String BASE_URL = "https://media.e-taxonomy.eu/flora-greece/";
|
61
|
private static final String IMAGE_FOLDER = "////BGBM-PESIHPC/Greece/thumbs/";
|
62
|
|
63
|
@SuppressWarnings("unchecked")
|
64
|
private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper = (ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>>)ImportDeduplicationHelper.NewInstance(this);
|
65
|
|
66
|
/**
|
67
|
* {@inheritDoc}
|
68
|
*/
|
69
|
@Override
|
70
|
protected void doInvoke(SimpleExcelTaxonImportState<CONFIG> state) {
|
71
|
TransactionStatus tx = this.startTransaction();
|
72
|
for (int plate = 1; plate < 22 ; plate++){
|
73
|
try {
|
74
|
handleSinglePlate(state, plate);
|
75
|
} catch (Exception e) {
|
76
|
logger.error("Error when handling plate " + plate);
|
77
|
e.printStackTrace();
|
78
|
}
|
79
|
}
|
80
|
this.commitTransaction(tx);
|
81
|
}
|
82
|
|
83
|
/**
|
84
|
* @param state
|
85
|
* @param plate
|
86
|
*/
|
87
|
private void handleSinglePlate(SimpleExcelTaxonImportState<CONFIG> state, int plate) {
|
88
|
String fill = plate < 10 ? "0" : "";
|
89
|
String plateStr = "Plate_" + fill + plate + "/";
|
90
|
String fullFolderUrl = BASE_URL + plateStr;
|
91
|
String fullThumbUrl = BASE_URL + "thumbs/" + plateStr;
|
92
|
String folderStr = IMAGE_FOLDER + plateStr;
|
93
|
File file = new File(folderStr);
|
94
|
String[] list = file.list();
|
95
|
for (String fileStr : list){
|
96
|
try {
|
97
|
handleSingleFile(state, fullFolderUrl, fullThumbUrl, fileStr, plate);
|
98
|
} catch (Exception e) {
|
99
|
logger.error("Error when handling file: " + fileStr + " in plate " + plate);
|
100
|
e.printStackTrace();
|
101
|
}
|
102
|
}
|
103
|
}
|
104
|
|
105
|
/**
|
106
|
* @param state
|
107
|
* @param fullFolderUrl
|
108
|
* @param fullThumbUrl
|
109
|
* @param fileStr
|
110
|
* @param plate
|
111
|
*/
|
112
|
private void handleSingleFile(SimpleExcelTaxonImportState<CONFIG> state,
|
113
|
String fullFolderUrl, String fullThumbUrl, String fileStr, int plate) {
|
114
|
String[] taxonNameAndArtist = getTaxonName(fileStr);
|
115
|
String taxonNameStr = taxonNameAndArtist[0];
|
116
|
String taxonNameStr2 = null;
|
117
|
String artistStr = taxonNameAndArtist[1];
|
118
|
if (fileStr.equals("RamondaSerbica(L)+Nathaliae(R)1.jpg")){
|
119
|
taxonNameStr = "Ramonda serbica";
|
120
|
taxonNameStr2 = "Ramonda nathaliae";
|
121
|
}else if (fileStr.contains("HypericumCerastioides")){
|
122
|
taxonNameStr = taxonNameStr.replace("HypericumCerastoides", "HypericumCerastioides");
|
123
|
}else if (fileStr.contains("StachysScardica")){
|
124
|
taxonNameStr = taxonNameStr.replace("StachysScardica", "BetonicaScardica");
|
125
|
}else if (fileStr.contains("OleaEuropaeaOleaster ")){
|
126
|
taxonNameStr = taxonNameStr.replace("OleaEuropaeaOleaster", "OleaEuropaeaEuropaea");
|
127
|
}
|
128
|
|
129
|
try {
|
130
|
|
131
|
Media media = getImageMedia(fullFolderUrl + fileStr, fullThumbUrl + fileStr, true);
|
132
|
|
133
|
//image metadata
|
134
|
URI uri = URI.create(fullThumbUrl + fileStr);
|
135
|
try{
|
136
|
IImageMetadata metadata = Sanselan.getMetadata(uri.toURL().openStream(), null);
|
137
|
ArrayList<?> items = metadata.getItems();
|
138
|
for (Object object : items){
|
139
|
Item item = (Item) object;
|
140
|
// System.out.println(item.getKeyword() + ": " + item.getText());
|
141
|
String keyword = item.getKeyword().toLowerCase();
|
142
|
String value = removeQuots(item.getText());
|
143
|
if("image description".equals(keyword)){
|
144
|
media.putDescription(Language.DEFAULT(), value);
|
145
|
}else if ("artist".equals(keyword)){
|
146
|
if (isNotBlank(artistStr) && ! value.contains(artistStr)){
|
147
|
logger.warn("Artist and artistStr are different: " + artistStr + "; " + value);
|
148
|
}
|
149
|
artistStr = value;
|
150
|
}else if ("date time original".equalsIgnoreCase(item.getKeyword())){
|
151
|
DateTimeFormatter f = DateTimeFormat.forPattern("yyyy:MM:dd HH:mm:ss");
|
152
|
DateTime created = f.withZone(DateTimeZone.forID("Europe/Athens")).parseDateTime(value);
|
153
|
media.setMediaCreated(created);
|
154
|
}
|
155
|
}
|
156
|
} catch (ImageReadException | IOException e1) {
|
157
|
e1.printStackTrace();
|
158
|
}
|
159
|
if (isNotBlank(artistStr)){
|
160
|
Person person = Person.NewInstance();
|
161
|
String[] split = artistStr.split("\\+");
|
162
|
if (split.length == 1){
|
163
|
person.setFamilyName(artistStr);
|
164
|
}else if (split.length == 2){
|
165
|
person.setGivenName(split[0]);
|
166
|
person.setFamilyName(split[1]);
|
167
|
}else{
|
168
|
person.setTitleCache("artistStr", true);
|
169
|
}
|
170
|
person = (Person)deduplicationHelper.getExistingAuthor(state, person);
|
171
|
|
172
|
media.setArtist(person);
|
173
|
//copyright
|
174
|
Rights right = Rights.NewInstance();
|
175
|
right.setType(RightsType.COPYRIGHT());
|
176
|
right.setAgent(person);
|
177
|
right = deduplicationHelper.getExistingCopyright(state, right);
|
178
|
media.addRights(right);
|
179
|
}
|
180
|
|
181
|
String detail = "p. " + FloraHellenicaImageCaptionImport.startPage + 1 + plate *2;
|
182
|
media.addPrimaryMediaSource(getSecReference(state), detail);
|
183
|
|
184
|
|
185
|
Taxon taxon = getAcceptedTaxon(taxonNameStr);
|
186
|
makeTextData(fileStr, media, taxon);
|
187
|
if (taxonNameStr2 != null){
|
188
|
Taxon taxon2 = getAcceptedTaxon(taxonNameStr);
|
189
|
makeTextData(fileStr, media, taxon2);
|
190
|
}
|
191
|
|
192
|
|
193
|
if (taxonNameStr2 == null){
|
194
|
media.putTitle(Language.LATIN(), taxon == null ? taxonNameStr :
|
195
|
taxon.getName().getTitleCache());
|
196
|
}else{
|
197
|
media.putTitle(Language.LATIN(), "Ramonda serbica(L) + R. nathaliae(R)");
|
198
|
}
|
199
|
|
200
|
|
201
|
} catch (Exception e) {
|
202
|
e.printStackTrace();
|
203
|
return;
|
204
|
}
|
205
|
}
|
206
|
|
207
|
private String removeQuots(String text) {
|
208
|
if (text.startsWith("'") && text.endsWith("'")){
|
209
|
return text.substring(1, text.length() -1);
|
210
|
}else{
|
211
|
return text;
|
212
|
}
|
213
|
}
|
214
|
|
215
|
private Reference secReference;
|
216
|
private Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state) {
|
217
|
if (secReference != null){
|
218
|
secReference = getReferenceService().find(state.getConfig().getSecReference().getUuid());
|
219
|
}
|
220
|
return secReference;
|
221
|
}
|
222
|
|
223
|
|
224
|
/**
|
225
|
* Gets the image gallery, creates
|
226
|
*/
|
227
|
private void makeTextData(String fileStr, Media media, Taxon taxon) {
|
228
|
if (taxon == null){
|
229
|
logger.warn("Taxon not found for image " + fileStr + "."
|
230
|
+ "Media could not be attached to taxon.");
|
231
|
getMediaService().saveOrUpdate(media);
|
232
|
return;
|
233
|
}
|
234
|
TaxonDescription imageGallery = taxon.getImageGallery(true);
|
235
|
TextData textData;
|
236
|
if (imageGallery.getElements().isEmpty()){
|
237
|
textData = TextData.NewInstance();
|
238
|
textData.setFeature(Feature.IMAGE());
|
239
|
}else{
|
240
|
textData = CdmBase.deproxy(imageGallery.getElements().iterator().next(), TextData.class);
|
241
|
}
|
242
|
imageGallery.addElement(textData);
|
243
|
textData.addMedia(media);
|
244
|
}
|
245
|
|
246
|
/**
|
247
|
* @param taxonNameStr
|
248
|
* @return
|
249
|
*/
|
250
|
private Taxon getAcceptedTaxon(String taxonNameStr) {
|
251
|
|
252
|
MatchingTaxonConfigurator config = new MatchingTaxonConfigurator();
|
253
|
taxonNameStr = adaptName(taxonNameStr);
|
254
|
config.setTaxonNameTitle(taxonNameStr);
|
255
|
config.setIncludeSynonyms(true);
|
256
|
List<TaxonBase> list = getTaxonService().findTaxaByName(config);
|
257
|
if (list.isEmpty()){
|
258
|
logger.warn("Taxon not found for media: " + taxonNameStr);
|
259
|
return null;
|
260
|
}else{
|
261
|
if (list.size()>1){
|
262
|
logger.warn("More than 1 taxon found for media: " + taxonNameStr);
|
263
|
}
|
264
|
TaxonBase<?> taxonBase = list.get(0);
|
265
|
Taxon result;
|
266
|
if (taxonBase.isInstanceOf(Synonym.class)){
|
267
|
result = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
|
268
|
}else{
|
269
|
result = CdmBase.deproxy(taxonBase, Taxon.class);
|
270
|
}
|
271
|
return result;
|
272
|
}
|
273
|
}
|
274
|
|
275
|
/**
|
276
|
* @param taxonNameStr
|
277
|
* @return
|
278
|
*/
|
279
|
private String adaptName(String taxonNameStr) {
|
280
|
if (taxonNameStr.equals("Hypericum cerastoides")){
|
281
|
taxonNameStr = "Hypericum cerastioides";
|
282
|
}
|
283
|
return taxonNameStr;
|
284
|
}
|
285
|
|
286
|
/**
|
287
|
* @param fileStr
|
288
|
* @return
|
289
|
*/
|
290
|
private String[] getTaxonName(String fileStr) {
|
291
|
String[] result = new String[2];
|
292
|
fileStr = fileStr.split("\\.")[0];
|
293
|
fileStr = fileStr.replaceAll("[0-9]", "");
|
294
|
String[] x = fileStr.split("_");
|
295
|
if (x.length == 2){
|
296
|
result[1] = x[1];
|
297
|
}
|
298
|
|
299
|
fileStr = splitCamelCase(x[0]);
|
300
|
String[] split = fileStr.split(" ");
|
301
|
String name = split[0] + " " + split[1].toLowerCase() +
|
302
|
(split.length > 2 ? " subsp. " + split[2].toLowerCase() : "");
|
303
|
result[0] = name;
|
304
|
System.out.println(result[0] + (result[1] != null ? " Artist: " + result[1]: ""));
|
305
|
return result;
|
306
|
}
|
307
|
|
308
|
//from http://stackoverflow.com/questions/2559759/how-do-i-convert-camelcase-into-human-readable-names-in-java
|
309
|
static String splitCamelCase(String s) {
|
310
|
return s.replaceAll(
|
311
|
String.format("%s",
|
312
|
// "(?<=[A-Z])(?=[A-Z][a-z])",
|
313
|
"(?<=[^A-Z])(?=[A-Z])"
|
314
|
// "(?<=[A-Za-z])(?=[^A-Za-z])"
|
315
|
),
|
316
|
" "
|
317
|
);
|
318
|
}
|
319
|
|
320
|
/**
|
321
|
* {@inheritDoc}
|
322
|
*/
|
323
|
@Override
|
324
|
protected boolean doCheck(SimpleExcelTaxonImportState<CONFIG> state) {
|
325
|
return false;
|
326
|
}
|
327
|
|
328
|
/**
|
329
|
* {@inheritDoc}
|
330
|
*/
|
331
|
@Override
|
332
|
protected boolean isIgnore(SimpleExcelTaxonImportState<CONFIG> state) {
|
333
|
return ! state.getConfig().isDoImages();
|
334
|
}
|
335
|
|
336
|
|
337
|
|
338
|
}
|