1
|
/**
 * Copyright (C) 2017 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
|
9
|
package eu.etaxonomy.cdm.io.media.in;
|
10
|
|
11
|
import java.net.URI;
|
12
|
import java.util.ArrayList;
|
13
|
import java.util.Arrays;
|
14
|
import java.util.HashMap;
|
15
|
import java.util.List;
|
16
|
import java.util.regex.Matcher;
|
17
|
import java.util.regex.Pattern;
|
18
|
|
19
|
import org.joda.time.DateTime;
|
20
|
import org.joda.time.DateTimeFieldType;
|
21
|
import org.joda.time.Partial;
|
22
|
import org.springframework.stereotype.Component;
|
23
|
|
24
|
import eu.etaxonomy.cdm.common.media.ImageInfo;
|
25
|
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
|
26
|
import eu.etaxonomy.cdm.io.excel.common.ExcelImportBase;
|
27
|
import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase;
|
28
|
import eu.etaxonomy.cdm.io.media.in.MediaExcelImportConfigurator.MediaTitleEnum;
|
29
|
import eu.etaxonomy.cdm.model.agent.AgentBase;
|
30
|
import eu.etaxonomy.cdm.model.agent.Person;
|
31
|
import eu.etaxonomy.cdm.model.common.Language;
|
32
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
33
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
34
|
import eu.etaxonomy.cdm.model.description.TextData;
|
35
|
import eu.etaxonomy.cdm.model.media.ImageFile;
|
36
|
import eu.etaxonomy.cdm.model.media.Media;
|
37
|
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
|
38
|
import eu.etaxonomy.cdm.model.media.Rights;
|
39
|
import eu.etaxonomy.cdm.model.media.RightsType;
|
40
|
import eu.etaxonomy.cdm.model.name.TaxonName;
|
41
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
42
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
43
|
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
|
44
|
|
45
|
/**
 * @author a.mueller
 * @since 30.10.2017
 */
|
49
|
@Component
|
50
|
public class MediaExcelImport
|
51
|
extends ExcelImportBase<MediaExcelImportState, MediaExcelImportConfigurator, ExcelRowBase>{
|
52
|
|
53
|
private static final long serialVersionUID = -428449749189166794L;
|
54
|
|
55
|
private static final String COL_TAXON_UUID = "taxonUuid";
|
56
|
private static final String COL_NAME_CACHE = "nameCache";
|
57
|
private static final String COL_NAME_TITLE = "nameTitle";
|
58
|
private static final String COL_TAXON_TITLE = "taxonTitle";
|
59
|
private static final String COL_DESCRIPTION = "description";
|
60
|
private static final String COL_TITLE = "title";
|
61
|
private static final String COL_COPYRIGHT = "copyright";
|
62
|
private static final String COL_ARTIST = "artist";
|
63
|
private static final String COL_DATE = "date";
|
64
|
|
65
|
private ImportDeduplicationHelper<MediaExcelImportState> deduplicationHelper;
|
66
|
|
67
|
/**
|
68
|
* {@inheritDoc}
|
69
|
*/
|
70
|
@Override
|
71
|
protected void analyzeRecord(HashMap<String, String> record, MediaExcelImportState state) {
|
72
|
// do nothing
|
73
|
}
|
74
|
|
75
|
/**
|
76
|
* {@inheritDoc}
|
77
|
*/
|
78
|
@Override
|
79
|
protected void firstPass(MediaExcelImportState state) {
|
80
|
HashMap<String, String> record = state.getOriginalRecord();
|
81
|
String line = "row " + state.getCurrentLine() + ": ";
|
82
|
String linePure = "row " + state.getCurrentLine();
|
83
|
System.out.println(linePure);
|
84
|
|
85
|
//taxon
|
86
|
Taxon taxon = getTaxonByCdmId(state, COL_TAXON_UUID,
|
87
|
COL_NAME_CACHE, COL_NAME_TITLE, COL_TAXON_TITLE,
|
88
|
Taxon.class, linePure);
|
89
|
|
90
|
//media
|
91
|
Media media = Media.NewInstance();
|
92
|
|
93
|
//description
|
94
|
String description = record.get(COL_DESCRIPTION);
|
95
|
if (isNotBlank(description)){
|
96
|
Language descriptionLanguage = state.getConfig().getDescriptionLanguage();
|
97
|
descriptionLanguage = descriptionLanguage == null? Language.UNKNOWN_LANGUAGE(): descriptionLanguage;
|
98
|
media.putDescription(descriptionLanguage, description);
|
99
|
}
|
100
|
|
101
|
//title
|
102
|
String title = record.get(COL_TITLE);
|
103
|
if (isBlank(title)){
|
104
|
title = makeTitle(state, taxon, line);
|
105
|
}
|
106
|
if (isNotBlank(title)){
|
107
|
Language titleLanguage = state.getConfig().getTitleLanguage();
|
108
|
titleLanguage = titleLanguage == null? Language.UNKNOWN_LANGUAGE(): titleLanguage;
|
109
|
media.putTitle(titleLanguage, title);
|
110
|
}
|
111
|
|
112
|
//copyright
|
113
|
String copyright = record.get(COL_COPYRIGHT);
|
114
|
if (isNotBlank(copyright)){
|
115
|
AgentBase<?> agent = makePerson(state, copyright, line);
|
116
|
Rights right = Rights.NewInstance(RightsType.COPYRIGHT(), agent);
|
117
|
right = getDeduplicationHelper(state).getExistingCopyright(state, right);
|
118
|
media.addRights(right);
|
119
|
}
|
120
|
|
121
|
//artist
|
122
|
String artistStr = record.get(COL_ARTIST);
|
123
|
if (isNotBlank(artistStr)){
|
124
|
AgentBase<?> artist = makePerson(state, artistStr, line);
|
125
|
media.setArtist(artist);
|
126
|
}
|
127
|
|
128
|
//date
|
129
|
String dateStr = record.get(COL_DATE);
|
130
|
if (isNotBlank(artistStr)){
|
131
|
TimePeriod timePeriod = TimePeriodParser.parseString(dateStr);
|
132
|
if (timePeriod.getFreeText()!= null){
|
133
|
String message = "Date could not be parsed: %s";
|
134
|
message = String.format(message, dateStr);
|
135
|
state.getResult().addWarning(message, null, line);
|
136
|
}
|
137
|
if (timePeriod.getEnd() != null){
|
138
|
String message = "Date is a period with an end date. Periods are currently not yet supported: %s";
|
139
|
message = String.format(message, dateStr);
|
140
|
state.getResult().addWarning(message, null, line);
|
141
|
}
|
142
|
|
143
|
Partial start = timePeriod.getStart();
|
144
|
DateTime dateTime = toDateTime(state, start, dateStr, line);
|
145
|
media.setMediaCreated(dateTime);
|
146
|
}
|
147
|
|
148
|
//URLs
|
149
|
List<URI> uris = getUrls(state, line);
|
150
|
for (URI uri : uris){
|
151
|
handleUri(state, uri, media, line);
|
152
|
|
153
|
}
|
154
|
|
155
|
|
156
|
// for (URI baseUrl : state.getConfig().getBaseUrls()){
|
157
|
// if (!baseUrl.toString().endsWith("/")){
|
158
|
// baseUrl = URI.create(baseUrl.toString() + "/"); //is this always correct?
|
159
|
// }
|
160
|
// String url = baseUrl + fileName;
|
161
|
// readImage
|
162
|
// }
|
163
|
|
164
|
//source
|
165
|
String id = null;
|
166
|
String idNamespace = null;
|
167
|
Reference reference = getSourceReference(state);
|
168
|
media.addImportSource(id, idNamespace, reference, linePure);
|
169
|
|
170
|
if (taxon == null){
|
171
|
return;
|
172
|
}
|
173
|
|
174
|
String taxonTitle = taxon.getName() == null ? taxon.getTitleCache() :
|
175
|
isBlank(taxon.getName().getNameCache()) ? taxon.getName().getTitleCache():
|
176
|
taxon.getName().getNameCache();
|
177
|
TaxonDescription taxonDescription = taxon.getOrCreateImageGallery(taxonTitle);
|
178
|
TextData textData = taxonDescription.getOrCreateImageTextData();
|
179
|
textData.addMedia(media);
|
180
|
}
|
181
|
|
182
|
|
183
|
|
184
|
/**
|
185
|
* @param state
|
186
|
* @param taxon
|
187
|
* @param line
|
188
|
* @return
|
189
|
*/
|
190
|
private String makeTitle(MediaExcelImportState state, Taxon taxon, String line) {
|
191
|
MediaTitleEnum mediaTitleType = state.getConfig().getMediaTitle();
|
192
|
if (mediaTitleType == null || mediaTitleType == MediaTitleEnum.NONE){
|
193
|
return null;
|
194
|
}else if(mediaTitleType == MediaTitleEnum.FILE_NAME){
|
195
|
URI source = state.getConfig().getSource();
|
196
|
if (source != null){
|
197
|
String result = source.toString();
|
198
|
while (result.endsWith("/")){
|
199
|
result = result.substring(0, result.length() - 1);
|
200
|
}
|
201
|
while (result.contains("/")){
|
202
|
result = result.substring(result.lastIndexOf("/"));
|
203
|
}
|
204
|
return result;
|
205
|
}else{
|
206
|
mediaTitleType = MediaTitleEnum.NAME_TITLE_CACHE;
|
207
|
}
|
208
|
}
|
209
|
if (taxon == null){
|
210
|
return null;
|
211
|
}
|
212
|
if (taxon.getName() == null || mediaTitleType == MediaTitleEnum.TAXON_TITLE_CACHE){
|
213
|
return taxon.getTitleCache();
|
214
|
}else{
|
215
|
TaxonName name = taxon.getName();
|
216
|
if (mediaTitleType == MediaTitleEnum.NAME_TITLE_CACHE || isBlank(name.getNameCache())){
|
217
|
return name.getTitleCache();
|
218
|
}else{
|
219
|
return name.getNameCache();
|
220
|
}
|
221
|
}
|
222
|
}
|
223
|
|
224
|
/**
|
225
|
* @param start
|
226
|
* @return
|
227
|
*/
|
228
|
private DateTime toDateTime(MediaExcelImportState state, Partial partial, String dateStr, String line) {
|
229
|
if (partial == null){
|
230
|
return null;
|
231
|
}
|
232
|
List<DateTimeFieldType> typeList = Arrays.asList(partial.getFieldTypes());
|
233
|
if ( typeList.contains(DateTimeFieldType.year())
|
234
|
&& typeList.contains(DateTimeFieldType.monthOfYear())
|
235
|
&& typeList.contains(DateTimeFieldType.dayOfMonth())
|
236
|
){
|
237
|
DateTime result = partial.toDateTime(DateTime.now());
|
238
|
return result;
|
239
|
}else{
|
240
|
String message = "Date time does not include year, month and day information. Currently all these 3 parts are required: %s";
|
241
|
message = String.format(message, dateStr);
|
242
|
state.getResult().addWarning(message, null, line);
|
243
|
return null;
|
244
|
}
|
245
|
}
|
246
|
|
247
|
/**
|
248
|
* @param state
|
249
|
* @param uri
|
250
|
* @param media
|
251
|
* @param line
|
252
|
*/
|
253
|
private void handleUri(MediaExcelImportState state, URI uri, Media media, String line) {
|
254
|
ImageInfo imageInfo = null;
|
255
|
try {
|
256
|
if (state.getConfig().isReadMediaData()){
|
257
|
imageInfo = ImageInfo.NewInstance(uri, 0);
|
258
|
}
|
259
|
} catch (Exception e) {
|
260
|
String message = "An error occurred when trying to read image meta data for %s. Image was created but without metadata.";
|
261
|
message = String.format(message, uri.toString());
|
262
|
state.getResult().addException(e, message, null, line);
|
263
|
}
|
264
|
ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
|
265
|
|
266
|
MediaRepresentation representation = MediaRepresentation.NewInstance();
|
267
|
|
268
|
if(imageInfo != null){
|
269
|
representation.setMimeType(imageInfo.getMimeType());
|
270
|
representation.setSuffix(imageInfo.getSuffix());
|
271
|
}
|
272
|
representation.addRepresentationPart(imageFile);
|
273
|
media.addRepresentation(representation);
|
274
|
}
|
275
|
|
276
|
/**
|
277
|
* @param state
|
278
|
* @return
|
279
|
*/
|
280
|
private List<URI> getUrls(MediaExcelImportState state, String line) {
|
281
|
List<URI> list = new ArrayList<>();
|
282
|
HashMap<String, String> record = state.getOriginalRecord();
|
283
|
for (String str : record.keySet()){
|
284
|
if (str.equals("url") || str.matches("url_size\\d+") ){
|
285
|
String url = record.get(str);
|
286
|
try {
|
287
|
url = url.replace(" ", "%20"); //replace whitespace
|
288
|
URI uri = URI.create(url);
|
289
|
list.add(uri);
|
290
|
} catch (Exception e) {
|
291
|
String msg = "Incorrect url " + url;
|
292
|
state.getResult().addError(msg, e, null, line);
|
293
|
}
|
294
|
}
|
295
|
}
|
296
|
|
297
|
return list;
|
298
|
}
|
299
|
|
300
|
/**
|
301
|
* @param state
|
302
|
* @return
|
303
|
*/
|
304
|
private ImportDeduplicationHelper<MediaExcelImportState> getDeduplicationHelper(MediaExcelImportState state) {
|
305
|
if (this.deduplicationHelper == null){
|
306
|
this.deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
|
307
|
}
|
308
|
return deduplicationHelper;
|
309
|
}
|
310
|
|
311
|
private Person makePerson(MediaExcelImportState state, String artist, String line) {
|
312
|
Person person = Person.NewInstance();
|
313
|
artist = artist.trim();
|
314
|
|
315
|
String regExAbbrev = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
|
316
|
Matcher matcherAbbrev = Pattern.compile(regExAbbrev).matcher(artist);
|
317
|
|
318
|
String regExFull = "([A-Z][a-z\\-\u00E4\u00F6\u00FC]+\\s)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
|
319
|
Matcher matcherFull = Pattern.compile(regExFull).matcher(artist);
|
320
|
|
321
|
if (matcherAbbrev.matches()){
|
322
|
person.setGivenName(matcherAbbrev.group(1).trim());
|
323
|
person.setFamilyName(matcherAbbrev.group(2).trim());
|
324
|
}else if (matcherFull.matches()){
|
325
|
person.setGivenName(matcherFull.group(1).trim());
|
326
|
person.setFamilyName(matcherFull.group(2).trim());
|
327
|
}else{
|
328
|
person.setTitleCache(artist, true);
|
329
|
String message = "A name of a person can not be atomized: %s";
|
330
|
message = String.format(message, artist);
|
331
|
state.getResult().addWarning(message, null, line);
|
332
|
|
333
|
}
|
334
|
|
335
|
Person result = (Person)getDeduplicationHelper(state).getExistingAuthor(null, person);
|
336
|
return person;
|
337
|
}
|
338
|
|
339
|
/**
|
340
|
* {@inheritDoc}
|
341
|
*/
|
342
|
@Override
|
343
|
protected void secondPass(MediaExcelImportState state) {
|
344
|
// TODO Auto-generated method stub
|
345
|
|
346
|
}
|
347
|
|
348
|
/**
|
349
|
* {@inheritDoc}
|
350
|
*/
|
351
|
@Override
|
352
|
protected boolean isIgnore(MediaExcelImportState state) {
|
353
|
return false;
|
354
|
}
|
355
|
}
|