Project

General

Profile

Download (12.8 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.media.in;
10

    
11
import java.net.URI;
12
import java.util.ArrayList;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.regex.Matcher;
17
import java.util.regex.Pattern;
18

    
19
import org.joda.time.DateTime;
20
import org.joda.time.DateTimeFieldType;
21
import org.joda.time.Partial;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.common.media.ImageInfo;
25
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
26
import eu.etaxonomy.cdm.io.excel.common.ExcelImportBase;
27
import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase;
28
import eu.etaxonomy.cdm.io.media.in.MediaExcelImportConfigurator.MediaTitleEnum;
29
import eu.etaxonomy.cdm.model.agent.AgentBase;
30
import eu.etaxonomy.cdm.model.agent.Person;
31
import eu.etaxonomy.cdm.model.common.Language;
32
import eu.etaxonomy.cdm.model.common.TimePeriod;
33
import eu.etaxonomy.cdm.model.description.TaxonDescription;
34
import eu.etaxonomy.cdm.model.description.TextData;
35
import eu.etaxonomy.cdm.model.media.ImageFile;
36
import eu.etaxonomy.cdm.model.media.Media;
37
import eu.etaxonomy.cdm.model.media.MediaRepresentation;
38
import eu.etaxonomy.cdm.model.media.Rights;
39
import eu.etaxonomy.cdm.model.media.RightsType;
40
import eu.etaxonomy.cdm.model.name.TaxonName;
41
import eu.etaxonomy.cdm.model.reference.Reference;
42
import eu.etaxonomy.cdm.model.taxon.Taxon;
43
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
44

    
45
/**
46
 * @author a.mueller
47
 * @since 30.10.2017
48
 */
49
@Component
50
public class MediaExcelImport
51
        extends ExcelImportBase<MediaExcelImportState, MediaExcelImportConfigurator, ExcelRowBase>{
52

    
53
    private static final long serialVersionUID = -428449749189166794L;
54

    
55
    private static final String COL_TAXON_UUID = "taxonUuid";
56
    private static final String COL_NAME_CACHE = "nameCache";
57
    private static final String COL_NAME_TITLE = "nameTitle";
58
    private static final String COL_TAXON_TITLE = "taxonTitle";
59
    private static final String COL_DESCRIPTION = "description";
60
    private static final String COL_TITLE = "title";
61
    private static final String COL_COPYRIGHT = "copyright";
62
    private static final String COL_ARTIST = "artist";
63
    private static final String COL_DATE = "date";
64

    
65
    private ImportDeduplicationHelper<MediaExcelImportState> deduplicationHelper;
66

    
67
    /**
68
     * {@inheritDoc}
69
     */
70
    @Override
71
    protected void analyzeRecord(HashMap<String, String> record, MediaExcelImportState state) {
72
        // do nothing
73
    }
74

    
75
    /**
76
     * {@inheritDoc}
77
     */
78
    @Override
79
    protected void firstPass(MediaExcelImportState state) {
80
        HashMap<String, String> record = state.getOriginalRecord();
81
        String line = "row " + state.getCurrentLine() + ": ";
82
        String linePure = "row " + state.getCurrentLine();
83
        System.out.println(linePure);
84

    
85
        //taxon
86
        Taxon taxon = getTaxonByCdmId(state, COL_TAXON_UUID,
87
                COL_NAME_CACHE, COL_NAME_TITLE, COL_TAXON_TITLE,
88
                Taxon.class, linePure);
89

    
90
        //media
91
        Media media = Media.NewInstance();
92

    
93
        //description
94
        String description = record.get(COL_DESCRIPTION);
95
        if (isNotBlank(description)){
96
            Language descriptionLanguage = state.getConfig().getDescriptionLanguage();
97
            descriptionLanguage = descriptionLanguage == null? Language.UNKNOWN_LANGUAGE(): descriptionLanguage;
98
            media.putDescription(descriptionLanguage, description);
99
        }
100

    
101
        //title
102
        String title = record.get(COL_TITLE);
103
        if (isBlank(title)){
104
            title = makeTitle(state, taxon, line);
105
        }
106
        if (isNotBlank(title)){
107
            Language titleLanguage = state.getConfig().getTitleLanguage();
108
            titleLanguage = titleLanguage == null? Language.UNKNOWN_LANGUAGE(): titleLanguage;
109
            media.putTitle(titleLanguage, title);
110
        }
111

    
112
        //copyright
113
        String copyright = record.get(COL_COPYRIGHT);
114
        if (isNotBlank(copyright)){
115
            AgentBase<?> agent = makePerson(state, copyright, line);
116
            Rights right = Rights.NewInstance(RightsType.COPYRIGHT(), agent);
117
            right = getDeduplicationHelper(state).getExistingCopyright(state, right);
118
            media.addRights(right);
119
        }
120

    
121
        //artist
122
        String artistStr = record.get(COL_ARTIST);
123
        if (isNotBlank(artistStr)){
124
            AgentBase<?> artist = makePerson(state, artistStr, line);
125
            media.setArtist(artist);
126
        }
127

    
128
        //date
129
        String dateStr = record.get(COL_DATE);
130
        if (isNotBlank(artistStr)){
131
            TimePeriod timePeriod = TimePeriodParser.parseString(dateStr);
132
            if (timePeriod.getFreeText()!=  null){
133
                String message = "Date could not be parsed: %s";
134
                message = String.format(message, dateStr);
135
                state.getResult().addWarning(message, null, line);
136
            }
137
            if (timePeriod.getEnd() !=  null){
138
                String message = "Date is a period with an end date. Periods are currently not yet supported: %s";
139
                message = String.format(message, dateStr);
140
                state.getResult().addWarning(message, null, line);
141
            }
142

    
143
            Partial start = timePeriod.getStart();
144
            DateTime dateTime = toDateTime(state, start, dateStr, line);
145
            media.setMediaCreated(dateTime);
146
        }
147

    
148
        //URLs
149
        List<URI> uris = getUrls(state, line);
150
        for (URI uri : uris){
151
            handleUri(state, uri, media, line);
152

    
153
        }
154

    
155

    
156
//        for (URI baseUrl : state.getConfig().getBaseUrls()){
157
//            if (!baseUrl.toString().endsWith("/")){
158
//                baseUrl = URI.create(baseUrl.toString() +  "/"); //is this always correct?
159
//            }
160
//            String url = baseUrl + fileName;
161
//            readImage
162
//        }
163

    
164
        //source
165
        String id = null;
166
        String idNamespace = null;
167
        Reference reference = getSourceReference(state);
168
        media.addImportSource(id, idNamespace, reference, linePure);
169

    
170
        if (taxon == null){
171
            return;
172
        }
173

    
174
        String taxonTitle = taxon.getName() == null ? taxon.getTitleCache() :
175
            isBlank(taxon.getName().getNameCache()) ? taxon.getName().getTitleCache():
176
                taxon.getName().getNameCache();
177
        TaxonDescription taxonDescription = taxon.getOrCreateImageGallery(taxonTitle);
178
        TextData textData = taxonDescription.getOrCreateImageTextData();
179
        textData.addMedia(media);
180
    }
181

    
182

    
183

    
184
    /**
185
     * @param state
186
     * @param taxon
187
     * @param line
188
     * @return
189
     */
190
    private String makeTitle(MediaExcelImportState state, Taxon taxon, String line) {
191
        MediaTitleEnum mediaTitleType = state.getConfig().getMediaTitle();
192
        if (mediaTitleType == null || mediaTitleType == MediaTitleEnum.NONE){
193
            return null;
194
        }else if(mediaTitleType == MediaTitleEnum.FILE_NAME){
195
            URI source = state.getConfig().getSource();
196
            if (source != null){
197
                String result = source.toString();
198
                while (result.endsWith("/")){
199
                    result = result.substring(0, result.length() - 1);
200
                }
201
                while (result.contains("/")){
202
                    result = result.substring(result.lastIndexOf("/"));
203
                }
204
                return result;
205
            }else{
206
               mediaTitleType = MediaTitleEnum.NAME_TITLE_CACHE;
207
            }
208
        }
209
        if (taxon == null){
210
            return null;
211
        }
212
        if (taxon.getName() == null || mediaTitleType == MediaTitleEnum.TAXON_TITLE_CACHE){
213
            return taxon.getTitleCache();
214
        }else{
215
            TaxonName name = taxon.getName();
216
            if (mediaTitleType == MediaTitleEnum.NAME_TITLE_CACHE || isBlank(name.getNameCache())){
217
                return name.getTitleCache();
218
            }else{
219
                return name.getNameCache();
220
            }
221
        }
222
    }
223

    
224
    /**
225
     * @param start
226
     * @return
227
     */
228
    private DateTime toDateTime(MediaExcelImportState state, Partial partial, String dateStr, String line) {
229
        if (partial == null){
230
            return null;
231
        }
232
        List<DateTimeFieldType> typeList = Arrays.asList(partial.getFieldTypes());
233
        if ( typeList.contains(DateTimeFieldType.year())
234
                && typeList.contains(DateTimeFieldType.monthOfYear())
235
                && typeList.contains(DateTimeFieldType.dayOfMonth())
236
                ){
237
            DateTime result = partial.toDateTime(DateTime.now());
238
            return result;
239
        }else{
240
            String message = "Date time does not include year, month and day information. Currently all these 3 parts are required: %s";
241
            message = String.format(message, dateStr);
242
            state.getResult().addWarning(message, null, line);
243
            return null;
244
        }
245
    }
246

    
247
    /**
248
     * @param state
249
     * @param uri
250
     * @param media
251
     * @param line
252
     */
253
    private void handleUri(MediaExcelImportState state, URI uri, Media media, String line) {
254
            ImageInfo imageInfo = null;
255
            try {
256
                if (state.getConfig().isReadMediaData()){
257
                    imageInfo = ImageInfo.NewInstance(uri, 0);
258
                }
259
            } catch (Exception e) {
260
                String message = "An error occurred when trying to read image meta data for %s. Image was created but without metadata.";
261
                message = String.format(message, uri.toString());
262
                state.getResult().addException(e, message, null, line);
263
            }
264
            ImageFile imageFile = ImageFile.NewInstance(uri, null, imageInfo);
265

    
266
            MediaRepresentation representation = MediaRepresentation.NewInstance();
267

    
268
            if(imageInfo != null){
269
                representation.setMimeType(imageInfo.getMimeType());
270
                representation.setSuffix(imageInfo.getSuffix());
271
            }
272
            representation.addRepresentationPart(imageFile);
273
            media.addRepresentation(representation);
274
    }
275

    
276
    /**
277
     * @param state
278
     * @return
279
     */
280
    private List<URI> getUrls(MediaExcelImportState state, String line) {
281
        List<URI> list = new ArrayList<>();
282
        HashMap<String, String> record = state.getOriginalRecord();
283
        for (String str : record.keySet()){
284
            if (str.equals("url") || str.matches("url_size\\d+") ){
285
                String url = record.get(str);
286
                try {
287
                    url = url.replace(" ", "%20");  //replace whitespace
288
                    URI uri = URI.create(url);
289
                    list.add(uri);
290
                } catch (Exception e) {
291
                    String msg = "Incorrect url " + url;
292
                    state.getResult().addError(msg, e, null, line);
293
                }
294
            }
295
        }
296

    
297
        return list;
298
    }
299

    
300
    /**
301
     * @param state
302
     * @return
303
     */
304
    private ImportDeduplicationHelper<MediaExcelImportState> getDeduplicationHelper(MediaExcelImportState state) {
305
        if (this.deduplicationHelper == null){
306
            this.deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
307
        }
308
        return deduplicationHelper;
309
    }
310

    
311
    private Person makePerson(MediaExcelImportState state, String artist, String line) {
312
        Person person = Person.NewInstance();
313
        artist = artist.trim();
314

    
315
        String regExAbbrev = "((?:[A-Z]\\. ?)+)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
316
        Matcher matcherAbbrev = Pattern.compile(regExAbbrev).matcher(artist);
317

    
318
        String regExFull = "([A-Z][a-z\\-\u00E4\u00F6\u00FC]+\\s)([A-Z][a-z\\-\u00E4\u00F6\u00FC]+)";
319
        Matcher matcherFull = Pattern.compile(regExFull).matcher(artist);
320

    
321
        if (matcherAbbrev.matches()){
322
            person.setGivenName(matcherAbbrev.group(1).trim());
323
            person.setFamilyName(matcherAbbrev.group(2).trim());
324
        }else if (matcherFull.matches()){
325
            person.setGivenName(matcherFull.group(1).trim());
326
            person.setFamilyName(matcherFull.group(2).trim());
327
        }else{
328
            person.setTitleCache(artist, true);
329
            String message = "A name of a person can not be atomized: %s";
330
            message = String.format(message, artist);
331
            state.getResult().addWarning(message, null, line);
332

    
333
        }
334

    
335
        Person result = (Person)getDeduplicationHelper(state).getExistingAuthor(null, person);
336
        return person;
337
    }
338

    
339
    /**
340
     * {@inheritDoc}
341
     */
342
    @Override
343
    protected void secondPass(MediaExcelImportState state) {
344
        // TODO Auto-generated method stub
345

    
346
    }
347

    
348
    /**
349
     * {@inheritDoc}
350
     */
351
    @Override
352
    protected boolean isIgnore(MediaExcelImportState state) {
353
        return false;
354
    }
355
}
(1-1/4)