Project

General

Profile

Download (17.8 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.reference.ris.in;
10

    
11
import java.io.ByteArrayInputStream;
12
import java.io.InputStreamReader;
13
import eu.etaxonomy.cdm.common.URI;
14
import java.util.ArrayList;
15
import java.util.Arrays;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20

    
21
import org.apache.log4j.Logger;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.common.DOI;
26
import eu.etaxonomy.cdm.io.common.CdmImportBase;
27
import eu.etaxonomy.cdm.io.reference.ris.in.RisRecordReader.RisValue;
28
import eu.etaxonomy.cdm.model.agent.Person;
29
import eu.etaxonomy.cdm.model.agent.Team;
30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31
import eu.etaxonomy.cdm.model.common.Annotation;
32
import eu.etaxonomy.cdm.model.common.AnnotationType;
33
import eu.etaxonomy.cdm.model.common.Language;
34
import eu.etaxonomy.cdm.model.common.TimePeriod;
35
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
36
import eu.etaxonomy.cdm.model.reference.Reference;
37
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
38
import eu.etaxonomy.cdm.model.reference.ReferenceType;
39

    
40
/**
41
 * @author a.mueller
42
 * @since 11.05.2017
43
 *
44
 */
45
@Component
46
public class RisReferenceImport
47
        extends CdmImportBase<RisReferenceImportConfigurator, RisReferenceImportState>{
48

    
49
    private static final long serialVersionUID = 7022034669942979722L;
50
    @SuppressWarnings("unused")
51
    private static final Logger logger = Logger.getLogger(RisReferenceImport.class);
52

    
53
    /**
54
     * {@inheritDoc}
55
     */
56
    @Override
57
    protected void doInvoke(RisReferenceImportState state) {
58
        RisReferenceImportConfigurator config = state.getConfig();
59
        try {
60
//            new FileReader(file)
61
            byte[] data = config.getStream();
62

    
63
            ByteArrayInputStream stream = new ByteArrayInputStream(data);
64
            InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
65
            RisRecordReader risReader = new RisRecordReader(state, reader);
66

    
67
            Set<Reference> referencesToSave = new HashSet<>();
68

    
69

    
70
            Map<RisReferenceTag, List<RisValue>> next = risReader.readRecord();
71
            while (next != RisRecordReader.EOF){
72
                Reference ref;
73
                String location = "";
74
                try {
75
                    location = recordLocation(state, next);
76
                    ref = makeReference(state, next);
77
                    referencesToSave.add(ref);
78
                    if (ref.getInReference() != null){
79
                        referencesToSave.add(ref.getInReference());
80
                    }
81
                } catch (Exception e) {
82
                    String message = "Unexpected exception during RIS Reference Import";
83
                    state.getResult().addException(e, message, location);
84
                }
85

    
86
                next = risReader.readRecord();
87
            }
88

    
89
            getReferenceService().saveOrUpdate(referencesToSave);
90
            state.getResult().addNewRecords(Reference.class.getSimpleName(), referencesToSave.size());
91

    
92
        } catch (Exception e) {
93
            String message = "Unexpected exception during RIS Reference Import";
94
            state.getResult().addException(e, message);
95
        }
96

    
97
        //unhandled
98
        Map<RisReferenceTag, Integer> unhandled = state.getUnhandled();
99
        for (RisReferenceTag tag : unhandled.keySet()){
100
            String message = "RIS tag %s (%s) not yet handled. n = %d";
101
            message = String .format(message, tag.name(), tag.getDescription(), unhandled.get(tag));
102
            state.getResult().addWarning(message);
103
        }
104
    }
105

    
106
    /**
107
     * @param state
108
     * @param next
109
     * @return
110
     */
111
    private Reference makeReference(RisReferenceImportState state,
112
            Map<RisReferenceTag, List<RisValue>> record) {
113

    
114
        //type
115
        ReferenceType type = makeReferenceType(state, record);
116
        Reference ref = ReferenceFactory.newReference(type);
117
        Reference inRef = null;
118
        if (hasInRef(ref)){
119
            ReferenceType inRefType =
120
                    type == ReferenceType.Article ? ReferenceType.Journal:
121
                    type == ReferenceType.BookSection ? ReferenceType.Book :
122
                        ReferenceType.Generic;
123
            inRef = ReferenceFactory.newReference(inRefType);
124
            ref.setInReference(inRef);
125
        }
126
        Reference higherRef = inRef == null ? ref : inRef;
127

    
128

    
129
        //Title
130
        RisValue t1 = getSingleValue(state, record, RisReferenceTag.T1);
131
        RisValue ti = getSingleValue(state, record, RisReferenceTag.TI);
132
        RisValue value = assertEqual(state, "title", t1, ti);
133
        if (value != null){
134
            ref.setTitle(value.value);
135
        }
136

    
137
        //Journal title
138
        RisValue t2 = getSingleValue(state, record, RisReferenceTag.T2); //Secondary Title (journal title, if applicable)
139

    
140
        if (higherRef.getType() == ReferenceType.Journal){
141
            RisValue jf = getSingleValue(state, record, RisReferenceTag.JF); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
142
            RisValue jo = getSingleValue(state, record, RisReferenceTag.JO); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
143
            RisValue x = assertEqual(state, "Journal/Periodical name: full format", jf, jo);
144
            x = assertEqual(state, "Journal title", t2, x);
145
            if (x != null){
146
                higherRef.setTitle(x.value);
147
            }
148
        }else{
149
            //TODO
150
        }
151

    
152
        //ST  (remove as same as TI or T1), not handled otherwise
153
        RisValue st = getSingleValue(state, record, RisReferenceTag.ST, false); //Short title
154
        if (st != null && st.value.equals(ref.getTitle())){
155
            record.remove(RisReferenceTag.ST);
156
        }
157

    
158
        //Author
159
        List<RisValue> list = getListValue(record, RisReferenceTag.AU);
160
        if (!list.isEmpty()){
161
            TeamOrPersonBase<?> author = makeAuthor(state, list);
162
            ref.setAuthorship(author);
163
        }
164

    
165
        //Date
166
//        RisValue y1 = getSingleValue(state, record, RisReferenceTag.Y1); //Primary Date
167
        RisValue py = getSingleValue(state, record, RisReferenceTag.PY);
168
        RisValue da = getSingleValue(state, record, RisReferenceTag.DA);
169
        Integer year = makeYear(state, py);
170
        VerbatimTimePeriod date = makeDate(state, da);
171
        assertDateYear(state, year, date, py);
172
        ref.setDatePublished(date);
173
        //TODO y1 not yet handled
174

    
175
        //Note
176
        RisValue n1 = getSingleValue(state, record, RisReferenceTag.N1); //Note
177
        if (n1 != null){
178
            Annotation annotation = Annotation.NewInstance(n1.value, AnnotationType.EDITORIAL(), Language.DEFAULT());
179
            ref.addAnnotation(annotation);
180
        }
181

    
182
        //DOI
183
        RisValue doiVal = getSingleValue(state, record, RisReferenceTag.DO); //Doi
184
        if (doiVal != null){
185
            DOI doi;
186
            try {
187
                String doiStr = doiVal.value;
188
                if (doiStr.toLowerCase().startsWith("doi ")){
189
                    doiStr = doiStr.substring(4).trim();
190
                }
191
                doi = DOI.fromString(doiStr);
192
                ref.setDoi(doi);
193
            } catch (IllegalArgumentException e) {
194
                String message = "DOI could not be recognized: " + doiVal.value;
195
                state.getResult().addWarning(message, null, doiVal.location);
196
            }
197
        }
198

    
199
        //UR
200
        RisValue ur = getSingleValue(state, record, RisReferenceTag.UR); //URL
201
        if (ur != null){
202
            URI uri;
203
            try {
204
                String urStr = ur.value;
205
                uri = URI.create(urStr);
206
                ref.setUri(uri);
207
            } catch (Exception e) {
208
                String message = "URL could not be recognized: " + ur.value;
209
                state.getResult().addWarning(message, null, ur.location);
210
            }
211
        }
212

    
213
        //Pages
214
        RisValue sp = getSingleValue(state, record, RisReferenceTag.SP);
215
        RisValue ep = getSingleValue(state, record, RisReferenceTag.EP);
216
        String pages = CdmUtils.concat("-", sp != null ? sp.value : null, ep != null ? ep.value : null);
217
        ref.setPages(pages);
218

    
219
        //Volume
220
        RisValue vl = getSingleValue(state, record, RisReferenceTag.VL);
221
        RisValue is = getSingleValue(state, record, RisReferenceTag.IS);
222
        String vol = vl == null? "": vl.value + (is != null ? "("+ is.value + ")": "");
223
        ref.setVolume(vol);
224

    
225
        //Publisher
226
        RisValue pb = getSingleValue(state, record, RisReferenceTag.PB);
227
        if (pb != null){
228
            higherRef.setPublisher(pb.value);
229
        }
230

    
231
        //Abstract
232
        RisValue ab = getSingleValue(state, record, RisReferenceTag.AB);
233
        RisValue n2 = getSingleValue(state, record, RisReferenceTag.N2);
234
        RisValue abst = assertEqual(state, "Abstract", ab, n2);
235
        if (abst != null){
236
            ref.setReferenceAbstract(abst.value);
237
        }
238

    
239
        //ISSN/ISBN
240
        RisValue sn = getSingleValue(state, record, RisReferenceTag.SN);
241
        if (sn != null){
242
            if (higherRef.getType() == ReferenceType.Journal){
243
                higherRef.setIssn(sn.value);
244
            }else{
245
                higherRef.setIsbn(sn.value);
246
            }
247
        }
248

    
249
        //ID
250
        RisValue id = getSingleValue(state, record, RisReferenceTag.ID);
251
        String idStr = id != null? id.value: null;
252
        String recLoc = recordLocation(state, record);
253
        ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
254
        if (inRef != null){
255
            ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
256

    
257
        }
258

    
259
        //remove
260
        record.remove(RisReferenceTag.ER);
261
        record.remove(RisReferenceTag.TY);
262

    
263
        for (RisReferenceTag tag : record.keySet()){
264
//            String message = "RIS Tag " + tag.name() +  " not yet handled";
265
//            state.getResult().addWarning(message, record.get(tag).get(0).location);
266
            state.addUnhandled(tag);
267

    
268
            //TODO add as annotation or extension
269
        }
270

    
271
        return ref;
272
    }
273

    
274
    /**
275
     * @param ref
276
     * @return
277
     */
278
    private boolean hasInRef(Reference ref) {
279
        return ref.getType() == ReferenceType.BookSection || ref.getType() == ReferenceType.Article ;
280
    }
281

    
282

    
283
    /**
284
     * @param state
285
     * @param record
286
     * @return
287
     */
288
    private String recordLocation(RisReferenceImportState state,
289
            Map<RisReferenceTag, List<RisValue>> record) {
290
        RisValue typeTag = this.getSingleValue(state, record, RisReferenceTag.TY, false);
291
        RisValue erTag = this.getSingleValue(state, record, RisReferenceTag.ER, false);
292

    
293
        String start = typeTag == null ? "??" : typeTag.location;
294
        String end = erTag == null ? "??" : erTag.location;
295

    
296
        String result = "line " + CdmUtils.concat(" - ", start, end);
297

    
298
        return result;
299
    }
300

    
301
    /**
302
     * @param state
303
     * @param year
304
     * @param date
305
     */
306
    private void assertDateYear(RisReferenceImportState state, Integer year, TimePeriod date, RisValue py) {
307
        if (year != null && date != null && !year.equals(date.getStartYear())){
308
            String message = "Year 'PY' and date 'DA' are not consistent. PY is neglected.";
309
            state.getResult().addWarning(message, null, py.location);
310
        }
311
    }
312

    
313
    private RisValue assertEqual(RisReferenceImportState state, String meaning, RisValue val1, RisValue val2) {
314
        if (val1 != null && val2 != null && !val1.value.equals(val2.value)){
315
            String message = "The tags '%s' and '%s' are not equal but have a similar meaning ('%s'). "
316
                    + "%s was used and %s neglected";
317
            message = String.format(message, val1.tag.name(), val2.tag.name(), meaning , val1.tag.name(), val2.tag.name());
318
            state.getResult().addWarning(message, null, val1.location);
319
        }
320
        return val1 != null ? val1 : val2;
321
    }
322

    
323
    /**
324
     * @param state
325
     * @param da
326
     * @return
327
     */
328
    private VerbatimTimePeriod makeDate(RisReferenceImportState state, RisValue da) {
329
        if (da == null){
330
            return null;
331
        }
332
        if (! da.value.matches("([0-9]{4})?(\\/([0-9]{2})?(\\/([0-9]{2})?(\\/.*)?)?)?")){
333
            String message = "Tag '%s' has incorrect format. Only exactly 'dddd/dd/dd/any text' is allowed (where d is a digit), but was '%s'";
334
            message = String.format(message, da.tag.name(), da.value);
335
            state.getResult().addWarning(message, null, da.location);
336
            return null;
337
        }
338
        String[] split = da.value.split("/");
339
        VerbatimTimePeriod tp = VerbatimTimePeriod.NewVerbatimInstance();
340
        if (split.length > 0 && isNotBlank(split[0])){
341
            tp.setStartYear(Integer.valueOf(split[0]));
342
        }
343
        if (split.length > 1 && isNotBlank(split[1])){
344
            tp.setStartMonth(Integer.valueOf(split[1]));
345
        }
346
        if (split.length > 2 && isNotBlank(split[2])){
347
            tp.setStartDay(Integer.valueOf(split[2]));
348
        }
349
        if (split.length > 3 && isNotBlank(split[3])){
350
            List<String> other = Arrays.asList(split).subList(3, split.length);
351
            String otherStr = CdmUtils.concat("/", other.toArray(new String[other.size()]));
352
            tp.setFreeText(tp.toString() + " " + otherStr);
353
        }
354
        return tp;
355
    }
356

    
357
    /**
358
     * @param state
359
     * @param py
360
     * @return
361
     */
362
    private Integer makeYear(RisReferenceImportState state, RisValue py) {
363
        if (py == null){
364
            return null;
365
        }
366
        if (py.value.matches("[0-9]{4}")){
367
            return Integer.valueOf(py.value);
368
        }else{
369
            String message = "Tag '%s' has incorrect format. Only exactly 4 digits are allowed, but was '%s'";
370
            message = String.format(message, py.tag.name(), py.value);
371
            state.getResult().addWarning(message, null, py.location);
372
            return null;
373
        }
374
    }
375

    
376
    /**
377
     * @param state
378
     * @param list
379
     * @return
380
     */
381
    private TeamOrPersonBase<?> makeAuthor(RisReferenceImportState state, List<RisValue> list) {
382
        if (list.size() == 1){
383
            return makePerson(state, list.get(0));
384
        }else{
385
            Team team = Team.NewInstance();
386
            for (RisValue value : list){
387
                team.addTeamMember(makePerson(state, value));
388
            }
389
            return team;
390
        }
391
    }
392

    
393
    /**
394
     * @param state
395
     * @param risValue
396
     * @return
397
     */
398
    private Person makePerson(RisReferenceImportState state, RisValue risValue) {
399
        Person person = Person.NewInstance();
400
        String[] split = risValue.value.split(",");
401
        if (split.length >= 1){
402
            person.setFamilyName(split[0].trim());
403
        }
404
        if (split.length >= 2){
405
            person.setGivenName(split[1].trim());
406
        }
407
        if (split.length >= 3){
408
            person.setSuffix(split[2].trim());
409
        }
410

    
411
        return person;
412
    }
413

    
414
    /**
415
     * Returns the single value for the given tag
416
     * and removes the tag from the record.
417
     * If more than 1 value exists this is logged
418
     * as a warning.
419
     */
420
    private RisValue getSingleValue(RisReferenceImportState state,
421
            Map<RisReferenceTag, List<RisValue>> record,
422
            RisReferenceTag tag) {
423
        return getSingleValue(state, record, tag, true);
424
    }
425

    
426
    /**
427
     * Returns the single value for the given tag
428
     * and removes the tag from the record.
429
     * If more than 1 value exists this is logged
430
     * as a warning.
431
     */
432
    private RisValue getSingleValue(RisReferenceImportState state,
433
            Map<RisReferenceTag, List<RisValue>> record,
434
            RisReferenceTag tag, boolean remove) {
435
        List<RisValue> list = record.get(tag);
436
        if (list == null){
437
            return null;
438
        }
439
        assertSingle(state, list, tag);
440
        if (remove){
441
            record.remove(tag);
442
        }
443
        return list.get(0);
444
    }
445

    
446
    private List<RisValue> getListValue(Map<RisReferenceTag, List<RisValue>> record,
447
            RisReferenceTag tag) {
448
        List<RisValue> list = record.get(tag);
449
        record.remove(tag);
450
        if (list == null){
451
            list = new ArrayList<>();
452
        }
453
        return list;
454
    }
455

    
456
    /**
457
     * @param state
458
     * @param list
459
     * @param tag
460
     */
461
    private void assertSingle(RisReferenceImportState state, List<RisValue> list, RisReferenceTag tag) {
462
        if (list.size() > 1){
463
            String message = "There is more than 1 tag '%s' but only 1 tag is supported by RIS format or"
464
                    + " by the current import implementation.";
465
            message = String.format(message, tag.name());
466
            state.getResult().addWarning(message, null, list.get(0).location + "ff");
467
        }else if (list.isEmpty()){
468
            state.getResult().addError("A tag list was empty. This should not happen and is a programming code error");
469
        }
470
    }
471

    
472
    /**
473
     * @param state
474
     * @param next
475
     * @return
476
     */
477
    private ReferenceType makeReferenceType(RisReferenceImportState state,
478
            Map<RisReferenceTag, List<RisValue>> record) {
479
        RisReferenceTag tyTag = RisReferenceTag.TY;
480
        RisValue value = this.getSingleValue(state, record, tyTag, false);
481
        String typeStr = value.value;
482
        RisRecordType type = RisRecordType.valueOf(typeStr);
483
        ReferenceType cdmType = type.getCdmReferenceType();
484
        return cdmType;
485
    }
486

    
487
    /**
488
     * {@inheritDoc}
489
     */
490
    @Override
491
    protected boolean doCheck(RisReferenceImportState state) {
492
        return true;
493
    }
494

    
495
    /**
496
     * {@inheritDoc}
497
     */
498
    @Override
499
    protected boolean isIgnore(RisReferenceImportState state) {
500
        return false;
501
    }
502
}
(3-3/6)