Project

General

Profile

Download (17.7 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.reference.ris.in;
10

    
11
import java.io.ByteArrayInputStream;
12
import java.io.InputStreamReader;
13
import java.net.URI;
14
import java.util.ArrayList;
15
import java.util.Arrays;
16
import java.util.HashSet;
17
import java.util.List;
18
import java.util.Map;
19
import java.util.Set;
20

    
21
import org.apache.log4j.Logger;
22
import org.springframework.stereotype.Component;
23

    
24
import eu.etaxonomy.cdm.common.CdmUtils;
25
import eu.etaxonomy.cdm.common.DOI;
26
import eu.etaxonomy.cdm.io.common.CdmImportBase;
27
import eu.etaxonomy.cdm.io.reference.ris.in.RisRecordReader.RisValue;
28
import eu.etaxonomy.cdm.model.agent.Person;
29
import eu.etaxonomy.cdm.model.agent.Team;
30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31
import eu.etaxonomy.cdm.model.common.Annotation;
32
import eu.etaxonomy.cdm.model.common.AnnotationType;
33
import eu.etaxonomy.cdm.model.common.Language;
34
import eu.etaxonomy.cdm.model.common.TimePeriod;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
37
import eu.etaxonomy.cdm.model.reference.ReferenceType;
38

    
39
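/**
 * Import of bibliographic references given in RIS format. Each RIS record
 * is transformed into a CDM reference and the resulting references are saved.
 *
 * @author a.mueller
 * @since 11.05.2017
 */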
44
@Component
45
public class RisReferenceImport
46
        extends CdmImportBase<RisReferenceImportConfigurator, RisReferenceImportState>{
47

    
48
    // Serialization version identifier (presumably required because a superclass is Serializable - TODO confirm).
    private static final long serialVersionUID = 7022034669942979722L;
49
    // The logger is not referenced anywhere in the visible code of this class, hence the suppressed "unused" warning.
    @SuppressWarnings("unused")
50
    private static final Logger logger = Logger.getLogger(RisReferenceImport.class);
51

    
52
    /**
53
     * {@inheritDoc}
54
     */
55
    @Override
56
    protected void doInvoke(RisReferenceImportState state) {
57
        RisReferenceImportConfigurator config = state.getConfig();
58
        try {
59
//            new FileReader(file)
60
            byte[] data = config.getStream();
61

    
62
            ByteArrayInputStream stream = new ByteArrayInputStream(data);
63
            InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
64
            RisRecordReader risReader = new RisRecordReader(state, reader);
65

    
66
            Set<Reference> referencesToSave = new HashSet<>();
67

    
68

    
69
            Map<RisReferenceTag, List<RisValue>> next = risReader.readRecord();
70
            while (next != RisRecordReader.EOF){
71
                Reference ref;
72
                String location = "";
73
                try {
74
                    location = recordLocation(state, next);
75
                    ref = makeReference(state, next);
76
                    referencesToSave.add(ref);
77
                    if (ref.getInReference() != null){
78
                        referencesToSave.add(ref.getInReference());
79
                    }
80
                } catch (Exception e) {
81
                    String message = "Unexpected exception during RIS Reference Import";
82
                    state.getResult().addException(e, message, location);
83
                }
84

    
85
                next = risReader.readRecord();
86
            }
87

    
88
            getReferenceService().saveOrUpdate(referencesToSave);
89
            state.getResult().addNewRecords(Reference.class.getSimpleName(), referencesToSave.size());
90

    
91
        } catch (Exception e) {
92
            String message = "Unexpected exception during RIS Reference Import";
93
            state.getResult().addException(e, message);
94
        }
95

    
96
        //unhandled
97
        Map<RisReferenceTag, Integer> unhandled = state.getUnhandled();
98
        for (RisReferenceTag tag : unhandled.keySet()){
99
            String message = "RIS tag %s (%s) not yet handled. n = %d";
100
            message = String .format(message, tag.name(), tag.getDescription(), unhandled.get(tag));
101
            state.getResult().addWarning(message);
102
        }
103
    }
104

    
105
    /**
106
     * @param state
107
     * @param next
108
     * @return
109
     */
110
    private Reference makeReference(RisReferenceImportState state,
111
            Map<RisReferenceTag, List<RisValue>> record) {
112

    
113
        //type
114
        ReferenceType type = makeReferenceType(state, record);
115
        Reference ref = ReferenceFactory.newReference(type);
116
        Reference inRef = null;
117
        if (hasInRef(ref)){
118
            ReferenceType inRefType =
119
                    type == ReferenceType.Article ? ReferenceType.Journal:
120
                    type == ReferenceType.BookSection ? ReferenceType.Book :
121
                        ReferenceType.Generic;
122
            inRef = ReferenceFactory.newReference(inRefType);
123
            ref.setInReference(inRef);
124
        }
125
        Reference higherRef = inRef == null ? ref : inRef;
126

    
127

    
128
        //Title
129
        RisValue t1 = getSingleValue(state, record, RisReferenceTag.T1);
130
        RisValue ti = getSingleValue(state, record, RisReferenceTag.TI);
131
        RisValue value = assertEqual(state, "title", t1, ti);
132
        if (value != null){
133
            ref.setTitle(value.value);
134
        }
135

    
136
        //Journal title
137
        RisValue t2 = getSingleValue(state, record, RisReferenceTag.T2); //Secondary Title (journal title, if applicable)
138

    
139
        if (higherRef.getType() == ReferenceType.Journal){
140
            RisValue jf = getSingleValue(state, record, RisReferenceTag.JF); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
141
            RisValue jo = getSingleValue(state, record, RisReferenceTag.JO); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
142
            RisValue x = assertEqual(state, "Journal/Periodical name: full format", jf, jo);
143
            x = assertEqual(state, "Journal title", t2, x);
144
            if (x != null){
145
                higherRef.setTitle(x.value);
146
            }
147
        }else{
148
            //TODO
149
        }
150

    
151
        //ST  (remove as same as TI or T1), not handled otherwise
152
        RisValue st = getSingleValue(state, record, RisReferenceTag.ST, false); //Short title
153
        if (st != null && st.value.equals(ref.getTitle())){
154
            record.remove(RisReferenceTag.ST);
155
        }
156

    
157
        //Author
158
        List<RisValue> list = getListValue(record, RisReferenceTag.AU);
159
        if (!list.isEmpty()){
160
            TeamOrPersonBase<?> author = makeAuthor(state, list);
161
            ref.setAuthorship(author);
162
        }
163

    
164
        //Date
165
//        RisValue y1 = getSingleValue(state, record, RisReferenceTag.Y1); //Primary Date
166
        RisValue py = getSingleValue(state, record, RisReferenceTag.PY);
167
        RisValue da = getSingleValue(state, record, RisReferenceTag.DA);
168
        Integer year = makeYear(state, py);
169
        TimePeriod date = makeDate(state, da);
170
        assertDateYear(state, year, date, py);
171
        ref.setDatePublished(date);
172
        //TODO y1 not yet handled
173

    
174
        //Note
175
        RisValue n1 = getSingleValue(state, record, RisReferenceTag.N1); //Note
176
        if (n1 != null){
177
            Annotation annotation = Annotation.NewInstance(n1.value, AnnotationType.EDITORIAL(), Language.DEFAULT());
178
            ref.addAnnotation(annotation);
179
        }
180

    
181
        //DOI
182
        RisValue doiVal = getSingleValue(state, record, RisReferenceTag.DO); //Doi
183
        if (doiVal != null){
184
            DOI doi;
185
            try {
186
                String doiStr = doiVal.value;
187
                if (doiStr.toLowerCase().startsWith("doi ")){
188
                    doiStr = doiStr.substring(4).trim();
189
                }
190
                doi = DOI.fromString(doiStr);
191
                ref.setDoi(doi);
192
            } catch (IllegalArgumentException e) {
193
                String message = "DOI could not be recognized: " + doiVal.value;
194
                state.getResult().addWarning(message, null, doiVal.location);
195
            }
196
        }
197

    
198
        //UR
199
        RisValue ur = getSingleValue(state, record, RisReferenceTag.UR); //URL
200
        if (ur != null){
201
            URI uri;
202
            try {
203
                String urStr = ur.value;
204
                uri = URI.create(urStr);
205
                ref.setUri(uri);
206
            } catch (Exception e) {
207
                String message = "URL could not be recognized: " + ur.value;
208
                state.getResult().addWarning(message, null, ur.location);
209
            }
210
        }
211

    
212
        //Pages
213
        RisValue sp = getSingleValue(state, record, RisReferenceTag.SP);
214
        RisValue ep = getSingleValue(state, record, RisReferenceTag.EP);
215
        String pages = CdmUtils.concat("-", sp != null ? sp.value : null, ep != null ? ep.value : null);
216
        ref.setPages(pages);
217

    
218
        //Volume
219
        RisValue vl = getSingleValue(state, record, RisReferenceTag.VL);
220
        RisValue is = getSingleValue(state, record, RisReferenceTag.IS);
221
        String vol = vl == null? "": vl.value + (is != null ? "("+ is.value + ")": "");
222
        ref.setVolume(vol);
223

    
224
        //Publisher
225
        RisValue pb = getSingleValue(state, record, RisReferenceTag.PB);
226
        if (pb != null){
227
            higherRef.setPublisher(pb.value);
228
        }
229

    
230
        //Abstract
231
        RisValue ab = getSingleValue(state, record, RisReferenceTag.AB);
232
        RisValue n2 = getSingleValue(state, record, RisReferenceTag.N2);
233
        RisValue abst = assertEqual(state, "Abstract", ab, n2);
234
        if (abst != null){
235
            ref.setReferenceAbstract(abst.value);
236
        }
237

    
238
        //ISSN/ISBN
239
        RisValue sn = getSingleValue(state, record, RisReferenceTag.SN);
240
        if (sn != null){
241
            if (higherRef.getType() == ReferenceType.Journal){
242
                higherRef.setIssn(sn.value);
243
            }else{
244
                higherRef.setIsbn(sn.value);
245
            }
246
        }
247

    
248
        //ID
249
        RisValue id = getSingleValue(state, record, RisReferenceTag.ID);
250
        String idStr = id != null? id.value: null;
251
        String recLoc = recordLocation(state, record);
252
        ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
253
        if (inRef != null){
254
            ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
255

    
256
        }
257

    
258
        //remove
259
        record.remove(RisReferenceTag.ER);
260
        record.remove(RisReferenceTag.TY);
261

    
262
        for (RisReferenceTag tag : record.keySet()){
263
//            String message = "RIS Tag " + tag.name() +  " not yet handled";
264
//            state.getResult().addWarning(message, record.get(tag).get(0).location);
265
            state.addUnhandled(tag);
266

    
267
            //TODO add as annotation or extension
268
        }
269

    
270
        return ref;
271
    }
272

    
273
    /**
274
     * @param ref
275
     * @return
276
     */
277
    private boolean hasInRef(Reference ref) {
278
        return ref.getType() == ReferenceType.BookSection || ref.getType() == ReferenceType.Article ;
279
    }
280

    
281

    
282
    /**
283
     * @param state
284
     * @param record
285
     * @return
286
     */
287
    private String recordLocation(RisReferenceImportState state,
288
            Map<RisReferenceTag, List<RisValue>> record) {
289
        RisValue typeTag = this.getSingleValue(state, record, RisReferenceTag.TY, false);
290
        RisValue erTag = this.getSingleValue(state, record, RisReferenceTag.ER, false);
291

    
292
        String start = typeTag == null ? "??" : typeTag.location;
293
        String end = erTag == null ? "??" : erTag.location;
294

    
295
        String result = "line " + CdmUtils.concat(" - ", start, end);
296

    
297
        return result;
298
    }
299

    
300
    /**
301
     * @param state
302
     * @param year
303
     * @param date
304
     */
305
    private void assertDateYear(RisReferenceImportState state, Integer year, TimePeriod date, RisValue py) {
306
        if (year != null && date != null && !year.equals(date.getStartYear())){
307
            String message = "Year 'PY' and date 'DA' are not consistent. PY is neglected.";
308
            state.getResult().addWarning(message, null, py.location);
309
        }
310
    }
311

    
312
    private RisValue assertEqual(RisReferenceImportState state, String meaning, RisValue val1, RisValue val2) {
313
        if (val1 != null && val2 != null && !val1.value.equals(val2.value)){
314
            String message = "The tags '%s' and '%s' are not equal but have a similar meaning ('%s'). "
315
                    + "%s was used and %s neglected";
316
            message = String.format(message, val1.tag.name(), val2.tag.name(), meaning , val1.tag.name(), val2.tag.name());
317
            state.getResult().addWarning(message, null, val1.location);
318
        }
319
        return val1 != null ? val1 : val2;
320
    }
321

    
322
    /**
323
     * @param state
324
     * @param da
325
     * @return
326
     */
327
    private TimePeriod makeDate(RisReferenceImportState state, RisValue da) {
328
        if (da == null){
329
            return null;
330
        }
331
        if (! da.value.matches("([0-9]{4})?(\\/([0-9]{2})?(\\/([0-9]{2})?(\\/.*)?)?)?")){
332
            String message = "Tag '%s' has incorrect format. Only exactly 'dddd/dd/dd/any text' is allowed (where d is a digit), but was '%s'";
333
            message = String.format(message, da.tag.name(), da.value);
334
            state.getResult().addWarning(message, null, da.location);
335
            return null;
336
        }
337
        String[] split = da.value.split("/");
338
        TimePeriod tp = TimePeriod.NewInstance();
339
        if (split.length > 0 && isNotBlank(split[0])){
340
            tp.setStartYear(Integer.valueOf(split[0]));
341
        }
342
        if (split.length > 1 && isNotBlank(split[1])){
343
            tp.setStartMonth(Integer.valueOf(split[1]));
344
        }
345
        if (split.length > 2 && isNotBlank(split[2])){
346
            tp.setStartDay(Integer.valueOf(split[2]));
347
        }
348
        if (split.length > 3 && isNotBlank(split[3])){
349
            List<String> other = Arrays.asList(split).subList(3, split.length);
350
            String otherStr = CdmUtils.concat("/", other.toArray(new String[other.size()]));
351
            tp.setFreeText(tp.toString() + " " + otherStr);
352
        }
353
        return tp;
354
    }
355

    
356
    /**
357
     * @param state
358
     * @param py
359
     * @return
360
     */
361
    private Integer makeYear(RisReferenceImportState state, RisValue py) {
362
        if (py == null){
363
            return null;
364
        }
365
        if (py.value.matches("[0-9]{4}")){
366
            return Integer.valueOf(py.value);
367
        }else{
368
            String message = "Tag '%s' has incorrect format. Only exactly 4 digits are allowed, but was '%s'";
369
            message = String.format(message, py.tag.name(), py.value);
370
            state.getResult().addWarning(message, null, py.location);
371
            return null;
372
        }
373
    }
374

    
375
    /**
376
     * @param state
377
     * @param list
378
     * @return
379
     */
380
    private TeamOrPersonBase<?> makeAuthor(RisReferenceImportState state, List<RisValue> list) {
381
        if (list.size() == 1){
382
            return makePerson(state, list.get(0));
383
        }else{
384
            Team team = Team.NewInstance();
385
            for (RisValue value : list){
386
                team.addTeamMember(makePerson(state, value));
387
            }
388
            return team;
389
        }
390
    }
391

    
392
    /**
393
     * @param state
394
     * @param risValue
395
     * @return
396
     */
397
    private Person makePerson(RisReferenceImportState state, RisValue risValue) {
398
        Person person = Person.NewInstance();
399
        String[] split = risValue.value.split(",");
400
        if (split.length >= 1){
401
            person.setFamilyName(split[0].trim());
402
        }
403
        if (split.length >= 2){
404
            person.setGivenName(split[1].trim());
405
        }
406
        if (split.length >= 3){
407
            person.setSuffix(split[2].trim());
408
        }
409

    
410
        return person;
411
    }
412

    
413
    /**
414
     * Returns the single value for the given tag
415
     * and removes the tag from the record.
416
     * If more than 1 value exists this is logged
417
     * as a warning.
418
     */
419
    private RisValue getSingleValue(RisReferenceImportState state,
420
            Map<RisReferenceTag, List<RisValue>> record,
421
            RisReferenceTag tag) {
422
        return getSingleValue(state, record, tag, true);
423
    }
424

    
425
    /**
426
     * Returns the single value for the given tag
427
     * and removes the tag from the record.
428
     * If more than 1 value exists this is logged
429
     * as a warning.
430
     */
431
    private RisValue getSingleValue(RisReferenceImportState state,
432
            Map<RisReferenceTag, List<RisValue>> record,
433
            RisReferenceTag tag, boolean remove) {
434
        List<RisValue> list = record.get(tag);
435
        if (list == null){
436
            return null;
437
        }
438
        assertSingle(state, list, tag);
439
        if (remove){
440
            record.remove(tag);
441
        }
442
        return list.get(0);
443
    }
444

    
445
    private List<RisValue> getListValue(Map<RisReferenceTag, List<RisValue>> record,
446
            RisReferenceTag tag) {
447
        List<RisValue> list = record.get(tag);
448
        record.remove(tag);
449
        if (list == null){
450
            list = new ArrayList<>();
451
        }
452
        return list;
453
    }
454

    
455
    /**
456
     * @param state
457
     * @param list
458
     * @param tag
459
     */
460
    private void assertSingle(RisReferenceImportState state, List<RisValue> list, RisReferenceTag tag) {
461
        if (list.size() > 1){
462
            String message = "There is more than 1 tag '%s' but only 1 tag is supported by RIS format or"
463
                    + " by the current import implementation.";
464
            message = String.format(message, tag.name());
465
            state.getResult().addWarning(message, null, list.get(0).location + "ff");
466
        }else if (list.isEmpty()){
467
            state.getResult().addError("A tag list was empty. This should not happen and is a programming code error");
468
        }
469
    }
470

    
471
    /**
472
     * @param state
473
     * @param next
474
     * @return
475
     */
476
    private ReferenceType makeReferenceType(RisReferenceImportState state,
477
            Map<RisReferenceTag, List<RisValue>> record) {
478
        RisReferenceTag tyTag = RisReferenceTag.TY;
479
        RisValue value = this.getSingleValue(state, record, tyTag, false);
480
        String typeStr = value.value;
481
        RisRecordType type = RisRecordType.valueOf(typeStr);
482
        ReferenceType cdmType = type.getCdmReferenceType();
483
        return cdmType;
484
    }
485

    
486
    /**
487
     * {@inheritDoc}
488
     */
489
    @Override
490
    protected boolean doCheck(RisReferenceImportState state) {
491
        return true;
492
    }
493

    
494
    /**
495
     * {@inheritDoc}
496
     */
497
    @Override
498
    protected boolean isIgnore(RisReferenceImportState state) {
499
        return false;
500
    }
501
}
(3-3/6)