Project

General

Profile

Download (21.3 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.reference.ris.in;
10

    
11
import java.io.ByteArrayInputStream;
12
import java.io.InputStreamReader;
13
import java.util.ArrayList;
14
import java.util.Arrays;
15
import java.util.HashSet;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Set;
19

    
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.DOI;
25
import eu.etaxonomy.cdm.common.URI;
26
import eu.etaxonomy.cdm.io.common.CdmImportBase;
27
import eu.etaxonomy.cdm.io.reference.ris.in.RisRecordReader.RisValue;
28
import eu.etaxonomy.cdm.model.agent.Person;
29
import eu.etaxonomy.cdm.model.agent.Team;
30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31
import eu.etaxonomy.cdm.model.common.Annotation;
32
import eu.etaxonomy.cdm.model.common.AnnotationType;
33
import eu.etaxonomy.cdm.model.common.Language;
34
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
37
import eu.etaxonomy.cdm.model.reference.ReferenceType;
38
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
39

    
40
/**
41
 * @author a.mueller
42
 * @since 11.05.2017
43
 */
44
@Component
45
public class RisReferenceImport
46
        extends CdmImportBase<RisReferenceImportConfigurator, RisReferenceImportState>{
47

    
48
    private static final long serialVersionUID = 7022034669942979722L;
49
    @SuppressWarnings("unused")
50
    private static final Logger logger = Logger.getLogger(RisReferenceImport.class);
51

    
52
    @Override
53
    protected void doInvoke(RisReferenceImportState state) {
54
        RisReferenceImportConfigurator config = state.getConfig();
55
        try {
56
//            new FileReader(file)
57
            byte[] data = config.getStream();
58

    
59
            ByteArrayInputStream stream = new ByteArrayInputStream(data);
60
            InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
61
            RisRecordReader risReader = new RisRecordReader(state, reader);
62

    
63
            Set<Reference> referencesToSave = new HashSet<>();
64

    
65
            Map<RisReferenceTag, List<RisValue>> next = risReader.readRecord();
66
            while (next != RisRecordReader.EOF){
67
                Reference ref;
68
                String location = "";
69
                try {
70
                    location = recordLocation(state, next);
71
                    ref = makeReference(state, next);
72
                    referencesToSave.add(ref);
73
                    if (ref.getInReference() != null){
74
                        referencesToSave.add(ref.getInReference());
75
                    }
76
                } catch (Exception e) {
77
                    String message = "Unexpected exception during RIS Reference Import";
78
                    state.getResult().addException(e, message, location);
79
                }
80

    
81
                next = risReader.readRecord();
82
            }
83

    
84
            getReferenceService().saveOrUpdate(referencesToSave);
85
            state.getResult().addNewRecords(Reference.class.getSimpleName(), referencesToSave.size());
86

    
87
        } catch (Exception e) {
88
            String message = "Unexpected exception during RIS Reference Import";
89
            state.getResult().addException(e, message);
90
        }
91

    
92
        //unhandled
93
        Map<RisReferenceTag, Integer> unhandled = state.getUnhandled();
94
        for (RisReferenceTag tag : unhandled.keySet()){
95
            String message = "RIS tag %s (%s) not yet handled. n = %d";
96
            message = String .format(message, tag.name(), tag.getDescription(), unhandled.get(tag));
97
            state.getResult().addWarning(message);
98
        }
99
    }
100

    
101
    private Reference makeReference(RisReferenceImportState state,
102
            Map<RisReferenceTag, List<RisValue>> record) {
103

    
104
        //type
105
        ReferenceType type = makeReferenceType(state, record);
106
        Reference ref = ReferenceFactory.newReference(type);
107
        Reference inRef = null;
108
        if (hasInRef(ref)){
109
            ReferenceType inRefType =
110
                    type == ReferenceType.Article ? ReferenceType.Journal:
111
                    type == ReferenceType.BookSection ? ReferenceType.Book :
112
                        ReferenceType.Generic;
113
            inRef = ReferenceFactory.newReference(inRefType);
114
            ref.setInReference(inRef);
115
        }
116
        Reference higherRef = inRef == null ? ref : inRef;
117

    
118
        //titles
119
        handleTitle(state, record, ref, inRef, higherRef);
120

    
121
        //authors
122
        handleAuthors(state, record, ref, inRef);
123

    
124
        //Date
125
//        RisValue y1 = getSingleValue(state, record, RisReferenceTag.Y1); //Primary Date
126
        RisValue py = getSingleValue(state, record, RisReferenceTag.PY);
127
        RisValue da = getSingleValue(state, record, RisReferenceTag.DA);
128
        Integer year = makeYear(state, py);
129
        VerbatimTimePeriod date = makeDate(state, da);
130
        date = assertDateYear(state, year, date, py);
131
        ref.setDatePublished(date);
132
        //TODO y1 not yet handled
133

    
134
        //Note
135
        RisValue n1 = getSingleValue(state, record, RisReferenceTag.N1); //Note
136
        if (n1 != null){
137
            Annotation annotation = Annotation.NewInstance(n1.value, AnnotationType.EDITORIAL(), Language.DEFAULT());
138
            ref.addAnnotation(annotation);
139
        }
140

    
141
        //DOI
142
        handleDoi(state, record, ref);
143

    
144
        //UR
145
        RisValue ur = getSingleValue(state, record, RisReferenceTag.UR); //URL
146
        if (ur != null){
147
            URI uri;
148
            try {
149
                String urStr = ur.value;
150
                uri = URI.create(urStr);
151
                ref.setUri(uri);
152
            } catch (Exception e) {
153
                String message = "URL could not be recognized: " + ur.value;
154
                state.getResult().addWarning(message, null, ur.location);
155
            }
156
        }
157

    
158
        //Pages
159
        RisValue sp = getSingleValue(state, record, RisReferenceTag.SP);
160
        RisValue ep = getSingleValue(state, record, RisReferenceTag.EP);
161
        String pages = CdmUtils.concat("-", sp != null ? sp.value : null, ep != null ? ep.value : null);
162
        ref.setPages(pages);
163

    
164
        //Volume
165
        RisValue vl = getSingleValue(state, record, RisReferenceTag.VL);
166
        RisValue is = getSingleValue(state, record, RisReferenceTag.IS);
167
        String vol = (vl == null)? "": vl.value + (is != null ? "("+ is.value + ")": "");
168
        if (inRef != null && inRef.getType() == ReferenceType.Book){
169
            inRef.setVolume(vol);
170
        }else{
171
            ref.setVolume(vol);
172
        }
173

    
174
        //Publisher
175
        RisValue pb = getSingleValue(state, record, RisReferenceTag.PB);
176
        if (pb != null){
177
            higherRef.setPublisher(pb.value);
178
        }
179

    
180
        //CY - Place published
181
        RisValue cy = getSingleValue(state, record, RisReferenceTag.CY);
182
        if (cy != null){
183
            higherRef.setPlacePublished(cy.value);
184
        }
185

    
186
        //Abstract
187
        RisValue ab = getSingleValue(state, record, RisReferenceTag.AB);
188
        RisValue n2 = getSingleValue(state, record, RisReferenceTag.N2);
189
        RisValue abst = assertEqual(state, "Abstract", ab, n2);
190
        if (abst != null){
191
            ref.setReferenceAbstract(abst.value);
192
        }
193

    
194
        //ISSN/ISBN
195
        RisValue sn = getSingleValue(state, record, RisReferenceTag.SN);
196
        if (sn != null){
197
            if (higherRef.getType() == ReferenceType.Journal){
198
                higherRef.setIssn(sn.value);
199
            }else{
200
                higherRef.setIsbn(sn.value);
201
            }
202
        }
203

    
204
        //ID
205
        RisValue id = getSingleValue(state, record, RisReferenceTag.ID);
206
        String idStr = id != null? id.value: null;
207
        String recLoc = recordLocation(state, record);
208
        ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
209
        if (inRef != null){
210
            inRef.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
211
        }
212

    
213
        //remove
214
        record.remove(RisReferenceTag.ER);
215
        record.remove(RisReferenceTag.TY);
216

    
217
        for (RisReferenceTag tag : record.keySet()){
218
//            String message = "RIS Tag " + tag.name() +  " not yet handled";
219
//            state.getResult().addWarning(message, record.get(tag).get(0).location);
220
            state.addUnhandled(tag);
221

    
222
            //TODO add as annotation or extension
223
        }
224

    
225
        return ref;
226
    }
227

    
228
    /**
229
     * @param state
230
     * @param record
231
     * @param ref
232
     */
233
    private void handleDoi(RisReferenceImportState state, Map<RisReferenceTag, List<RisValue>> record, Reference ref) {
234
        RisValue doiVal = getSingleValue(state, record, RisReferenceTag.DO); //Doi
235
        if (doiVal != null){
236
            DOI doi;
237
            try {
238
                String doiStr = doiVal.value;
239
                if (doiStr.toLowerCase().startsWith("doi ")){
240
                    doiStr = doiStr.substring(4).trim();
241
                }
242
                doi = DOI.fromString(doiStr);
243
                ref.setDoi(doi);
244
            } catch (IllegalArgumentException e) {
245
                String message = "DOI could not be recognized: " + doiVal.value;
246
                state.getResult().addWarning(message, null, doiVal.location);
247
            }
248
        }
249
    }
250

    
251
    /**
252
     * @param state
253
     * @param record
254
     * @param ref
255
     * @param inRef
256
     * @param higherRef
257
     */
258
    private void handleTitle(RisReferenceImportState state, Map<RisReferenceTag, List<RisValue>> record, Reference ref,
259
            Reference inRef, Reference higherRef) {
260
        //Title
261
        RisValue t1 = getSingleValue(state, record, RisReferenceTag.T1);
262
        RisValue ti = getSingleValue(state, record, RisReferenceTag.TI);
263
        RisValue title = assertEqual(state, "title", t1, ti);
264
        if (title != null){
265
            ref.setTitle(title.value);
266
        }
267

    
268
        //Journal title
269
        RisValue t2 = getSingleValue(state, record, RisReferenceTag.T2); //Secondary Title (journal title, if applicable)
270

    
271
        if (higherRef.getType() == ReferenceType.Journal){
272
            RisValue jf = getSingleValue(state, record, RisReferenceTag.JF); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
273
            RisValue jo = getSingleValue(state, record, RisReferenceTag.JO); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
274
            RisValue jf_jo = assertEqual(state, "Journal/Periodical name: full format", jf, jo);
275
            RisValue journalTitle = assertEqual(state, "Journal title", t2, jf_jo);
276
            if (journalTitle != null){
277
                higherRef.setTitle(journalTitle.value);
278
            }
279
        }else if (t2 != null && inRef != null){
280
            inRef.setTitle(t2.value);
281
        }else if (t2 != null){
282
            String message = "The tag %s ('%s') exists but the reference type usually has no in-reference."
283
                    + "This part of the title was neglected: %s";
284
            message = String.format(message, t2.tag.name(), t2.tag.getDescription(), t2.value);
285
            state.getResult().addWarning(message, null, t2.location);
286
        }else if (inRef != null){
287
            String message = "The reference type typically has an inreference but no secondary title (tag T2) was given.";
288
            state.getResult().addWarning(message, null, (title != null)? title.location : null);
289
        }
290

    
291
        //ST  (remove as same as TI or T1), not handled otherwise
292
        RisValue st = getSingleValue(state, record, RisReferenceTag.ST, false); //Short title
293
        if (st != null && st.value.equals(ref.getTitle())){
294
            record.remove(RisReferenceTag.ST);
295
        }
296
    }
297

    
298
    private void handleAuthors(RisReferenceImportState state, Map<RisReferenceTag, List<RisValue>> record,
299
            Reference ref, Reference inRef) {
300
        List<RisValue> authorList = getListValue(record, RisReferenceTag.AU);
301
        if (!authorList.isEmpty()){
302
            TeamOrPersonBase<?> author = makeAuthor(state, authorList);
303
            ref.setAuthorship(author);
304
        }
305
        List<RisValue> secondaryAuthorList = getListValue(record, RisReferenceTag.A2);
306
        if (!secondaryAuthorList.isEmpty()){
307
            if (inRef != null){
308
                if (inRef.getType() != ReferenceType.Journal){
309
                    TeamOrPersonBase<?> secAuthor = makeAuthor(state, secondaryAuthorList);
310
                    inRef.setAuthorship(secAuthor);
311
                }else{
312
                    String message = "The tag %s ('%s') exists but the in-reference type is 'journal' which typically has no author."
313
                            + "The secondary author(s) was/were neglected: %s";
314
                    message = String.format(message, RisReferenceTag.AU.name(), RisReferenceTag.AU.getDescription(), secondaryAuthorList.toString());
315
                    state.getResult().addWarning(message, null, secondaryAuthorList.get(0).location);
316
                }
317
            }else{
318
                String message = "The tag %s ('%s') exists but the reference type usually has no in-reference."
319
                        + "The secondary author(s) was/were neglected: %s";
320
                message = String.format(message, RisReferenceTag.AU.name(), RisReferenceTag.AU.getDescription(), secondaryAuthorList.toString());
321
                state.getResult().addWarning(message, null, secondaryAuthorList.get(0).location);
322
            }
323
        }
324
    }
325

    
326
    private boolean hasInRef(Reference ref) {
327
        return ref.getType() == ReferenceType.BookSection || ref.getType() == ReferenceType.Article ;
328
    }
329

    
330
    private String recordLocation(RisReferenceImportState state,
331
            Map<RisReferenceTag, List<RisValue>> record) {
332
        RisValue typeTag = this.getSingleValue(state, record, RisReferenceTag.TY, false);
333
        RisValue erTag = this.getSingleValue(state, record, RisReferenceTag.ER, false);
334

    
335
        String start = typeTag == null ? "??" : typeTag.location;
336
        String end = erTag == null ? "??" : erTag.location;
337

    
338
        String result = "line " + CdmUtils.concat(" - ", start, end);
339

    
340
        return result;
341
    }
342

    
343
    private VerbatimTimePeriod assertDateYear(RisReferenceImportState state, Integer year, VerbatimTimePeriod date, RisValue py) {
344
        if (year == null && date == null){
345
            return null;
346
        }else if (year == null){
347
            return date;
348
        }else if (date == null){
349
            return TimePeriodParser.parseStringVerbatim(String.valueOf(year));
350
        }else{
351
            if  (!year.equals(date.getStartYear())){
352
                if (date.getStartYear() == null){
353
                    date.setStartYear(year);
354
                }else if (isNotBlank(date.getFreeText())){
355
                    date.setStartYear(year);  //does this happen at all?
356
                    String message = "Year 'PY' and date 'DA' are not consistent. PY is neglected.";
357
                    state.getResult().addWarning(message, null, py.location);
358
                    return date;
359
                }else{
360
                    String message = "Year 'PY' and date 'DA' are not consistent. DA is used for freetext and PY is used for (start) year.";
361
                    state.getResult().addWarning(message, null, py.location);
362
                    return date;
363
                }
364
            }
365
            return date;
366
        }
367
    }
368

    
369
    private RisValue assertEqual(RisReferenceImportState state, String meaning, RisValue val1, RisValue val2) {
370
        if (val1 != null && val2 != null && !val1.value.equals(val2.value)){
371
            String message = "The tags '%s' and '%s' are not equal but have a similar meaning ('%s'). "
372
                    + "%s was used and %s neglected";
373
            message = String.format(message, val1.tag.name(), val2.tag.name(), meaning , val1.tag.name(), val2.tag.name());
374
            state.getResult().addWarning(message, null, val1.location);
375
        }
376
        return val1 != null ? val1 : val2;
377
    }
378

    
379
    private VerbatimTimePeriod makeDate(RisReferenceImportState state, RisValue da) {
380
        if (da == null){
381
            return null;
382
        }
383
        if (! da.value.matches("([0-9]{4})?(\\/([0-9]{2})?(\\/([0-9]{2})?(\\/.*)?)?)?")){
384
            String message = "Tag '%s' has incorrect format. Only exactly 'dddd/dd/dd/any text' is allowed (where d is a digit), but was '%s'";
385
            message = String.format(message, da.tag.name(), da.value);
386
            state.getResult().addWarning(message, null, da.location);
387
            return null;
388
        }
389
        String[] split = da.value.split("/");
390
        VerbatimTimePeriod tp = VerbatimTimePeriod.NewVerbatimInstance();
391
        if (split.length > 0 && isNotBlank(split[0])){
392
            tp.setStartYear(Integer.valueOf(split[0]));
393
        }
394
        if (split.length > 1 && isNotBlank(split[1])){
395
            tp.setStartMonth(Integer.valueOf(split[1]));
396
        }
397
        if (split.length > 2 && isNotBlank(split[2])){
398
            tp.setStartDay(Integer.valueOf(split[2]));
399
        }
400
        if (split.length > 3 && isNotBlank(split[3])){
401
            List<String> other = Arrays.asList(split).subList(3, split.length);
402
            String otherStr = CdmUtils.concat("/", other.toArray(new String[other.size()]));
403
            tp.setFreeText(tp.toString() + " " + otherStr);
404
        }
405
        return tp;
406
    }
407

    
408
    private Integer makeYear(RisReferenceImportState state, RisValue py) {
409
        if (py == null){
410
            return null;
411
        }
412
        if (py.value.matches("[0-9]{4}")){
413
            return Integer.valueOf(py.value);
414
        }else{
415
            String message = "Tag '%s' has incorrect format. Only exactly 4 digits are allowed, but was '%s'";
416
            message = String.format(message, py.tag.name(), py.value);
417
            state.getResult().addWarning(message, null, py.location);
418
            return null;
419
        }
420
    }
421

    
422
    private TeamOrPersonBase<?> makeAuthor(RisReferenceImportState state, List<RisValue> list) {
423
        if (list.size() == 1){
424
            return makePerson(state, list.get(0));
425
        }else{
426
            Team team = Team.NewInstance();
427
            for (RisValue value : list){
428
                team.addTeamMember(makePerson(state, value));
429
            }
430
            return team;
431
        }
432
    }
433

    
434
    private Person makePerson(RisReferenceImportState state, RisValue risValue) {
435
        Person person = Person.NewInstance();
436
        String[] split = risValue.value.split(",");
437
        if (split.length >= 1){
438
            person.setFamilyName(split[0].trim());
439
        }
440
        if (split.length >= 2){
441
            String givenNameOrInitial = split[1].trim();
442
            if (givenNameOrInitial.matches("[A-Za-z]\\.(\\s*[A-Za-z]\\.)*")){
443
                person.setInitials(givenNameOrInitial);
444
            }else{
445
                person.setGivenName(givenNameOrInitial);
446
            }
447
        }
448
        if (split.length >= 3){
449
            person.setSuffix(split[2].trim());
450
        }
451

    
452
        return person;
453
    }
454

    
455
    /**
456
     * Returns the single value for the given tag
457
     * and removes the tag from the record.
458
     * If more than 1 value exists this is logged
459
     * as a warning.
460
     */
461
    private RisValue getSingleValue(RisReferenceImportState state,
462
            Map<RisReferenceTag, List<RisValue>> record,
463
            RisReferenceTag tag) {
464
        return getSingleValue(state, record, tag, true);
465
    }
466

    
467
    /**
468
     * Returns the single value for the given tag
469
     * and removes the tag from the record.
470
     * If more than 1 value exists this is logged
471
     * as a warning.
472
     */
473
    private RisValue getSingleValue(RisReferenceImportState state,
474
            Map<RisReferenceTag, List<RisValue>> record,
475
            RisReferenceTag tag, boolean remove) {
476
        List<RisValue> list = record.get(tag);
477
        if (list == null){
478
            return null;
479
        }
480
        assertSingle(state, list, tag);
481
        if (remove){
482
            record.remove(tag);
483
        }
484
        return list.get(0);
485
    }
486

    
487
    private List<RisValue> getListValue(Map<RisReferenceTag, List<RisValue>> record,
488
            RisReferenceTag tag) {
489
        List<RisValue> list = record.get(tag);
490
        record.remove(tag);
491
        if (list == null){
492
            list = new ArrayList<>();
493
        }
494
        return list;
495
    }
496

    
497
    private void assertSingle(RisReferenceImportState state, List<RisValue> list, RisReferenceTag tag) {
498
        if (list.size() > 1){
499
            String message = "There is more than 1 tag '%s' but only 1 tag is supported by RIS format or"
500
                    + " by the current import implementation.";
501
            message = String.format(message, tag.name());
502
            state.getResult().addWarning(message, null, list.get(0).location + "ff");
503
        }else if (list.isEmpty()){
504
            state.getResult().addError("A tag list was empty. This should not happen and is a programming code error");
505
        }
506
    }
507

    
508
    private ReferenceType makeReferenceType(RisReferenceImportState state,
509
            Map<RisReferenceTag, List<RisValue>> record) {
510
        RisReferenceTag tyTag = RisReferenceTag.TY;
511
        RisValue value = this.getSingleValue(state, record, tyTag, false);
512
        String typeStr = value.value;
513
        RisRecordType type = RisRecordType.valueOf(typeStr);
514
        ReferenceType cdmType = type.getCdmReferenceType();
515
        return cdmType;
516
    }
517

    
518
    @Override
519
    protected boolean doCheck(RisReferenceImportState state) {
520
        return true;
521
    }
522

    
523
    @Override
524
    protected boolean isIgnore(RisReferenceImportState state) {
525
        return false;
526
    }
527
}
(3-3/6)