Project

General

Profile

Download (21.2 KB) Statistics
| Branch: | Tag: | Revision:
/**
* Copyright (C) 2017 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
package eu.etaxonomy.cdm.io.reference.ris.in;
10

    
11
import java.io.ByteArrayInputStream;
12
import java.io.InputStreamReader;
13
import java.util.ArrayList;
14
import java.util.Arrays;
15
import java.util.HashSet;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Set;
19

    
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.DOI;
25
import eu.etaxonomy.cdm.common.URI;
26
import eu.etaxonomy.cdm.io.common.CdmImportBase;
27
import eu.etaxonomy.cdm.io.reference.ris.in.RisRecordReader.RisValue;
28
import eu.etaxonomy.cdm.model.agent.Person;
29
import eu.etaxonomy.cdm.model.agent.Team;
30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31
import eu.etaxonomy.cdm.model.common.Annotation;
32
import eu.etaxonomy.cdm.model.common.AnnotationType;
33
import eu.etaxonomy.cdm.model.common.Language;
34
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
37
import eu.etaxonomy.cdm.model.reference.ReferenceType;
38
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
39

    
/**
 * Import of bibliographic references given in RIS format.
 *
 * @author a.mueller
 * @since 11.05.2017
 */
44
@Component
45
public class RisReferenceImport
46
        extends CdmImportBase<RisReferenceImportConfigurator, RisReferenceImportState>{
47

    
48
    private static final long serialVersionUID = 7022034669942979722L;
49
    @SuppressWarnings("unused")
50
    private static final Logger logger = Logger.getLogger(RisReferenceImport.class);
51

    
52
    @Override
53
    protected void doInvoke(RisReferenceImportState state) {
54
        RisReferenceImportConfigurator config = state.getConfig();
55
        try {
56
//            new FileReader(file)
57
            byte[] data = config.getStream();
58

    
59
            ByteArrayInputStream stream = new ByteArrayInputStream(data);
60
            InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
61
            RisRecordReader risReader = new RisRecordReader(state, reader);
62

    
63
            Set<Reference> referencesToSave = new HashSet<>();
64

    
65
            Map<RisReferenceTag, List<RisValue>> next = risReader.readRecord();
66
            while (next != RisRecordReader.EOF){
67
                Reference ref;
68
                String location = "";
69
                try {
70
                    location = recordLocation(state, next);
71
                    ref = handleSingleReference(state, next);
72
                    referencesToSave.add(ref);
73
                    if (ref.getInReference() != null){
74
                        referencesToSave.add(ref.getInReference());
75
                    }
76
                } catch (Exception e) {
77
                    String message = "Unexpected exception during RIS Reference Import";
78
                    state.getResult().addException(e, message, location);
79
                }
80

    
81
                next = risReader.readRecord();
82
            }
83

    
84

    
85
            getReferenceService().saveOrUpdate(referencesToSave);
86
            state.getResult().addNewRecords(Reference.class.getSimpleName(), referencesToSave.size());
87

    
88
        } catch (Exception e) {
89
            String message = "Unexpected exception during RIS Reference Import";
90
            state.getResult().addException(e, message);
91
        }
92

    
93
        //unhandled
94
        Map<RisReferenceTag, Integer> unhandled = state.getUnhandled();
95
        for (RisReferenceTag tag : unhandled.keySet()){
96
            String message = "RIS tag %s (%s) not yet handled. n = %d";
97
            message = String .format(message, tag.name(), tag.getDescription(), unhandled.get(tag));
98
            state.getResult().addWarning(message);
99
        }
100
    }
101

    
102
    private Reference handleSingleReference(RisReferenceImportState state,
103
            Map<RisReferenceTag, List<RisValue>> record) {
104

    
105
        //type
106
        ReferenceType type = makeReferenceType(state, record);
107
        Reference ref = ReferenceFactory.newReference(type);
108
        Reference inRef = null;
109
        if (hasInRef(ref)){
110
            ReferenceType inRefType =
111
                    type == ReferenceType.Article ? ReferenceType.Journal:
112
                    type == ReferenceType.BookSection ? ReferenceType.Book :
113
                        ReferenceType.Generic;
114
            inRef = ReferenceFactory.newReference(inRefType);
115
            ref.setInReference(inRef);
116
        }
117
        Reference higherRef = (inRef == null) ? ref : inRef;
118

    
119
        //titles
120
        handleTitle(state, record, ref, inRef, higherRef);
121

    
122
        //authors
123
        handleAuthors(state, record, ref, inRef);
124

    
125
        //Date
126
//        RisValue y1 = getSingleValue(state, record, RisReferenceTag.Y1); //Primary Date
127
        RisValue py = getSingleValue(state, record, RisReferenceTag.PY);
128
        RisValue da = getSingleValue(state, record, RisReferenceTag.DA);
129
        Integer year = makeYear(state, py);
130
        VerbatimTimePeriod date = makeDate(state, da);
131
        date = assertDateYear(state, year, date, py);
132
        ref.setDatePublished(date);
133
        //TODO y1 not yet handled
134

    
135
        //Note
136
        RisValue n1 = getSingleValue(state, record, RisReferenceTag.N1); //Note
137
        if (n1 != null){
138
            Annotation annotation = Annotation.NewInstance(n1.value, AnnotationType.EDITORIAL(), Language.DEFAULT());
139
            ref.addAnnotation(annotation);
140
        }
141

    
142
        //DOI
143
        handleDoi(state, record, ref);
144

    
145
        //UR
146
        RisValue ur = getSingleValue(state, record, RisReferenceTag.UR); //URL
147
        if (ur != null){
148
            URI uri;
149
            try {
150
                String urStr = ur.value;
151
                uri = URI.create(urStr);
152
                ref.setUri(uri);
153
            } catch (Exception e) {
154
                String message = "URL could not be recognized: " + ur.value;
155
                state.getResult().addWarning(message, null, ur.location);
156
            }
157
        }
158

    
159
        //Pages
160
        RisValue sp = getSingleValue(state, record, RisReferenceTag.SP);
161
        RisValue ep = getSingleValue(state, record, RisReferenceTag.EP);
162
        String pages = CdmUtils.concat("-", sp != null ? sp.value : null, ep != null ? ep.value : null);
163
        ref.setPages(pages);
164

    
165
        //Volume
166
        RisValue vl = getSingleValue(state, record, RisReferenceTag.VL);
167
        RisValue is = getSingleValue(state, record, RisReferenceTag.IS);
168
        String vol = (vl == null)? "": vl.value + (is != null ? "("+ is.value + ")": "");
169
        if (inRef != null && inRef.getType() == ReferenceType.Book){
170
            inRef.setVolume(vol);
171
        }else{
172
            ref.setVolume(vol);
173
        }
174

    
175
        //Publisher
176
        RisValue pb = getSingleValue(state, record, RisReferenceTag.PB);
177
        if (pb != null){
178
            higherRef.setPublisher(pb.value);
179
        }
180

    
181
        //CY - Place published
182
        RisValue cy = getSingleValue(state, record, RisReferenceTag.CY);
183
        if (cy != null){
184
            higherRef.setPlacePublished(cy.value);
185
        }
186

    
187
        //Abstract
188
        RisValue ab = getSingleValue(state, record, RisReferenceTag.AB);
189
        RisValue n2 = getSingleValue(state, record, RisReferenceTag.N2);
190
        RisValue abst = assertEqual(state, "Abstract", ab, n2);
191
        if (abst != null){
192
            ref.setReferenceAbstract(abst.value);
193
        }
194

    
195
        //ISSN/ISBN
196
        RisValue sn = getSingleValue(state, record, RisReferenceTag.SN);
197
        if (sn != null){
198
            if (higherRef.getType() == ReferenceType.Journal){
199
                higherRef.setIssn(sn.value);
200
            }else{
201
                higherRef.setIsbn(sn.value);
202
            }
203
        }
204

    
205
        //ID
206
        RisValue id = getSingleValue(state, record, RisReferenceTag.ID);
207
        String idStr = id != null? id.value: null;
208
        String recLoc = recordLocation(state, record);
209
        ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
210
        if (inRef != null){
211
            inRef.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
212
        }
213

    
214
        //remove
215
        record.remove(RisReferenceTag.ER);
216
        record.remove(RisReferenceTag.TY);
217

    
218
        for (RisReferenceTag tag : record.keySet()){
219
//            String message = "RIS Tag " + tag.name() +  " not yet handled";
220
//            state.getResult().addWarning(message, record.get(tag).get(0).location);
221
            state.addUnhandled(tag);
222

    
223
            //TODO add as annotation or extension
224
        }
225

    
226
        return ref;
227
    }
228

    
229
    private void handleDoi(RisReferenceImportState state, Map<RisReferenceTag, List<RisValue>> record, Reference ref) {
230
        RisValue doiVal = getSingleValue(state, record, RisReferenceTag.DO); //Doi
231
        if (doiVal != null){
232
            DOI doi;
233
            try {
234
                String doiStr = doiVal.value;
235
                if (doiStr.toLowerCase().startsWith("doi ")){
236
                    doiStr = doiStr.substring(4).trim();
237
                }
238
                doi = DOI.fromString(doiStr);
239
                ref.setDoi(doi);
240
            } catch (IllegalArgumentException e) {
241
                String message = "DOI could not be recognized: " + doiVal.value;
242
                state.getResult().addWarning(message, null, doiVal.location);
243
            }
244
        }
245
    }
246

    
247
    private void handleTitle(RisReferenceImportState state, Map<RisReferenceTag, List<RisValue>> record, Reference ref,
248
            Reference inRef, Reference higherRef) {
249
        //Title
250
        RisValue t1 = getSingleValue(state, record, RisReferenceTag.T1);
251
        RisValue ti = getSingleValue(state, record, RisReferenceTag.TI);
252
        RisValue title = assertEqual(state, "title", t1, ti);
253
        if (title != null){
254
            ref.setTitle(title.value);
255
        }
256

    
257
        //Journal title
258
        RisValue t2 = getSingleValue(state, record, RisReferenceTag.T2); //Secondary Title (journal title, if applicable)
259

    
260
        if (higherRef.getType() == ReferenceType.Journal){
261
            RisValue jf = getSingleValue(state, record, RisReferenceTag.JF); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
262
            RisValue jo = getSingleValue(state, record, RisReferenceTag.JO); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
263
            RisValue jf_jo = assertEqual(state, "Journal/Periodical name: full format", jf, jo);
264
            RisValue journalTitle = assertEqual(state, "Journal title", t2, jf_jo);
265
            if (journalTitle != null){
266
                higherRef.setTitle(journalTitle.value);
267
            }
268
        }else if (t2 != null && inRef != null){
269
            inRef.setTitle(t2.value);
270
        }else if (t2 != null){
271
            String message = "The tag %s ('%s') exists but the reference type usually has no in-reference."
272
                    + "This part of the title was neglected: %s";
273
            message = String.format(message, t2.tag.name(), t2.tag.getDescription(), t2.value);
274
            state.getResult().addWarning(message, null, t2.location);
275
        }else if (inRef != null){
276
            String message = "The reference type typically has an inreference but no secondary title (tag T2) was given.";
277
            state.getResult().addWarning(message, null, (title != null)? title.location : null);
278
        }
279

    
280
        //ST  (remove as same as TI or T1), not handled otherwise
281
        RisValue st = getSingleValue(state, record, RisReferenceTag.ST, false); //Short title
282
        if (st != null && st.value.equals(ref.getTitle())){
283
            record.remove(RisReferenceTag.ST);
284
        }
285
    }
286

    
287
    private void handleAuthors(RisReferenceImportState state, Map<RisReferenceTag, List<RisValue>> record,
288
            Reference ref, Reference inRef) {
289
        List<RisValue> authorList = getListValue(record, RisReferenceTag.AU);
290
        if (!authorList.isEmpty()){
291
            TeamOrPersonBase<?> author = makeAuthor(state, authorList);
292
            ref.setAuthorship(author);
293
        }
294
        List<RisValue> secondaryAuthorList = getListValue(record, RisReferenceTag.A2);
295
        if (!secondaryAuthorList.isEmpty()){
296
            if (inRef != null){
297
                if (inRef.getType() != ReferenceType.Journal){
298
                    TeamOrPersonBase<?> secAuthor = makeAuthor(state, secondaryAuthorList);
299
                    inRef.setAuthorship(secAuthor);
300
                }else{
301
                    String message = "The tag %s ('%s') exists but the in-reference type is 'journal' which typically has no author."
302
                            + "The secondary author(s) was/were neglected: %s";
303
                    message = String.format(message, RisReferenceTag.AU.name(), RisReferenceTag.AU.getDescription(), secondaryAuthorList.toString());
304
                    state.getResult().addWarning(message, null, secondaryAuthorList.get(0).location);
305
                }
306
            }else{
307
                String message = "The tag %s ('%s') exists but the reference type usually has no in-reference."
308
                        + "The secondary author(s) was/were neglected: %s";
309
                message = String.format(message, RisReferenceTag.AU.name(), RisReferenceTag.AU.getDescription(), secondaryAuthorList.toString());
310
                state.getResult().addWarning(message, null, secondaryAuthorList.get(0).location);
311
            }
312
        }
313
    }
314

    
315
    private boolean hasInRef(Reference ref) {
316
        return ref.getType() == ReferenceType.BookSection || ref.getType() == ReferenceType.Article ;
317
    }
318

    
319
    private String recordLocation(RisReferenceImportState state,
320
            Map<RisReferenceTag, List<RisValue>> record) {
321
        RisValue typeTag = this.getSingleValue(state, record, RisReferenceTag.TY, false);
322
        RisValue erTag = this.getSingleValue(state, record, RisReferenceTag.ER, false);
323

    
324
        String start = typeTag == null ? "??" : typeTag.location;
325
        String end = erTag == null ? "??" : erTag.location;
326

    
327
        String result = "line " + CdmUtils.concat(" - ", start, end);
328

    
329
        return result;
330
    }
331

    
332
    private VerbatimTimePeriod assertDateYear(RisReferenceImportState state, Integer year, VerbatimTimePeriod date, RisValue py) {
333
        if (year == null && date == null){
334
            return null;
335
        }else if (year == null){
336
            return date;
337
        }else if (date == null){
338
            return TimePeriodParser.parseStringVerbatim(String.valueOf(year));
339
        }else{
340
            if  (!year.equals(date.getStartYear())){
341
                if (date.getStartYear() == null){
342
                    date.setStartYear(year);
343
                }else if (isNotBlank(date.getFreeText())){
344
                    date.setStartYear(year);  //does this happen at all?
345
                    String message = "Year 'PY' and date 'DA' are not consistent. PY is neglected.";
346
                    state.getResult().addWarning(message, null, py.location);
347
                    return date;
348
                }else{
349
                    String message = "Year 'PY' and date 'DA' are not consistent. DA is used for freetext and PY is used for (start) year.";
350
                    state.getResult().addWarning(message, null, py.location);
351
                    return date;
352
                }
353
            }
354
            return date;
355
        }
356
    }
357

    
358
    /**
359
     * If val1 and val2 are both <code>not null</code> and not equal a warning is logged.
360
     * @return val1 if val1 is not null, val2 otherwise
361
     */
362
    private RisValue assertEqual(RisReferenceImportState state, String meaning, RisValue val1, RisValue val2) {
363
        if (val1 != null && val2 != null && !val1.value.equals(val2.value)){
364
            String message = "The tags '%s' and '%s' are not equal but have a similar meaning ('%s'). "
365
                    + "%s was used and %s neglected";
366
            message = String.format(message, val1.tag.name(), val2.tag.name(), meaning , val1.tag.name(), val2.tag.name());
367
            state.getResult().addWarning(message, null, val1.location);
368
        }
369
        return val1 != null ? val1 : val2;
370
    }
371

    
372
    private VerbatimTimePeriod makeDate(RisReferenceImportState state, RisValue da) {
373
        if (da == null){
374
            return null;
375
        }
376
        if (! da.value.matches("([0-9]{4})?(\\/([0-9]{2})?(\\/([0-9]{2})?(\\/.*)?)?)?")){
377
            String message = "Tag '%s' has incorrect format. Only exactly 'dddd/dd/dd/any text' is allowed (where d is a digit), but was '%s'";
378
            message = String.format(message, da.tag.name(), da.value);
379
            state.getResult().addWarning(message, null, da.location);
380
            return null;
381
        }
382
        String[] split = da.value.split("/");
383
        VerbatimTimePeriod tp = VerbatimTimePeriod.NewVerbatimInstance();
384
        if (split.length > 0 && isNotBlank(split[0])){
385
            tp.setStartYear(Integer.valueOf(split[0]));
386
        }
387
        if (split.length > 1 && isNotBlank(split[1])){
388
            tp.setStartMonth(Integer.valueOf(split[1]));
389
        }
390
        if (split.length > 2 && isNotBlank(split[2])){
391
            tp.setStartDay(Integer.valueOf(split[2]));
392
        }
393
        if (split.length > 3 && isNotBlank(split[3])){
394
            List<String> other = Arrays.asList(split).subList(3, split.length);
395
            String otherStr = CdmUtils.concat("/", other.toArray(new String[other.size()]));
396
            tp.setFreeText(tp.toString() + " " + otherStr);
397
        }
398
        return tp;
399
    }
400

    
401
    private Integer makeYear(RisReferenceImportState state, RisValue py) {
402
        if (py == null){
403
            return null;
404
        }
405
        if (py.value.matches("[0-9]{4}")){
406
            return Integer.valueOf(py.value);
407
        }else{
408
            String message = "Tag '%s' has incorrect format. Only exactly 4 digits are allowed, but was '%s'";
409
            message = String.format(message, py.tag.name(), py.value);
410
            state.getResult().addWarning(message, null, py.location);
411
            return null;
412
        }
413
    }
414

    
415
    private TeamOrPersonBase<?> makeAuthor(RisReferenceImportState state, List<RisValue> list) {
416
        if (list.size() == 1){
417
            return makePerson(state, list.get(0));
418
        }else{
419
            Team team = Team.NewInstance();
420
            for (RisValue value : list){
421
                team.addTeamMember(makePerson(state, value));
422
            }
423
            return team;
424
        }
425
    }
426

    
427
    private Person makePerson(RisReferenceImportState state, RisValue risValue) {
428
        Person person = Person.NewInstance();
429
        String[] split = risValue.value.split(",");
430
        if (split.length >= 1){
431
            person.setFamilyName(split[0].trim());
432
        }
433
        if (split.length >= 2){
434
            String givenNameOrInitial = split[1].trim();
435
            if (givenNameOrInitial.matches("[A-Za-z]\\.(\\s*[A-Za-z]\\.)*")){
436
                person.setInitials(givenNameOrInitial);
437
            }else{
438
                person.setGivenName(givenNameOrInitial);
439
            }
440
        }
441
        if (split.length >= 3){
442
            person.setSuffix(split[2].trim());
443
        }
444

    
445
        return person;
446
    }
447

    
448
    /**
449
     * Returns the single value for the given tag
450
     * and removes the tag from the record.
451
     * If more than 1 value exists this is logged
452
     * as a warning.
453
     */
454
    private RisValue getSingleValue(RisReferenceImportState state,
455
            Map<RisReferenceTag, List<RisValue>> record,
456
            RisReferenceTag tag) {
457
        return getSingleValue(state, record, tag, true);
458
    }
459

    
460
    /**
461
     * Returns the single value for the given tag
462
     * and removes the tag from the record.
463
     * If more than 1 value exists this is logged
464
     * as a warning.
465
     */
466
    private RisValue getSingleValue(RisReferenceImportState state,
467
            Map<RisReferenceTag, List<RisValue>> record,
468
            RisReferenceTag tag, boolean remove) {
469

    
470
        List<RisValue> list = record.get(tag);
471
        if (list == null){
472
            return null;
473
        }
474
        assertSingle(state, list, tag);
475
        if (remove){
476
            record.remove(tag);
477
        }
478
        return list.get(0);
479
    }
480

    
481
    private List<RisValue> getListValue(Map<RisReferenceTag, List<RisValue>> record,
482
            RisReferenceTag tag) {
483

    
484
        List<RisValue> list = record.get(tag);
485
        record.remove(tag);
486
        if (list == null){
487
            list = new ArrayList<>();
488
        }
489
        return list;
490
    }
491

    
492
    private void assertSingle(RisReferenceImportState state, List<RisValue> list, RisReferenceTag tag) {
493
        if (list.size() > 1){
494
            String message = "There is more than 1 tag '%s' but only 1 tag is supported by RIS format or"
495
                    + " by the current import implementation.";
496
            message = String.format(message, tag.name());
497
            state.getResult().addWarning(message, null, list.get(0).location + "ff");
498
        }else if (list.isEmpty()){
499
            state.getResult().addError("A tag list was empty. This should not happen and is a programming code error");
500
        }
501
    }
502

    
503
    private ReferenceType makeReferenceType(RisReferenceImportState state,
504
            Map<RisReferenceTag, List<RisValue>> record) {
505
        RisReferenceTag tyTag = RisReferenceTag.TY;
506
        RisValue value = this.getSingleValue(state, record, tyTag, false);
507
        String typeStr = value.value;
508
        RisRecordType type = RisRecordType.valueOf(typeStr);
509
        ReferenceType cdmType = type.getCdmReferenceType();
510
        return cdmType;
511
    }
512

    
513
    @Override
514
    protected boolean doCheck(RisReferenceImportState state) {
515
        return true;
516
    }
517

    
518
    @Override
519
    protected boolean isIgnore(RisReferenceImportState state) {
520
        return false;
521
    }
522
}
(3-3/6)