Project

General

Profile

Download (18 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.reference.ris.in;
10

    
11
import java.io.ByteArrayInputStream;
12
import java.io.InputStreamReader;
13
import java.util.ArrayList;
14
import java.util.Arrays;
15
import java.util.HashSet;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Set;
19

    
20
import org.apache.log4j.Logger;
21
import org.springframework.stereotype.Component;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.DOI;
25
import eu.etaxonomy.cdm.common.URI;
26
import eu.etaxonomy.cdm.io.common.CdmImportBase;
27
import eu.etaxonomy.cdm.io.reference.ris.in.RisRecordReader.RisValue;
28
import eu.etaxonomy.cdm.model.agent.Person;
29
import eu.etaxonomy.cdm.model.agent.Team;
30
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
31
import eu.etaxonomy.cdm.model.common.Annotation;
32
import eu.etaxonomy.cdm.model.common.AnnotationType;
33
import eu.etaxonomy.cdm.model.common.Language;
34
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
35
import eu.etaxonomy.cdm.model.reference.Reference;
36
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
37
import eu.etaxonomy.cdm.model.reference.ReferenceType;
38
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
39

    
40
/**
41
 * @author a.mueller
42
 * @since 11.05.2017
43
 */
44
@Component
45
public class RisReferenceImport
46
        extends CdmImportBase<RisReferenceImportConfigurator, RisReferenceImportState>{
47

    
48
    private static final long serialVersionUID = 7022034669942979722L;
49
    @SuppressWarnings("unused")
50
    private static final Logger logger = Logger.getLogger(RisReferenceImport.class);
51

    
52
    @Override
53
    protected void doInvoke(RisReferenceImportState state) {
54
        RisReferenceImportConfigurator config = state.getConfig();
55
        try {
56
//            new FileReader(file)
57
            byte[] data = config.getStream();
58

    
59
            ByteArrayInputStream stream = new ByteArrayInputStream(data);
60
            InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
61
            RisRecordReader risReader = new RisRecordReader(state, reader);
62

    
63
            Set<Reference> referencesToSave = new HashSet<>();
64

    
65
            Map<RisReferenceTag, List<RisValue>> next = risReader.readRecord();
66
            while (next != RisRecordReader.EOF){
67
                Reference ref;
68
                String location = "";
69
                try {
70
                    location = recordLocation(state, next);
71
                    ref = makeReference(state, next);
72
                    referencesToSave.add(ref);
73
                    if (ref.getInReference() != null){
74
                        referencesToSave.add(ref.getInReference());
75
                    }
76
                } catch (Exception e) {
77
                    String message = "Unexpected exception during RIS Reference Import";
78
                    state.getResult().addException(e, message, location);
79
                }
80

    
81
                next = risReader.readRecord();
82
            }
83

    
84
            getReferenceService().saveOrUpdate(referencesToSave);
85
            state.getResult().addNewRecords(Reference.class.getSimpleName(), referencesToSave.size());
86

    
87
        } catch (Exception e) {
88
            String message = "Unexpected exception during RIS Reference Import";
89
            state.getResult().addException(e, message);
90
        }
91

    
92
        //unhandled
93
        Map<RisReferenceTag, Integer> unhandled = state.getUnhandled();
94
        for (RisReferenceTag tag : unhandled.keySet()){
95
            String message = "RIS tag %s (%s) not yet handled. n = %d";
96
            message = String .format(message, tag.name(), tag.getDescription(), unhandled.get(tag));
97
            state.getResult().addWarning(message);
98
        }
99
    }
100

    
101
    private Reference makeReference(RisReferenceImportState state,
102
            Map<RisReferenceTag, List<RisValue>> record) {
103

    
104
        //type
105
        ReferenceType type = makeReferenceType(state, record);
106
        Reference ref = ReferenceFactory.newReference(type);
107
        Reference inRef = null;
108
        if (hasInRef(ref)){
109
            ReferenceType inRefType =
110
                    type == ReferenceType.Article ? ReferenceType.Journal:
111
                    type == ReferenceType.BookSection ? ReferenceType.Book :
112
                        ReferenceType.Generic;
113
            inRef = ReferenceFactory.newReference(inRefType);
114
            ref.setInReference(inRef);
115
        }
116
        Reference higherRef = inRef == null ? ref : inRef;
117

    
118
        //Title
119
        RisValue t1 = getSingleValue(state, record, RisReferenceTag.T1);
120
        RisValue ti = getSingleValue(state, record, RisReferenceTag.TI);
121
        RisValue value = assertEqual(state, "title", t1, ti);
122
        if (value != null){
123
            ref.setTitle(value.value);
124
        }
125

    
126
        //Journal title
127
        RisValue t2 = getSingleValue(state, record, RisReferenceTag.T2); //Secondary Title (journal title, if applicable)
128

    
129
        if (higherRef.getType() == ReferenceType.Journal){
130
            RisValue jf = getSingleValue(state, record, RisReferenceTag.JF); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
131
            RisValue jo = getSingleValue(state, record, RisReferenceTag.JO); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
132
            RisValue x = assertEqual(state, "Journal/Periodical name: full format", jf, jo);
133
            x = assertEqual(state, "Journal title", t2, x);
134
            if (x != null){
135
                higherRef.setTitle(x.value);
136
            }
137
        }else{
138
            //TODO
139
        }
140

    
141
        //ST  (remove as same as TI or T1), not handled otherwise
142
        RisValue st = getSingleValue(state, record, RisReferenceTag.ST, false); //Short title
143
        if (st != null && st.value.equals(ref.getTitle())){
144
            record.remove(RisReferenceTag.ST);
145
        }
146

    
147
        //Author
148
        List<RisValue> list = getListValue(record, RisReferenceTag.AU);
149
        if (!list.isEmpty()){
150
            TeamOrPersonBase<?> author = makeAuthor(state, list);
151
            ref.setAuthorship(author);
152
        }
153

    
154
        //Date
155
//        RisValue y1 = getSingleValue(state, record, RisReferenceTag.Y1); //Primary Date
156
        RisValue py = getSingleValue(state, record, RisReferenceTag.PY);
157
        RisValue da = getSingleValue(state, record, RisReferenceTag.DA);
158
        Integer year = makeYear(state, py);
159
        VerbatimTimePeriod date = makeDate(state, da);
160
        date = assertDateYear(state, year, date, py);
161
        ref.setDatePublished(date);
162
        //TODO y1 not yet handled
163

    
164
        //Note
165
        RisValue n1 = getSingleValue(state, record, RisReferenceTag.N1); //Note
166
        if (n1 != null){
167
            Annotation annotation = Annotation.NewInstance(n1.value, AnnotationType.EDITORIAL(), Language.DEFAULT());
168
            ref.addAnnotation(annotation);
169
        }
170

    
171
        //DOI
172
        RisValue doiVal = getSingleValue(state, record, RisReferenceTag.DO); //Doi
173
        if (doiVal != null){
174
            DOI doi;
175
            try {
176
                String doiStr = doiVal.value;
177
                if (doiStr.toLowerCase().startsWith("doi ")){
178
                    doiStr = doiStr.substring(4).trim();
179
                }
180
                doi = DOI.fromString(doiStr);
181
                ref.setDoi(doi);
182
            } catch (IllegalArgumentException e) {
183
                String message = "DOI could not be recognized: " + doiVal.value;
184
                state.getResult().addWarning(message, null, doiVal.location);
185
            }
186
        }
187

    
188
        //UR
189
        RisValue ur = getSingleValue(state, record, RisReferenceTag.UR); //URL
190
        if (ur != null){
191
            URI uri;
192
            try {
193
                String urStr = ur.value;
194
                uri = URI.create(urStr);
195
                ref.setUri(uri);
196
            } catch (Exception e) {
197
                String message = "URL could not be recognized: " + ur.value;
198
                state.getResult().addWarning(message, null, ur.location);
199
            }
200
        }
201

    
202
        //Pages
203
        RisValue sp = getSingleValue(state, record, RisReferenceTag.SP);
204
        RisValue ep = getSingleValue(state, record, RisReferenceTag.EP);
205
        String pages = CdmUtils.concat("-", sp != null ? sp.value : null, ep != null ? ep.value : null);
206
        ref.setPages(pages);
207

    
208
        //Volume
209
        RisValue vl = getSingleValue(state, record, RisReferenceTag.VL);
210
        RisValue is = getSingleValue(state, record, RisReferenceTag.IS);
211
        String vol = vl == null? "": vl.value + (is != null ? "("+ is.value + ")": "");
212
        ref.setVolume(vol);
213

    
214
        //Publisher
215
        RisValue pb = getSingleValue(state, record, RisReferenceTag.PB);
216
        if (pb != null){
217
            higherRef.setPublisher(pb.value);
218
        }
219

    
220
        //CY - Place published
221
        RisValue cy = getSingleValue(state, record, RisReferenceTag.CY);
222
        if (cy != null){
223
            higherRef.setPlacePublished(cy.value);
224
        }
225

    
226
        //Abstract
227
        RisValue ab = getSingleValue(state, record, RisReferenceTag.AB);
228
        RisValue n2 = getSingleValue(state, record, RisReferenceTag.N2);
229
        RisValue abst = assertEqual(state, "Abstract", ab, n2);
230
        if (abst != null){
231
            ref.setReferenceAbstract(abst.value);
232
        }
233

    
234
        //ISSN/ISBN
235
        RisValue sn = getSingleValue(state, record, RisReferenceTag.SN);
236
        if (sn != null){
237
            if (higherRef.getType() == ReferenceType.Journal){
238
                higherRef.setIssn(sn.value);
239
            }else{
240
                higherRef.setIsbn(sn.value);
241
            }
242
        }
243

    
244
        //ID
245
        RisValue id = getSingleValue(state, record, RisReferenceTag.ID);
246
        String idStr = id != null? id.value: null;
247
        String recLoc = recordLocation(state, record);
248
        ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
249
        if (inRef != null){
250
            inRef.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
251
        }
252

    
253
        //remove
254
        record.remove(RisReferenceTag.ER);
255
        record.remove(RisReferenceTag.TY);
256

    
257
        for (RisReferenceTag tag : record.keySet()){
258
//            String message = "RIS Tag " + tag.name() +  " not yet handled";
259
//            state.getResult().addWarning(message, record.get(tag).get(0).location);
260
            state.addUnhandled(tag);
261

    
262
            //TODO add as annotation or extension
263
        }
264

    
265
        return ref;
266
    }
267

    
268
    private boolean hasInRef(Reference ref) {
269
        return ref.getType() == ReferenceType.BookSection || ref.getType() == ReferenceType.Article ;
270
    }
271

    
272
    private String recordLocation(RisReferenceImportState state,
273
            Map<RisReferenceTag, List<RisValue>> record) {
274
        RisValue typeTag = this.getSingleValue(state, record, RisReferenceTag.TY, false);
275
        RisValue erTag = this.getSingleValue(state, record, RisReferenceTag.ER, false);
276

    
277
        String start = typeTag == null ? "??" : typeTag.location;
278
        String end = erTag == null ? "??" : erTag.location;
279

    
280
        String result = "line " + CdmUtils.concat(" - ", start, end);
281

    
282
        return result;
283
    }
284

    
285
    private VerbatimTimePeriod assertDateYear(RisReferenceImportState state, Integer year, VerbatimTimePeriod date, RisValue py) {
286
        if (year == null && date == null){
287
            return null;
288
        }else if (year == null){
289
            return date;
290
        }else if (date == null){
291
            return TimePeriodParser.parseStringVerbatim(String.valueOf(year));
292
        }else{
293
            if  (!year.equals(date.getStartYear())){
294
                if (date.getStartYear() == null){
295
                    date.setStartYear(year);
296
                }else if (isNotBlank(date.getFreeText())){
297
                    date.setStartYear(year);  //does this happen at all?
298
                    String message = "Year 'PY' and date 'DA' are not consistent. PY is neglected.";
299
                    state.getResult().addWarning(message, null, py.location);
300
                    return date;
301
                }else{
302
                    String message = "Year 'PY' and date 'DA' are not consistent. DA is used for freetext and PY is used for (start) year.";
303
                    state.getResult().addWarning(message, null, py.location);
304
                    return date;
305
                }
306
            }
307
            return date;
308
        }
309
    }
310

    
311
    private RisValue assertEqual(RisReferenceImportState state, String meaning, RisValue val1, RisValue val2) {
312
        if (val1 != null && val2 != null && !val1.value.equals(val2.value)){
313
            String message = "The tags '%s' and '%s' are not equal but have a similar meaning ('%s'). "
314
                    + "%s was used and %s neglected";
315
            message = String.format(message, val1.tag.name(), val2.tag.name(), meaning , val1.tag.name(), val2.tag.name());
316
            state.getResult().addWarning(message, null, val1.location);
317
        }
318
        return val1 != null ? val1 : val2;
319
    }
320

    
321
    private VerbatimTimePeriod makeDate(RisReferenceImportState state, RisValue da) {
322
        if (da == null){
323
            return null;
324
        }
325
        if (! da.value.matches("([0-9]{4})?(\\/([0-9]{2})?(\\/([0-9]{2})?(\\/.*)?)?)?")){
326
            String message = "Tag '%s' has incorrect format. Only exactly 'dddd/dd/dd/any text' is allowed (where d is a digit), but was '%s'";
327
            message = String.format(message, da.tag.name(), da.value);
328
            state.getResult().addWarning(message, null, da.location);
329
            return null;
330
        }
331
        String[] split = da.value.split("/");
332
        VerbatimTimePeriod tp = VerbatimTimePeriod.NewVerbatimInstance();
333
        if (split.length > 0 && isNotBlank(split[0])){
334
            tp.setStartYear(Integer.valueOf(split[0]));
335
        }
336
        if (split.length > 1 && isNotBlank(split[1])){
337
            tp.setStartMonth(Integer.valueOf(split[1]));
338
        }
339
        if (split.length > 2 && isNotBlank(split[2])){
340
            tp.setStartDay(Integer.valueOf(split[2]));
341
        }
342
        if (split.length > 3 && isNotBlank(split[3])){
343
            List<String> other = Arrays.asList(split).subList(3, split.length);
344
            String otherStr = CdmUtils.concat("/", other.toArray(new String[other.size()]));
345
            tp.setFreeText(tp.toString() + " " + otherStr);
346
        }
347
        return tp;
348
    }
349

    
350
    private Integer makeYear(RisReferenceImportState state, RisValue py) {
351
        if (py == null){
352
            return null;
353
        }
354
        if (py.value.matches("[0-9]{4}")){
355
            return Integer.valueOf(py.value);
356
        }else{
357
            String message = "Tag '%s' has incorrect format. Only exactly 4 digits are allowed, but was '%s'";
358
            message = String.format(message, py.tag.name(), py.value);
359
            state.getResult().addWarning(message, null, py.location);
360
            return null;
361
        }
362
    }
363

    
364
    private TeamOrPersonBase<?> makeAuthor(RisReferenceImportState state, List<RisValue> list) {
365
        if (list.size() == 1){
366
            return makePerson(state, list.get(0));
367
        }else{
368
            Team team = Team.NewInstance();
369
            for (RisValue value : list){
370
                team.addTeamMember(makePerson(state, value));
371
            }
372
            return team;
373
        }
374
    }
375

    
376
    private Person makePerson(RisReferenceImportState state, RisValue risValue) {
377
        Person person = Person.NewInstance();
378
        String[] split = risValue.value.split(",");
379
        if (split.length >= 1){
380
            person.setFamilyName(split[0].trim());
381
        }
382
        if (split.length >= 2){
383
            person.setGivenName(split[1].trim());
384
        }
385
        if (split.length >= 3){
386
            person.setSuffix(split[2].trim());
387
        }
388

    
389
        return person;
390
    }
391

    
392
    /**
393
     * Returns the single value for the given tag
394
     * and removes the tag from the record.
395
     * If more than 1 value exists this is logged
396
     * as a warning.
397
     */
398
    private RisValue getSingleValue(RisReferenceImportState state,
399
            Map<RisReferenceTag, List<RisValue>> record,
400
            RisReferenceTag tag) {
401
        return getSingleValue(state, record, tag, true);
402
    }
403

    
404
    /**
405
     * Returns the single value for the given tag
406
     * and removes the tag from the record.
407
     * If more than 1 value exists this is logged
408
     * as a warning.
409
     */
410
    private RisValue getSingleValue(RisReferenceImportState state,
411
            Map<RisReferenceTag, List<RisValue>> record,
412
            RisReferenceTag tag, boolean remove) {
413
        List<RisValue> list = record.get(tag);
414
        if (list == null){
415
            return null;
416
        }
417
        assertSingle(state, list, tag);
418
        if (remove){
419
            record.remove(tag);
420
        }
421
        return list.get(0);
422
    }
423

    
424
    private List<RisValue> getListValue(Map<RisReferenceTag, List<RisValue>> record,
425
            RisReferenceTag tag) {
426
        List<RisValue> list = record.get(tag);
427
        record.remove(tag);
428
        if (list == null){
429
            list = new ArrayList<>();
430
        }
431
        return list;
432
    }
433

    
434
    private void assertSingle(RisReferenceImportState state, List<RisValue> list, RisReferenceTag tag) {
435
        if (list.size() > 1){
436
            String message = "There is more than 1 tag '%s' but only 1 tag is supported by RIS format or"
437
                    + " by the current import implementation.";
438
            message = String.format(message, tag.name());
439
            state.getResult().addWarning(message, null, list.get(0).location + "ff");
440
        }else if (list.isEmpty()){
441
            state.getResult().addError("A tag list was empty. This should not happen and is a programming code error");
442
        }
443
    }
444

    
445
    private ReferenceType makeReferenceType(RisReferenceImportState state,
446
            Map<RisReferenceTag, List<RisValue>> record) {
447
        RisReferenceTag tyTag = RisReferenceTag.TY;
448
        RisValue value = this.getSingleValue(state, record, tyTag, false);
449
        String typeStr = value.value;
450
        RisRecordType type = RisRecordType.valueOf(typeStr);
451
        ReferenceType cdmType = type.getCdmReferenceType();
452
        return cdmType;
453
    }
454

    
455
    @Override
456
    protected boolean doCheck(RisReferenceImportState state) {
457
        return true;
458
    }
459

    
460
    @Override
461
    protected boolean isIgnore(RisReferenceImportState state) {
462
        return false;
463
    }
464
}
(3-3/6)