/**
* Copyright (C) 2017 EDIT
* European Distributed Institute of Taxonomy
* http://www.e-taxonomy.eu
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* See LICENSE.TXT at the top of this package for the full license terms.
*/
9
|
package eu.etaxonomy.cdm.io.reference.ris.in;
|
10
|
|
11
|
import java.io.ByteArrayInputStream;
|
12
|
import java.io.InputStreamReader;
|
13
|
import eu.etaxonomy.cdm.common.URI;
|
14
|
import java.util.ArrayList;
|
15
|
import java.util.Arrays;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Set;
|
20
|
|
21
|
import org.apache.log4j.Logger;
|
22
|
import org.springframework.stereotype.Component;
|
23
|
|
24
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
25
|
import eu.etaxonomy.cdm.common.DOI;
|
26
|
import eu.etaxonomy.cdm.io.common.CdmImportBase;
|
27
|
import eu.etaxonomy.cdm.io.reference.ris.in.RisRecordReader.RisValue;
|
28
|
import eu.etaxonomy.cdm.model.agent.Person;
|
29
|
import eu.etaxonomy.cdm.model.agent.Team;
|
30
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
31
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
32
|
import eu.etaxonomy.cdm.model.common.AnnotationType;
|
33
|
import eu.etaxonomy.cdm.model.common.Language;
|
34
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
35
|
import eu.etaxonomy.cdm.model.common.VerbatimTimePeriod;
|
36
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
37
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
38
|
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
39
|
|
40
|
/**
|
41
|
* @author a.mueller
|
42
|
* @since 11.05.2017
|
43
|
*
|
44
|
*/
|
45
|
@Component
|
46
|
public class RisReferenceImport
|
47
|
extends CdmImportBase<RisReferenceImportConfigurator, RisReferenceImportState>{
|
48
|
|
49
|
private static final long serialVersionUID = 7022034669942979722L;
|
50
|
@SuppressWarnings("unused")
|
51
|
private static final Logger logger = Logger.getLogger(RisReferenceImport.class);
|
52
|
|
53
|
/**
|
54
|
* {@inheritDoc}
|
55
|
*/
|
56
|
@Override
|
57
|
protected void doInvoke(RisReferenceImportState state) {
|
58
|
RisReferenceImportConfigurator config = state.getConfig();
|
59
|
try {
|
60
|
// new FileReader(file)
|
61
|
byte[] data = config.getStream();
|
62
|
|
63
|
ByteArrayInputStream stream = new ByteArrayInputStream(data);
|
64
|
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
|
65
|
RisRecordReader risReader = new RisRecordReader(state, reader);
|
66
|
|
67
|
Set<Reference> referencesToSave = new HashSet<>();
|
68
|
|
69
|
|
70
|
Map<RisReferenceTag, List<RisValue>> next = risReader.readRecord();
|
71
|
while (next != RisRecordReader.EOF){
|
72
|
Reference ref;
|
73
|
String location = "";
|
74
|
try {
|
75
|
location = recordLocation(state, next);
|
76
|
ref = makeReference(state, next);
|
77
|
referencesToSave.add(ref);
|
78
|
if (ref.getInReference() != null){
|
79
|
referencesToSave.add(ref.getInReference());
|
80
|
}
|
81
|
} catch (Exception e) {
|
82
|
String message = "Unexpected exception during RIS Reference Import";
|
83
|
state.getResult().addException(e, message, location);
|
84
|
}
|
85
|
|
86
|
next = risReader.readRecord();
|
87
|
}
|
88
|
|
89
|
getReferenceService().saveOrUpdate(referencesToSave);
|
90
|
state.getResult().addNewRecords(Reference.class.getSimpleName(), referencesToSave.size());
|
91
|
|
92
|
} catch (Exception e) {
|
93
|
String message = "Unexpected exception during RIS Reference Import";
|
94
|
state.getResult().addException(e, message);
|
95
|
}
|
96
|
|
97
|
//unhandled
|
98
|
Map<RisReferenceTag, Integer> unhandled = state.getUnhandled();
|
99
|
for (RisReferenceTag tag : unhandled.keySet()){
|
100
|
String message = "RIS tag %s (%s) not yet handled. n = %d";
|
101
|
message = String .format(message, tag.name(), tag.getDescription(), unhandled.get(tag));
|
102
|
state.getResult().addWarning(message);
|
103
|
}
|
104
|
}
|
105
|
|
106
|
/**
|
107
|
* @param state
|
108
|
* @param next
|
109
|
* @return
|
110
|
*/
|
111
|
private Reference makeReference(RisReferenceImportState state,
|
112
|
Map<RisReferenceTag, List<RisValue>> record) {
|
113
|
|
114
|
//type
|
115
|
ReferenceType type = makeReferenceType(state, record);
|
116
|
Reference ref = ReferenceFactory.newReference(type);
|
117
|
Reference inRef = null;
|
118
|
if (hasInRef(ref)){
|
119
|
ReferenceType inRefType =
|
120
|
type == ReferenceType.Article ? ReferenceType.Journal:
|
121
|
type == ReferenceType.BookSection ? ReferenceType.Book :
|
122
|
ReferenceType.Generic;
|
123
|
inRef = ReferenceFactory.newReference(inRefType);
|
124
|
ref.setInReference(inRef);
|
125
|
}
|
126
|
Reference higherRef = inRef == null ? ref : inRef;
|
127
|
|
128
|
|
129
|
//Title
|
130
|
RisValue t1 = getSingleValue(state, record, RisReferenceTag.T1);
|
131
|
RisValue ti = getSingleValue(state, record, RisReferenceTag.TI);
|
132
|
RisValue value = assertEqual(state, "title", t1, ti);
|
133
|
if (value != null){
|
134
|
ref.setTitle(value.value);
|
135
|
}
|
136
|
|
137
|
//Journal title
|
138
|
RisValue t2 = getSingleValue(state, record, RisReferenceTag.T2); //Secondary Title (journal title, if applicable)
|
139
|
|
140
|
if (higherRef.getType() == ReferenceType.Journal){
|
141
|
RisValue jf = getSingleValue(state, record, RisReferenceTag.JF); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
|
142
|
RisValue jo = getSingleValue(state, record, RisReferenceTag.JO); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
|
143
|
RisValue x = assertEqual(state, "Journal/Periodical name: full format", jf, jo);
|
144
|
x = assertEqual(state, "Journal title", t2, x);
|
145
|
if (x != null){
|
146
|
higherRef.setTitle(x.value);
|
147
|
}
|
148
|
}else{
|
149
|
//TODO
|
150
|
}
|
151
|
|
152
|
//ST (remove as same as TI or T1), not handled otherwise
|
153
|
RisValue st = getSingleValue(state, record, RisReferenceTag.ST, false); //Short title
|
154
|
if (st != null && st.value.equals(ref.getTitle())){
|
155
|
record.remove(RisReferenceTag.ST);
|
156
|
}
|
157
|
|
158
|
//Author
|
159
|
List<RisValue> list = getListValue(record, RisReferenceTag.AU);
|
160
|
if (!list.isEmpty()){
|
161
|
TeamOrPersonBase<?> author = makeAuthor(state, list);
|
162
|
ref.setAuthorship(author);
|
163
|
}
|
164
|
|
165
|
//Date
|
166
|
// RisValue y1 = getSingleValue(state, record, RisReferenceTag.Y1); //Primary Date
|
167
|
RisValue py = getSingleValue(state, record, RisReferenceTag.PY);
|
168
|
RisValue da = getSingleValue(state, record, RisReferenceTag.DA);
|
169
|
Integer year = makeYear(state, py);
|
170
|
VerbatimTimePeriod date = makeDate(state, da);
|
171
|
assertDateYear(state, year, date, py);
|
172
|
ref.setDatePublished(date);
|
173
|
//TODO y1 not yet handled
|
174
|
|
175
|
//Note
|
176
|
RisValue n1 = getSingleValue(state, record, RisReferenceTag.N1); //Note
|
177
|
if (n1 != null){
|
178
|
Annotation annotation = Annotation.NewInstance(n1.value, AnnotationType.EDITORIAL(), Language.DEFAULT());
|
179
|
ref.addAnnotation(annotation);
|
180
|
}
|
181
|
|
182
|
//DOI
|
183
|
RisValue doiVal = getSingleValue(state, record, RisReferenceTag.DO); //Doi
|
184
|
if (doiVal != null){
|
185
|
DOI doi;
|
186
|
try {
|
187
|
String doiStr = doiVal.value;
|
188
|
if (doiStr.toLowerCase().startsWith("doi ")){
|
189
|
doiStr = doiStr.substring(4).trim();
|
190
|
}
|
191
|
doi = DOI.fromString(doiStr);
|
192
|
ref.setDoi(doi);
|
193
|
} catch (IllegalArgumentException e) {
|
194
|
String message = "DOI could not be recognized: " + doiVal.value;
|
195
|
state.getResult().addWarning(message, null, doiVal.location);
|
196
|
}
|
197
|
}
|
198
|
|
199
|
//UR
|
200
|
RisValue ur = getSingleValue(state, record, RisReferenceTag.UR); //URL
|
201
|
if (ur != null){
|
202
|
URI uri;
|
203
|
try {
|
204
|
String urStr = ur.value;
|
205
|
uri = URI.create(urStr);
|
206
|
ref.setUri(uri);
|
207
|
} catch (Exception e) {
|
208
|
String message = "URL could not be recognized: " + ur.value;
|
209
|
state.getResult().addWarning(message, null, ur.location);
|
210
|
}
|
211
|
}
|
212
|
|
213
|
//Pages
|
214
|
RisValue sp = getSingleValue(state, record, RisReferenceTag.SP);
|
215
|
RisValue ep = getSingleValue(state, record, RisReferenceTag.EP);
|
216
|
String pages = CdmUtils.concat("-", sp != null ? sp.value : null, ep != null ? ep.value : null);
|
217
|
ref.setPages(pages);
|
218
|
|
219
|
//Volume
|
220
|
RisValue vl = getSingleValue(state, record, RisReferenceTag.VL);
|
221
|
RisValue is = getSingleValue(state, record, RisReferenceTag.IS);
|
222
|
String vol = vl == null? "": vl.value + (is != null ? "("+ is.value + ")": "");
|
223
|
ref.setVolume(vol);
|
224
|
|
225
|
//Publisher
|
226
|
RisValue pb = getSingleValue(state, record, RisReferenceTag.PB);
|
227
|
if (pb != null){
|
228
|
higherRef.setPublisher(pb.value);
|
229
|
}
|
230
|
|
231
|
//Abstract
|
232
|
RisValue ab = getSingleValue(state, record, RisReferenceTag.AB);
|
233
|
RisValue n2 = getSingleValue(state, record, RisReferenceTag.N2);
|
234
|
RisValue abst = assertEqual(state, "Abstract", ab, n2);
|
235
|
if (abst != null){
|
236
|
ref.setReferenceAbstract(abst.value);
|
237
|
}
|
238
|
|
239
|
//ISSN/ISBN
|
240
|
RisValue sn = getSingleValue(state, record, RisReferenceTag.SN);
|
241
|
if (sn != null){
|
242
|
if (higherRef.getType() == ReferenceType.Journal){
|
243
|
higherRef.setIssn(sn.value);
|
244
|
}else{
|
245
|
higherRef.setIsbn(sn.value);
|
246
|
}
|
247
|
}
|
248
|
|
249
|
//ID
|
250
|
RisValue id = getSingleValue(state, record, RisReferenceTag.ID);
|
251
|
String idStr = id != null? id.value: null;
|
252
|
String recLoc = recordLocation(state, record);
|
253
|
ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
|
254
|
if (inRef != null){
|
255
|
ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
|
256
|
|
257
|
}
|
258
|
|
259
|
//remove
|
260
|
record.remove(RisReferenceTag.ER);
|
261
|
record.remove(RisReferenceTag.TY);
|
262
|
|
263
|
for (RisReferenceTag tag : record.keySet()){
|
264
|
// String message = "RIS Tag " + tag.name() + " not yet handled";
|
265
|
// state.getResult().addWarning(message, record.get(tag).get(0).location);
|
266
|
state.addUnhandled(tag);
|
267
|
|
268
|
//TODO add as annotation or extension
|
269
|
}
|
270
|
|
271
|
return ref;
|
272
|
}
|
273
|
|
274
|
/**
|
275
|
* @param ref
|
276
|
* @return
|
277
|
*/
|
278
|
private boolean hasInRef(Reference ref) {
|
279
|
return ref.getType() == ReferenceType.BookSection || ref.getType() == ReferenceType.Article ;
|
280
|
}
|
281
|
|
282
|
|
283
|
/**
|
284
|
* @param state
|
285
|
* @param record
|
286
|
* @return
|
287
|
*/
|
288
|
private String recordLocation(RisReferenceImportState state,
|
289
|
Map<RisReferenceTag, List<RisValue>> record) {
|
290
|
RisValue typeTag = this.getSingleValue(state, record, RisReferenceTag.TY, false);
|
291
|
RisValue erTag = this.getSingleValue(state, record, RisReferenceTag.ER, false);
|
292
|
|
293
|
String start = typeTag == null ? "??" : typeTag.location;
|
294
|
String end = erTag == null ? "??" : erTag.location;
|
295
|
|
296
|
String result = "line " + CdmUtils.concat(" - ", start, end);
|
297
|
|
298
|
return result;
|
299
|
}
|
300
|
|
301
|
/**
|
302
|
* @param state
|
303
|
* @param year
|
304
|
* @param date
|
305
|
*/
|
306
|
private void assertDateYear(RisReferenceImportState state, Integer year, TimePeriod date, RisValue py) {
|
307
|
if (year != null && date != null && !year.equals(date.getStartYear())){
|
308
|
String message = "Year 'PY' and date 'DA' are not consistent. PY is neglected.";
|
309
|
state.getResult().addWarning(message, null, py.location);
|
310
|
}
|
311
|
}
|
312
|
|
313
|
private RisValue assertEqual(RisReferenceImportState state, String meaning, RisValue val1, RisValue val2) {
|
314
|
if (val1 != null && val2 != null && !val1.value.equals(val2.value)){
|
315
|
String message = "The tags '%s' and '%s' are not equal but have a similar meaning ('%s'). "
|
316
|
+ "%s was used and %s neglected";
|
317
|
message = String.format(message, val1.tag.name(), val2.tag.name(), meaning , val1.tag.name(), val2.tag.name());
|
318
|
state.getResult().addWarning(message, null, val1.location);
|
319
|
}
|
320
|
return val1 != null ? val1 : val2;
|
321
|
}
|
322
|
|
323
|
/**
|
324
|
* @param state
|
325
|
* @param da
|
326
|
* @return
|
327
|
*/
|
328
|
private VerbatimTimePeriod makeDate(RisReferenceImportState state, RisValue da) {
|
329
|
if (da == null){
|
330
|
return null;
|
331
|
}
|
332
|
if (! da.value.matches("([0-9]{4})?(\\/([0-9]{2})?(\\/([0-9]{2})?(\\/.*)?)?)?")){
|
333
|
String message = "Tag '%s' has incorrect format. Only exactly 'dddd/dd/dd/any text' is allowed (where d is a digit), but was '%s'";
|
334
|
message = String.format(message, da.tag.name(), da.value);
|
335
|
state.getResult().addWarning(message, null, da.location);
|
336
|
return null;
|
337
|
}
|
338
|
String[] split = da.value.split("/");
|
339
|
VerbatimTimePeriod tp = VerbatimTimePeriod.NewVerbatimInstance();
|
340
|
if (split.length > 0 && isNotBlank(split[0])){
|
341
|
tp.setStartYear(Integer.valueOf(split[0]));
|
342
|
}
|
343
|
if (split.length > 1 && isNotBlank(split[1])){
|
344
|
tp.setStartMonth(Integer.valueOf(split[1]));
|
345
|
}
|
346
|
if (split.length > 2 && isNotBlank(split[2])){
|
347
|
tp.setStartDay(Integer.valueOf(split[2]));
|
348
|
}
|
349
|
if (split.length > 3 && isNotBlank(split[3])){
|
350
|
List<String> other = Arrays.asList(split).subList(3, split.length);
|
351
|
String otherStr = CdmUtils.concat("/", other.toArray(new String[other.size()]));
|
352
|
tp.setFreeText(tp.toString() + " " + otherStr);
|
353
|
}
|
354
|
return tp;
|
355
|
}
|
356
|
|
357
|
/**
|
358
|
* @param state
|
359
|
* @param py
|
360
|
* @return
|
361
|
*/
|
362
|
private Integer makeYear(RisReferenceImportState state, RisValue py) {
|
363
|
if (py == null){
|
364
|
return null;
|
365
|
}
|
366
|
if (py.value.matches("[0-9]{4}")){
|
367
|
return Integer.valueOf(py.value);
|
368
|
}else{
|
369
|
String message = "Tag '%s' has incorrect format. Only exactly 4 digits are allowed, but was '%s'";
|
370
|
message = String.format(message, py.tag.name(), py.value);
|
371
|
state.getResult().addWarning(message, null, py.location);
|
372
|
return null;
|
373
|
}
|
374
|
}
|
375
|
|
376
|
/**
|
377
|
* @param state
|
378
|
* @param list
|
379
|
* @return
|
380
|
*/
|
381
|
private TeamOrPersonBase<?> makeAuthor(RisReferenceImportState state, List<RisValue> list) {
|
382
|
if (list.size() == 1){
|
383
|
return makePerson(state, list.get(0));
|
384
|
}else{
|
385
|
Team team = Team.NewInstance();
|
386
|
for (RisValue value : list){
|
387
|
team.addTeamMember(makePerson(state, value));
|
388
|
}
|
389
|
return team;
|
390
|
}
|
391
|
}
|
392
|
|
393
|
/**
|
394
|
* @param state
|
395
|
* @param risValue
|
396
|
* @return
|
397
|
*/
|
398
|
private Person makePerson(RisReferenceImportState state, RisValue risValue) {
|
399
|
Person person = Person.NewInstance();
|
400
|
String[] split = risValue.value.split(",");
|
401
|
if (split.length >= 1){
|
402
|
person.setFamilyName(split[0].trim());
|
403
|
}
|
404
|
if (split.length >= 2){
|
405
|
person.setGivenName(split[1].trim());
|
406
|
}
|
407
|
if (split.length >= 3){
|
408
|
person.setSuffix(split[2].trim());
|
409
|
}
|
410
|
|
411
|
return person;
|
412
|
}
|
413
|
|
414
|
/**
|
415
|
* Returns the single value for the given tag
|
416
|
* and removes the tag from the record.
|
417
|
* If more than 1 value exists this is logged
|
418
|
* as a warning.
|
419
|
*/
|
420
|
private RisValue getSingleValue(RisReferenceImportState state,
|
421
|
Map<RisReferenceTag, List<RisValue>> record,
|
422
|
RisReferenceTag tag) {
|
423
|
return getSingleValue(state, record, tag, true);
|
424
|
}
|
425
|
|
426
|
/**
|
427
|
* Returns the single value for the given tag
|
428
|
* and removes the tag from the record.
|
429
|
* If more than 1 value exists this is logged
|
430
|
* as a warning.
|
431
|
*/
|
432
|
private RisValue getSingleValue(RisReferenceImportState state,
|
433
|
Map<RisReferenceTag, List<RisValue>> record,
|
434
|
RisReferenceTag tag, boolean remove) {
|
435
|
List<RisValue> list = record.get(tag);
|
436
|
if (list == null){
|
437
|
return null;
|
438
|
}
|
439
|
assertSingle(state, list, tag);
|
440
|
if (remove){
|
441
|
record.remove(tag);
|
442
|
}
|
443
|
return list.get(0);
|
444
|
}
|
445
|
|
446
|
private List<RisValue> getListValue(Map<RisReferenceTag, List<RisValue>> record,
|
447
|
RisReferenceTag tag) {
|
448
|
List<RisValue> list = record.get(tag);
|
449
|
record.remove(tag);
|
450
|
if (list == null){
|
451
|
list = new ArrayList<>();
|
452
|
}
|
453
|
return list;
|
454
|
}
|
455
|
|
456
|
/**
|
457
|
* @param state
|
458
|
* @param list
|
459
|
* @param tag
|
460
|
*/
|
461
|
private void assertSingle(RisReferenceImportState state, List<RisValue> list, RisReferenceTag tag) {
|
462
|
if (list.size() > 1){
|
463
|
String message = "There is more than 1 tag '%s' but only 1 tag is supported by RIS format or"
|
464
|
+ " by the current import implementation.";
|
465
|
message = String.format(message, tag.name());
|
466
|
state.getResult().addWarning(message, null, list.get(0).location + "ff");
|
467
|
}else if (list.isEmpty()){
|
468
|
state.getResult().addError("A tag list was empty. This should not happen and is a programming code error");
|
469
|
}
|
470
|
}
|
471
|
|
472
|
/**
|
473
|
* @param state
|
474
|
* @param next
|
475
|
* @return
|
476
|
*/
|
477
|
private ReferenceType makeReferenceType(RisReferenceImportState state,
|
478
|
Map<RisReferenceTag, List<RisValue>> record) {
|
479
|
RisReferenceTag tyTag = RisReferenceTag.TY;
|
480
|
RisValue value = this.getSingleValue(state, record, tyTag, false);
|
481
|
String typeStr = value.value;
|
482
|
RisRecordType type = RisRecordType.valueOf(typeStr);
|
483
|
ReferenceType cdmType = type.getCdmReferenceType();
|
484
|
return cdmType;
|
485
|
}
|
486
|
|
487
|
/**
|
488
|
* {@inheritDoc}
|
489
|
*/
|
490
|
@Override
|
491
|
protected boolean doCheck(RisReferenceImportState state) {
|
492
|
return true;
|
493
|
}
|
494
|
|
495
|
/**
|
496
|
* {@inheritDoc}
|
497
|
*/
|
498
|
@Override
|
499
|
protected boolean isIgnore(RisReferenceImportState state) {
|
500
|
return false;
|
501
|
}
|
502
|
}
|