1
|
/**
|
2
|
* Copyright (C) 2017 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.reference.ris.in;
|
10
|
|
11
|
import java.io.ByteArrayInputStream;
|
12
|
import java.io.InputStreamReader;
|
13
|
import java.net.URI;
|
14
|
import java.util.ArrayList;
|
15
|
import java.util.Arrays;
|
16
|
import java.util.HashSet;
|
17
|
import java.util.List;
|
18
|
import java.util.Map;
|
19
|
import java.util.Set;
|
20
|
|
21
|
import org.apache.log4j.Logger;
|
22
|
import org.springframework.stereotype.Component;
|
23
|
|
24
|
import eu.etaxonomy.cdm.common.CdmUtils;
|
25
|
import eu.etaxonomy.cdm.common.DOI;
|
26
|
import eu.etaxonomy.cdm.io.common.CdmImportBase;
|
27
|
import eu.etaxonomy.cdm.io.reference.ris.in.RisRecordReader.RisValue;
|
28
|
import eu.etaxonomy.cdm.model.agent.Person;
|
29
|
import eu.etaxonomy.cdm.model.agent.Team;
|
30
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
31
|
import eu.etaxonomy.cdm.model.common.Annotation;
|
32
|
import eu.etaxonomy.cdm.model.common.AnnotationType;
|
33
|
import eu.etaxonomy.cdm.model.common.Language;
|
34
|
import eu.etaxonomy.cdm.model.common.TimePeriod;
|
35
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
36
|
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
|
37
|
import eu.etaxonomy.cdm.model.reference.ReferenceType;
|
38
|
|
39
|
/**
|
40
|
* @author a.mueller
|
41
|
* @since 11.05.2017
|
42
|
*
|
43
|
*/
|
44
|
@Component
|
45
|
public class RisReferenceImport
|
46
|
extends CdmImportBase<RisReferenceImportConfigurator, RisReferenceImportState>{
|
47
|
|
48
|
private static final long serialVersionUID = 7022034669942979722L;
|
49
|
@SuppressWarnings("unused")
|
50
|
private static final Logger logger = Logger.getLogger(RisReferenceImport.class);
|
51
|
|
52
|
/**
|
53
|
* {@inheritDoc}
|
54
|
*/
|
55
|
@Override
|
56
|
protected void doInvoke(RisReferenceImportState state) {
|
57
|
RisReferenceImportConfigurator config = state.getConfig();
|
58
|
try {
|
59
|
// new FileReader(file)
|
60
|
byte[] data = config.getStream();
|
61
|
|
62
|
ByteArrayInputStream stream = new ByteArrayInputStream(data);
|
63
|
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
|
64
|
RisRecordReader risReader = new RisRecordReader(state, reader);
|
65
|
|
66
|
Set<Reference> referencesToSave = new HashSet<>();
|
67
|
|
68
|
|
69
|
Map<RisReferenceTag, List<RisValue>> next = risReader.readRecord();
|
70
|
while (next != RisRecordReader.EOF){
|
71
|
Reference ref;
|
72
|
String location = "";
|
73
|
try {
|
74
|
location = recordLocation(state, next);
|
75
|
ref = makeReference(state, next);
|
76
|
referencesToSave.add(ref);
|
77
|
if (ref.getInReference() != null){
|
78
|
referencesToSave.add(ref.getInReference());
|
79
|
}
|
80
|
} catch (Exception e) {
|
81
|
String message = "Unexpected exception during RIS Reference Import";
|
82
|
state.getResult().addException(e, message, location);
|
83
|
}
|
84
|
|
85
|
next = risReader.readRecord();
|
86
|
}
|
87
|
|
88
|
getReferenceService().saveOrUpdate(referencesToSave);
|
89
|
state.getResult().addNewRecords(Reference.class.getSimpleName(), referencesToSave.size());
|
90
|
|
91
|
} catch (Exception e) {
|
92
|
String message = "Unexpected exception during RIS Reference Import";
|
93
|
state.getResult().addException(e, message);
|
94
|
}
|
95
|
|
96
|
//unhandled
|
97
|
Map<RisReferenceTag, Integer> unhandled = state.getUnhandled();
|
98
|
for (RisReferenceTag tag : unhandled.keySet()){
|
99
|
String message = "RIS tag %s (%s) not yet handled. n = %d";
|
100
|
message = String .format(message, tag.name(), tag.getDescription(), unhandled.get(tag));
|
101
|
state.getResult().addWarning(message);
|
102
|
}
|
103
|
}
|
104
|
|
105
|
/**
|
106
|
* @param state
|
107
|
* @param next
|
108
|
* @return
|
109
|
*/
|
110
|
private Reference makeReference(RisReferenceImportState state,
|
111
|
Map<RisReferenceTag, List<RisValue>> record) {
|
112
|
|
113
|
//type
|
114
|
ReferenceType type = makeReferenceType(state, record);
|
115
|
Reference ref = ReferenceFactory.newReference(type);
|
116
|
Reference inRef = null;
|
117
|
if (hasInRef(ref)){
|
118
|
ReferenceType inRefType =
|
119
|
type == ReferenceType.Article ? ReferenceType.Journal:
|
120
|
type == ReferenceType.BookSection ? ReferenceType.Book :
|
121
|
ReferenceType.Generic;
|
122
|
inRef = ReferenceFactory.newReference(inRefType);
|
123
|
ref.setInReference(inRef);
|
124
|
}
|
125
|
Reference higherRef = inRef == null ? ref : inRef;
|
126
|
|
127
|
|
128
|
//Title
|
129
|
RisValue t1 = getSingleValue(state, record, RisReferenceTag.T1);
|
130
|
RisValue ti = getSingleValue(state, record, RisReferenceTag.TI);
|
131
|
RisValue value = assertEqual(state, "title", t1, ti);
|
132
|
if (value != null){
|
133
|
ref.setTitle(value.value);
|
134
|
}
|
135
|
|
136
|
//Journal title
|
137
|
RisValue t2 = getSingleValue(state, record, RisReferenceTag.T2); //Secondary Title (journal title, if applicable)
|
138
|
|
139
|
if (higherRef.getType() == ReferenceType.Journal){
|
140
|
RisValue jf = getSingleValue(state, record, RisReferenceTag.JF); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
|
141
|
RisValue jo = getSingleValue(state, record, RisReferenceTag.JO); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
|
142
|
RisValue x = assertEqual(state, "Journal/Periodical name: full format", jf, jo);
|
143
|
x = assertEqual(state, "Journal title", t2, x);
|
144
|
if (x != null){
|
145
|
higherRef.setTitle(x.value);
|
146
|
}
|
147
|
}else{
|
148
|
//TODO
|
149
|
}
|
150
|
|
151
|
//ST (remove as same as TI or T1), not handled otherwise
|
152
|
RisValue st = getSingleValue(state, record, RisReferenceTag.ST, false); //Short title
|
153
|
if (st != null && st.value.equals(ref.getTitle())){
|
154
|
record.remove(RisReferenceTag.ST);
|
155
|
}
|
156
|
|
157
|
//Author
|
158
|
List<RisValue> list = getListValue(record, RisReferenceTag.AU);
|
159
|
if (!list.isEmpty()){
|
160
|
TeamOrPersonBase<?> author = makeAuthor(state, list);
|
161
|
ref.setAuthorship(author);
|
162
|
}
|
163
|
|
164
|
//Date
|
165
|
// RisValue y1 = getSingleValue(state, record, RisReferenceTag.Y1); //Primary Date
|
166
|
RisValue py = getSingleValue(state, record, RisReferenceTag.PY);
|
167
|
RisValue da = getSingleValue(state, record, RisReferenceTag.DA);
|
168
|
Integer year = makeYear(state, py);
|
169
|
TimePeriod date = makeDate(state, da);
|
170
|
assertDateYear(state, year, date, py);
|
171
|
ref.setDatePublished(date);
|
172
|
//TODO y1 not yet handled
|
173
|
|
174
|
//Note
|
175
|
RisValue n1 = getSingleValue(state, record, RisReferenceTag.N1); //Note
|
176
|
if (n1 != null){
|
177
|
Annotation annotation = Annotation.NewInstance(n1.value, AnnotationType.EDITORIAL(), Language.DEFAULT());
|
178
|
ref.addAnnotation(annotation);
|
179
|
}
|
180
|
|
181
|
//DOI
|
182
|
RisValue doiVal = getSingleValue(state, record, RisReferenceTag.DO); //Doi
|
183
|
if (doiVal != null){
|
184
|
DOI doi;
|
185
|
try {
|
186
|
String doiStr = doiVal.value;
|
187
|
if (doiStr.toLowerCase().startsWith("doi ")){
|
188
|
doiStr = doiStr.substring(4).trim();
|
189
|
}
|
190
|
doi = DOI.fromString(doiStr);
|
191
|
ref.setDoi(doi);
|
192
|
} catch (IllegalArgumentException e) {
|
193
|
String message = "DOI could not be recognized: " + doiVal.value;
|
194
|
state.getResult().addWarning(message, null, doiVal.location);
|
195
|
}
|
196
|
}
|
197
|
|
198
|
//UR
|
199
|
RisValue ur = getSingleValue(state, record, RisReferenceTag.UR); //URL
|
200
|
if (ur != null){
|
201
|
URI uri;
|
202
|
try {
|
203
|
String urStr = ur.value;
|
204
|
uri = URI.create(urStr);
|
205
|
ref.setUri(uri);
|
206
|
} catch (Exception e) {
|
207
|
String message = "URL could not be recognized: " + ur.value;
|
208
|
state.getResult().addWarning(message, null, ur.location);
|
209
|
}
|
210
|
}
|
211
|
|
212
|
//Pages
|
213
|
RisValue sp = getSingleValue(state, record, RisReferenceTag.SP);
|
214
|
RisValue ep = getSingleValue(state, record, RisReferenceTag.EP);
|
215
|
String pages = CdmUtils.concat("-", sp != null ? sp.value : null, ep != null ? ep.value : null);
|
216
|
ref.setPages(pages);
|
217
|
|
218
|
//Volume
|
219
|
RisValue vl = getSingleValue(state, record, RisReferenceTag.VL);
|
220
|
RisValue is = getSingleValue(state, record, RisReferenceTag.IS);
|
221
|
String vol = vl == null? "": vl.value + (is != null ? "("+ is.value + ")": "");
|
222
|
ref.setVolume(vol);
|
223
|
|
224
|
//Publisher
|
225
|
RisValue pb = getSingleValue(state, record, RisReferenceTag.PB);
|
226
|
if (pb != null){
|
227
|
higherRef.setPublisher(pb.value);
|
228
|
}
|
229
|
|
230
|
//Abstract
|
231
|
RisValue ab = getSingleValue(state, record, RisReferenceTag.AB);
|
232
|
RisValue n2 = getSingleValue(state, record, RisReferenceTag.N2);
|
233
|
RisValue abst = assertEqual(state, "Abstract", ab, n2);
|
234
|
if (abst != null){
|
235
|
ref.setReferenceAbstract(abst.value);
|
236
|
}
|
237
|
|
238
|
//ISSN/ISBN
|
239
|
RisValue sn = getSingleValue(state, record, RisReferenceTag.SN);
|
240
|
if (sn != null){
|
241
|
if (higherRef.getType() == ReferenceType.Journal){
|
242
|
higherRef.setIssn(sn.value);
|
243
|
}else{
|
244
|
higherRef.setIsbn(sn.value);
|
245
|
}
|
246
|
}
|
247
|
|
248
|
//ID
|
249
|
RisValue id = getSingleValue(state, record, RisReferenceTag.ID);
|
250
|
String idStr = id != null? id.value: null;
|
251
|
String recLoc = recordLocation(state, record);
|
252
|
ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
|
253
|
if (inRef != null){
|
254
|
ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
|
255
|
|
256
|
}
|
257
|
|
258
|
//remove
|
259
|
record.remove(RisReferenceTag.ER);
|
260
|
record.remove(RisReferenceTag.TY);
|
261
|
|
262
|
for (RisReferenceTag tag : record.keySet()){
|
263
|
// String message = "RIS Tag " + tag.name() + " not yet handled";
|
264
|
// state.getResult().addWarning(message, record.get(tag).get(0).location);
|
265
|
state.addUnhandled(tag);
|
266
|
|
267
|
//TODO add as annotation or extension
|
268
|
}
|
269
|
|
270
|
return ref;
|
271
|
}
|
272
|
|
273
|
/**
|
274
|
* @param ref
|
275
|
* @return
|
276
|
*/
|
277
|
private boolean hasInRef(Reference ref) {
|
278
|
return ref.getType() == ReferenceType.BookSection || ref.getType() == ReferenceType.Article ;
|
279
|
}
|
280
|
|
281
|
|
282
|
/**
|
283
|
* @param state
|
284
|
* @param record
|
285
|
* @return
|
286
|
*/
|
287
|
private String recordLocation(RisReferenceImportState state,
|
288
|
Map<RisReferenceTag, List<RisValue>> record) {
|
289
|
RisValue typeTag = this.getSingleValue(state, record, RisReferenceTag.TY, false);
|
290
|
RisValue erTag = this.getSingleValue(state, record, RisReferenceTag.ER, false);
|
291
|
|
292
|
String start = typeTag == null ? "??" : typeTag.location;
|
293
|
String end = erTag == null ? "??" : erTag.location;
|
294
|
|
295
|
String result = "line " + CdmUtils.concat(" - ", start, end);
|
296
|
|
297
|
return result;
|
298
|
}
|
299
|
|
300
|
/**
|
301
|
* @param state
|
302
|
* @param year
|
303
|
* @param date
|
304
|
*/
|
305
|
private void assertDateYear(RisReferenceImportState state, Integer year, TimePeriod date, RisValue py) {
|
306
|
if (year != null && date != null && !year.equals(date.getStartYear())){
|
307
|
String message = "Year 'PY' and date 'DA' are not consistent. PY is neglected.";
|
308
|
state.getResult().addWarning(message, null, py.location);
|
309
|
}
|
310
|
}
|
311
|
|
312
|
private RisValue assertEqual(RisReferenceImportState state, String meaning, RisValue val1, RisValue val2) {
|
313
|
if (val1 != null && val2 != null && !val1.value.equals(val2.value)){
|
314
|
String message = "The tags '%s' and '%s' are not equal but have a similar meaning ('%s'). "
|
315
|
+ "%s was used and %s neglected";
|
316
|
message = String.format(message, val1.tag.name(), val2.tag.name(), meaning , val1.tag.name(), val2.tag.name());
|
317
|
state.getResult().addWarning(message, null, val1.location);
|
318
|
}
|
319
|
return val1 != null ? val1 : val2;
|
320
|
}
|
321
|
|
322
|
/**
|
323
|
* @param state
|
324
|
* @param da
|
325
|
* @return
|
326
|
*/
|
327
|
private TimePeriod makeDate(RisReferenceImportState state, RisValue da) {
|
328
|
if (da == null){
|
329
|
return null;
|
330
|
}
|
331
|
if (! da.value.matches("([0-9]{4})?(\\/([0-9]{2})?(\\/([0-9]{2})?(\\/.*)?)?)?")){
|
332
|
String message = "Tag '%s' has incorrect format. Only exactly 'dddd/dd/dd/any text' is allowed (where d is a digit), but was '%s'";
|
333
|
message = String.format(message, da.tag.name(), da.value);
|
334
|
state.getResult().addWarning(message, null, da.location);
|
335
|
return null;
|
336
|
}
|
337
|
String[] split = da.value.split("/");
|
338
|
TimePeriod tp = TimePeriod.NewInstance();
|
339
|
if (split.length > 0 && isNotBlank(split[0])){
|
340
|
tp.setStartYear(Integer.valueOf(split[0]));
|
341
|
}
|
342
|
if (split.length > 1 && isNotBlank(split[1])){
|
343
|
tp.setStartMonth(Integer.valueOf(split[1]));
|
344
|
}
|
345
|
if (split.length > 2 && isNotBlank(split[2])){
|
346
|
tp.setStartDay(Integer.valueOf(split[2]));
|
347
|
}
|
348
|
if (split.length > 3 && isNotBlank(split[3])){
|
349
|
List<String> other = Arrays.asList(split).subList(3, split.length);
|
350
|
String otherStr = CdmUtils.concat("/", other.toArray(new String[other.size()]));
|
351
|
tp.setFreeText(tp.toString() + " " + otherStr);
|
352
|
}
|
353
|
return tp;
|
354
|
}
|
355
|
|
356
|
/**
|
357
|
* @param state
|
358
|
* @param py
|
359
|
* @return
|
360
|
*/
|
361
|
private Integer makeYear(RisReferenceImportState state, RisValue py) {
|
362
|
if (py == null){
|
363
|
return null;
|
364
|
}
|
365
|
if (py.value.matches("[0-9]{4}")){
|
366
|
return Integer.valueOf(py.value);
|
367
|
}else{
|
368
|
String message = "Tag '%s' has incorrect format. Only exactly 4 digits are allowed, but was '%s'";
|
369
|
message = String.format(message, py.tag.name(), py.value);
|
370
|
state.getResult().addWarning(message, null, py.location);
|
371
|
return null;
|
372
|
}
|
373
|
}
|
374
|
|
375
|
/**
|
376
|
* @param state
|
377
|
* @param list
|
378
|
* @return
|
379
|
*/
|
380
|
private TeamOrPersonBase<?> makeAuthor(RisReferenceImportState state, List<RisValue> list) {
|
381
|
if (list.size() == 1){
|
382
|
return makePerson(state, list.get(0));
|
383
|
}else{
|
384
|
Team team = Team.NewInstance();
|
385
|
for (RisValue value : list){
|
386
|
team.addTeamMember(makePerson(state, value));
|
387
|
}
|
388
|
return team;
|
389
|
}
|
390
|
}
|
391
|
|
392
|
/**
|
393
|
* @param state
|
394
|
* @param risValue
|
395
|
* @return
|
396
|
*/
|
397
|
private Person makePerson(RisReferenceImportState state, RisValue risValue) {
|
398
|
Person person = Person.NewInstance();
|
399
|
String[] split = risValue.value.split(",");
|
400
|
if (split.length >= 1){
|
401
|
person.setFamilyName(split[0].trim());
|
402
|
}
|
403
|
if (split.length >= 2){
|
404
|
person.setGivenName(split[1].trim());
|
405
|
}
|
406
|
if (split.length >= 3){
|
407
|
person.setSuffix(split[2].trim());
|
408
|
}
|
409
|
|
410
|
return person;
|
411
|
}
|
412
|
|
413
|
/**
|
414
|
* Returns the single value for the given tag
|
415
|
* and removes the tag from the record.
|
416
|
* If more than 1 value exists this is logged
|
417
|
* as a warning.
|
418
|
*/
|
419
|
private RisValue getSingleValue(RisReferenceImportState state,
|
420
|
Map<RisReferenceTag, List<RisValue>> record,
|
421
|
RisReferenceTag tag) {
|
422
|
return getSingleValue(state, record, tag, true);
|
423
|
}
|
424
|
|
425
|
/**
|
426
|
* Returns the single value for the given tag
|
427
|
* and removes the tag from the record.
|
428
|
* If more than 1 value exists this is logged
|
429
|
* as a warning.
|
430
|
*/
|
431
|
private RisValue getSingleValue(RisReferenceImportState state,
|
432
|
Map<RisReferenceTag, List<RisValue>> record,
|
433
|
RisReferenceTag tag, boolean remove) {
|
434
|
List<RisValue> list = record.get(tag);
|
435
|
if (list == null){
|
436
|
return null;
|
437
|
}
|
438
|
assertSingle(state, list, tag);
|
439
|
if (remove){
|
440
|
record.remove(tag);
|
441
|
}
|
442
|
return list.get(0);
|
443
|
}
|
444
|
|
445
|
private List<RisValue> getListValue(Map<RisReferenceTag, List<RisValue>> record,
|
446
|
RisReferenceTag tag) {
|
447
|
List<RisValue> list = record.get(tag);
|
448
|
record.remove(tag);
|
449
|
if (list == null){
|
450
|
list = new ArrayList<>();
|
451
|
}
|
452
|
return list;
|
453
|
}
|
454
|
|
455
|
/**
|
456
|
* @param state
|
457
|
* @param list
|
458
|
* @param tag
|
459
|
*/
|
460
|
private void assertSingle(RisReferenceImportState state, List<RisValue> list, RisReferenceTag tag) {
|
461
|
if (list.size() > 1){
|
462
|
String message = "There is more than 1 tag '%s' but only 1 tag is supported by RIS format or"
|
463
|
+ " by the current import implementation.";
|
464
|
message = String.format(message, tag.name());
|
465
|
state.getResult().addWarning(message, null, list.get(0).location + "ff");
|
466
|
}else if (list.isEmpty()){
|
467
|
state.getResult().addError("A tag list was empty. This should not happen and is a programming code error");
|
468
|
}
|
469
|
}
|
470
|
|
471
|
/**
|
472
|
* @param state
|
473
|
* @param next
|
474
|
* @return
|
475
|
*/
|
476
|
private ReferenceType makeReferenceType(RisReferenceImportState state,
|
477
|
Map<RisReferenceTag, List<RisValue>> record) {
|
478
|
RisReferenceTag tyTag = RisReferenceTag.TY;
|
479
|
RisValue value = this.getSingleValue(state, record, tyTag, false);
|
480
|
String typeStr = value.value;
|
481
|
RisRecordType type = RisRecordType.valueOf(typeStr);
|
482
|
ReferenceType cdmType = type.getCdmReferenceType();
|
483
|
return cdmType;
|
484
|
}
|
485
|
|
486
|
/**
|
487
|
* {@inheritDoc}
|
488
|
*/
|
489
|
@Override
|
490
|
protected boolean doCheck(RisReferenceImportState state) {
|
491
|
return true;
|
492
|
}
|
493
|
|
494
|
/**
|
495
|
* {@inheritDoc}
|
496
|
*/
|
497
|
@Override
|
498
|
protected boolean isIgnore(RisReferenceImportState state) {
|
499
|
return false;
|
500
|
}
|
501
|
}
|