2 * Copyright (C) 2017 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
9 package eu
.etaxonomy
.cdm
.io
.reference
.ris
.in
;
11 import java
.io
.InputStreamReader
;
13 import java
.util
.ArrayList
;
14 import java
.util
.Arrays
;
15 import java
.util
.HashSet
;
16 import java
.util
.List
;
20 import org
.apache
.log4j
.Logger
;
21 import org
.springframework
.stereotype
.Component
;
23 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
24 import eu
.etaxonomy
.cdm
.common
.DOI
;
25 import eu
.etaxonomy
.cdm
.io
.common
.CdmImportBase
;
26 import eu
.etaxonomy
.cdm
.io
.reference
.ris
.in
.RisRecordReader
.RisValue
;
27 import eu
.etaxonomy
.cdm
.model
.agent
.Person
;
28 import eu
.etaxonomy
.cdm
.model
.agent
.Team
;
29 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
30 import eu
.etaxonomy
.cdm
.model
.common
.Annotation
;
31 import eu
.etaxonomy
.cdm
.model
.common
.AnnotationType
;
32 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
33 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
34 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
35 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
36 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceType
;
44 public class RisReferenceImport
45 extends CdmImportBase
<RisReferenceImportConfigurator
, RisReferenceImportState
>{
// Version identifier for Java serialization of this import class.
47 private static final long serialVersionUID
= 7022034669942979722L;
// Log4j logger for this class; the "unused" suppression is there because none of the
// visible code logs through it.
48 @SuppressWarnings("unused")
49 private static final Logger logger
= Logger
.getLogger(RisReferenceImport
.class);
// Runs the RIS reference import for the given state:
//  1. reads records one by one from the reader returned by config.getSource(),
//  2. converts every record into a Reference via makeReference(),
//  3. saves all collected references (including their in-references) and
//  4. reports every RIS tag that was counted as unhandled as a warning.
// Exceptions are not propagated but added to state.getResult().
// NOTE(review): this extract carries stray leading line numbers and lacks several
// lines (e.g. the "try {" openers matching the two catch clauses, the declarations
// of "ref" and "location", closing braces) - restore them before compiling.
55 protected void doInvoke(RisReferenceImportState state
) {
56 RisReferenceImportConfigurator config
= state
.getConfig();
58 // new FileReader(file)
59 InputStreamReader inputReader
= config
.getSource();
60 RisRecordReader risReader
= new RisRecordReader(state
, inputReader
);
// a set, so a reference instance that is added more than once is only saved once
62 Set
<Reference
> referencesToSave
= new HashSet
<>();
// read record by record until the reader returns its EOF marker
65 Map
<RisReferenceTag
, List
<RisValue
>> next
= risReader
.readRecord();
66 while (next
!= RisRecordReader
.EOF
){
// the location is computed first so it is available for the error report below
70 location
= recordLocation(state
, next
);
71 ref
= makeReference(state
, next
);
72 referencesToSave
.add(ref
);
// the in-reference (if any) has to be saved, too
73 if (ref
.getInReference() != null){
74 referencesToSave
.add(ref
.getInReference());
// a failure is reported together with the record location
76 } catch (Exception e
) {
77 String message
= "Unexpected exception during RIS Reference Import";
78 state
.getResult().addException(e
, message
, location
);
81 next
= risReader
.readRecord();
// persist all collected references and report their number
84 getReferenceService().saveOrUpdate(referencesToSave
);
85 state
.getResult().addNewRecords(Reference
.class.getSimpleName(), referencesToSave
.size());
// failures outside of a single record are reported without a location
87 } catch (Exception e
) {
88 String message
= "Unexpected exception during RIS Reference Import";
89 state
.getResult().addException(e
, message
);
// one warning per unhandled tag: tag name, tag description and the counter stored in the state
93 Map
<RisReferenceTag
, Integer
> unhandled
= state
.getUnhandled();
94 for (RisReferenceTag tag
: unhandled
.keySet()){
95 String message
= "RIS tag %s (%s) not yet handled. n = %d";
96 message
= String
.format(message
, tag
.name(), tag
.getDescription(), unhandled
.get(tag
));
97 state
.getResult().addWarning(message
);
// Creates a Reference from a single RIS record.
// The reference type is derived from the record via makeReferenceType(). Tags are read
// with getSingleValue()/getListValue(); whatever is still left in the record at the end
// is registered in the state as unhandled.
// NOTE(review): this extract carries stray leading line numbers and lacks many lines,
// among them most guards around the statements below (e.g. the condition under which the
// in-reference is created, null checks before the ".value" accesses, "try {" openers,
// "else" branches) and the return statement. Do not read the visible statement order
// as unconditional control flow.
106 private Reference
makeReference(RisReferenceImportState state
,
107 Map
<RisReferenceTag
, List
<RisValue
>> record
) {
110 ReferenceType type
= makeReferenceType(state
, record
);
111 Reference ref
= ReferenceFactory
.newReference(type
);
112 Reference inRef
= null;
// type of the in-reference: Journal for an Article, Book for a BookSection, Generic otherwise
114 ReferenceType inRefType
=
115 type
== ReferenceType
.Article ? ReferenceType
.Journal
:
116 type
== ReferenceType
.BookSection ? ReferenceType
.Book
:
117 ReferenceType
.Generic
;
118 inRef
= ReferenceFactory
.newReference(inRefType
);
119 ref
.setInReference(inRef
);
// higherRef is the in-reference if there is one, otherwise the reference itself
121 Reference higherRef
= inRef
== null ? ref
: inRef
;
// title: T1 and TI are compared, assertEqual() warns if they differ and prefers T1
125 RisValue t1
= getSingleValue(state
, record
, RisReferenceTag
.T1
);
126 RisValue ti
= getSingleValue(state
, record
, RisReferenceTag
.TI
);
127 RisValue value
= assertEqual(state
, "title", t1
, ti
);
129 ref
.setTitle(value
.value
);
133 RisValue t2
= getSingleValue(state
, record
, RisReferenceTag
.T2
); //Secondary Title (journal title, if applicable)
// journal title: T2 is preferred over JF, JF over JO; differing values are reported as warnings
135 if (higherRef
.getType() == ReferenceType
.Journal
){
136 RisValue jf
= getSingleValue(state
, record
, RisReferenceTag
.JF
); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
137 RisValue jo
= getSingleValue(state
, record
, RisReferenceTag
.JO
); //Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters.
138 RisValue x
= assertEqual(state
, "Journal/Periodical name: full format", jf
, jo
);
139 x
= assertEqual(state
, "Journal title", t2
, x
);
141 higherRef
.setTitle(x
.value
);
147 //ST (remove as same as TI or T1), not handled otherwise
// ST is only peeked at (remove = false) and dropped from the record when it repeats the title
148 RisValue st
= getSingleValue(state
, record
, RisReferenceTag
.ST
, false); //Short title
149 if (st
!= null && st
.value
.equals(ref
.getTitle())){
150 record
.remove(RisReferenceTag
.ST
);
// authors: all AU values together form the authorship
154 List
<RisValue
> list
= getListValue(record
, RisReferenceTag
.AU
);
155 if (!list
.isEmpty()){
156 TeamOrPersonBase
<?
> author
= makeAuthor(state
, list
);
157 ref
.setAuthorship(author
);
161 // RisValue y1 = getSingleValue(state, record, RisReferenceTag.Y1); //Primary Date
// date: the DA value becomes the publication date, PY is only checked for consistency with it
162 RisValue py
= getSingleValue(state
, record
, RisReferenceTag
.PY
);
163 RisValue da
= getSingleValue(state
, record
, RisReferenceTag
.DA
);
164 Integer year
= makeYear(state
, py
);
165 TimePeriod date
= makeDate(state
, da
);
166 assertDateYear(state
, year
, date
, py
);
167 ref
.setDatePublished(date
);
168 //TODO y1 not yet handled
// note: N1 is stored as an editorial annotation in the default language
171 RisValue n1
= getSingleValue(state
, record
, RisReferenceTag
.N1
); //Note
173 Annotation annotation
= Annotation
.NewInstance(n1
.value
, AnnotationType
.EDITORIAL(), Language
.DEFAULT());
174 ref
.addAnnotation(annotation
);
// DOI: a leading "doi " (case insensitive) is cut off before parsing;
// an unparsable value only results in a warning
// NOTE(review): the statement storing the parsed doi is not visible in this extract
178 RisValue doiVal
= getSingleValue(state
, record
, RisReferenceTag
.DO
); //Doi
182 String doiStr
= doiVal
.value
;
183 if (doiStr
.toLowerCase().startsWith("doi ")){
184 doiStr
= doiStr
.substring(4).trim();
186 doi
= DOI
.fromString(doiStr
);
188 } catch (IllegalArgumentException e
) {
189 String message
= "DOI could not be recognized: " + doiVal
.value
;
190 state
.getResult().addWarning(message
, doiVal
.location
);
// URL: an unparsable value only results in a warning
// NOTE(review): the statement storing the parsed uri is not visible in this extract
195 RisValue ur
= getSingleValue(state
, record
, RisReferenceTag
.UR
); //URL
199 String urStr
= ur
.value
;
200 uri
= URI
.create(urStr
);
202 } catch (Exception e
) {
203 String message
= "URL could not be recognized: " + ur
.value
;
204 state
.getResult().addWarning(message
, ur
.location
);
// pages: start page (SP) and end page (EP) combined by CdmUtils.concat with "-"
// NOTE(review): the statement using "pages" is not visible in this extract
209 RisValue sp
= getSingleValue(state
, record
, RisReferenceTag
.SP
);
210 RisValue ep
= getSingleValue(state
, record
, RisReferenceTag
.EP
);
211 String pages
= CdmUtils
.concat("-", sp
!= null ? sp
.value
: null, ep
!= null ? ep
.value
: null);
// volume: empty if VL is missing (even if IS exists), otherwise VL followed by "(IS)" if IS exists
// NOTE(review): the statement using "vol" is not visible in this extract
215 RisValue vl
= getSingleValue(state
, record
, RisReferenceTag
.VL
);
216 RisValue is
= getSingleValue(state
, record
, RisReferenceTag
.IS
);
217 String vol
= vl
== null?
"": vl
.value
+ (is
!= null ?
"("+ is
.value
+ ")": "");
// publisher is set on higherRef
221 RisValue pb
= getSingleValue(state
, record
, RisReferenceTag
.PB
);
223 higherRef
.setPublisher(pb
.value
);
// abstract: AB is preferred over N2, differing values are reported as a warning
227 RisValue ab
= getSingleValue(state
, record
, RisReferenceTag
.AB
);
228 RisValue n2
= getSingleValue(state
, record
, RisReferenceTag
.N2
);
229 RisValue abst
= assertEqual(state
, "Abstract", ab
, n2
);
231 ref
.setReferenceAbstract(abst
.value
);
// SN: stored as ISSN if higherRef is a journal; the setIsbn call below is
// presumably the else-branch (the "else" line is not visible) - TODO confirm
235 RisValue sn
= getSingleValue(state
, record
, RisReferenceTag
.SN
);
237 if (higherRef
.getType() == ReferenceType
.Journal
){
238 higherRef
.setIssn(sn
.value
);
240 higherRef
.setIsbn(sn
.value
);
// import source: the ID value (or null) plus the location of the record
245 RisValue id
= getSingleValue(state
, record
, RisReferenceTag
.ID
);
246 String idStr
= id
!= null? id
.value
: null;
247 String recLoc
= recordLocation(state
, record
);
248 ref
.addImportSource(idStr
, null, state
.getConfig().getSourceReference(), recLoc
);
// NOTE(review): this second call is identical to the one above and targets "ref" again.
// A guard line is missing in between; check whether "inRef" was meant here.
250 ref
.addImportSource(idStr
, null, state
.getConfig().getSourceReference(), recLoc
);
// ER and TY are dropped so that they are not counted as unhandled below
255 record
.remove(RisReferenceTag
.ER
);
256 record
.remove(RisReferenceTag
.TY
);
// every tag still left in the record is registered as unhandled in the state
258 for (RisReferenceTag tag
: record
.keySet()){
259 // String message = "RIS Tag " + tag.name() + " not yet handled";
260 // state.getResult().addWarning(message, record.get(tag).get(0).location);
261 state
.addUnhandled(tag
);
263 //TODO add as annotation or extension
273 private boolean hasInRef(Reference ref
) {
274 return ref
.getType() == ReferenceType
.BookSection
|| ref
.getType() == ReferenceType
.Article
;
283 private String
recordLocation(RisReferenceImportState state
,
284 Map
<RisReferenceTag
, List
<RisValue
>> record
) {
285 RisValue typeTag
= this.getSingleValue(state
, record
, RisReferenceTag
.TY
, false);
286 RisValue erTag
= this.getSingleValue(state
, record
, RisReferenceTag
.ER
, false);
288 String start
= typeTag
== null ?
"??" : typeTag
.location
;
289 String end
= erTag
== null ?
"??" : erTag
.location
;
291 String result
= "line " + CdmUtils
.concat("-", start
, end
);
301 private void assertDateYear(RisReferenceImportState state
, Integer year
, TimePeriod date
, RisValue py
) {
302 if (year
!= null && date
!= null && !year
.equals(date
.getStartYear())){
303 String message
= "Year 'PY' and date 'DA' are not consistent. PY is neglected.";
304 state
.getResult().addWarning(message
, py
.location
);
308 private RisValue
assertEqual(RisReferenceImportState state
, String meaning
, RisValue val1
, RisValue val2
) {
309 if (val1
!= null && val2
!= null && !val1
.value
.equals(val2
.value
)){
310 String message
= "The tags '%s' and '%s' are not equal but have a similar meaning ('%s'). "
311 + "%s was used and %s neglected";
312 message
= String
.format(message
, val1
.tag
.name(), val2
.tag
.name(), meaning
, val1
.tag
.name(), val2
.tag
.name());
313 state
.getResult().addWarning(message
, val1
.location
);
315 return val1
!= null ? val1
: val2
;
// Converts the value of the DA tag into a TimePeriod.
// The value is split at "/": part 1 is the start year, part 2 the start month,
// part 3 the start day; everything from part 4 on is free text. Blank parts are skipped.
// NOTE(review): this extract carries stray leading line numbers and lacks lines, among
// them whatever precedes the format check (presumably a null guard for "da" - confirm),
// the statement(s) following the format warning (it is not visible whether the method
// returns or continues there), the closing braces and the return statement.
323 private TimePeriod
makeDate(RisReferenceImportState state
, RisValue da
) {
// format check: optional 4 digit year, optional 2 digit month, optional 2 digit day and
// optional trailing text, separated by "/"; a violation is reported as a warning
327 if (! da
.value
.matches("([0-9]{4})?(\\/([0-9]{2})?(\\/([0-9]{2})?(\\/.*)?)?)?")){
328 String message
= "Tag '%s' has incorrect format. Only exactly 'dddd/dd/dd/any text' is allowed (where d is a digit), but was '%s'";
329 message
= String
.format(message
, da
.tag
.name(), da
.value
);
330 state
.getResult().addWarning(message
, da
.location
);
333 String
[] split
= da
.value
.split("/");
334 TimePeriod tp
= TimePeriod
.NewInstance();
// year
335 if (split
.length
> 0 && isNotBlank(split
[0])){
336 tp
.setStartYear(Integer
.valueOf(split
[0]));
// month
338 if (split
.length
> 1 && isNotBlank(split
[1])){
339 tp
.setStartMonth(Integer
.valueOf(split
[1]));
// day
341 if (split
.length
> 2 && isNotBlank(split
[2])){
342 tp
.setStartDay(Integer
.valueOf(split
[2]));
// other text: the remaining parts are combined by CdmUtils.concat with "/" and
// appended to the string representation of the date parsed so far
344 if (split
.length
> 3 && isNotBlank(split
[3])){
345 List
<String
> other
= Arrays
.asList(split
).subList(3, split
.length
);
346 String otherStr
= CdmUtils
.concat("/", other
.toArray(new String
[other
.size()]));
347 tp
.setFreeText(tp
.toString() + " " + otherStr
);
357 private Integer
makeYear(RisReferenceImportState state
, RisValue py
) {
361 if (py
.value
.matches("[0-9]{4}")){
362 return Integer
.valueOf(py
.value
);
364 String message
= "Tag '%s' has incorrect format. Only exactly 4 digits are allowed, but was '%s'";
365 message
= String
.format(message
, py
.tag
.name(), py
.value
);
366 state
.getResult().addWarning(message
, py
.location
);
376 private TeamOrPersonBase
<?
> makeAuthor(RisReferenceImportState state
, List
<RisValue
> list
) {
377 if (list
.size() == 1){
378 return makePerson(state
, list
.get(0));
380 Team team
= Team
.NewInstance();
381 for (RisValue value
: list
){
382 team
.addTeamMember(makePerson(state
, value
));
393 private Person
makePerson(RisReferenceImportState state
, RisValue risValue
) {
394 Person person
= Person
.NewInstance();
395 String
[] split
= risValue
.value
.split(",");
396 if (split
.length
>= 1){
397 person
.setLastname(split
[0].trim());
399 if (split
.length
>= 2){
400 person
.setFirstname(split
[1].trim());
402 if (split
.length
>= 3){
403 person
.setSuffix(split
[2].trim());
410 * Returns the single value for the given tag
411 * and removes the tag from the record.
412 * If more than 1 value exists this is logged
415 private RisValue
getSingleValue(RisReferenceImportState state
,
416 Map
<RisReferenceTag
, List
<RisValue
>> record
,
417 RisReferenceTag tag
) {
418 return getSingleValue(state
, record
, tag
, true);
// NOTE(review): this extract carries stray leading line numbers and lacks lines: the
// opening and the end of the Javadoc comment below, the lines between the lookup and
// the assertSingle() call (presumably a null guard - callers compare the result with
// null; confirm), the handling of the "remove" flag, the return statement and the
// closing brace.
422 * Returns the single value for the given tag
423 * and removes the tag from the record.
424 * If more than 1 value exists this is logged
427 private RisValue
getSingleValue(RisReferenceImportState state
,
428 Map
<RisReferenceTag
, List
<RisValue
>> record
,
429 RisReferenceTag tag
, boolean remove
) {
// all values stored for the tag in this record
430 List
<RisValue
> list
= record
.get(tag
);
// reports lists with more than 1 value (warning) and empty lists (error)
434 assertSingle(state
, list
, tag
);
// Returns the list of values stored for the given tag in the record.
// NOTE(review): this extract carries stray leading line numbers and lacks lines: the
// condition under which the list is replaced by a new empty list (presumably
// "list == null" - confirm), the return statement and the closing braces. Whether
// the tag is removed from the record here is not visible either.
441 private List
<RisValue
> getListValue(Map
<RisReferenceTag
, List
<RisValue
>> record
,
442 RisReferenceTag tag
) {
443 List
<RisValue
> list
= record
.get(tag
);
// fallback: an empty list, so callers can call isEmpty() on the result
446 list
= new ArrayList
<>();
456 private void assertSingle(RisReferenceImportState state
, List
<RisValue
> list
, RisReferenceTag tag
) {
457 if (list
.size() > 1){
458 String message
= "There is more than 1 tag '%s' but only 1 tag is supported by RIS format or"
459 + " by the current import implementation.";
460 message
= String
.format(message
, tag
.name());
461 state
.getResult().addWarning(message
, list
.get(0).location
+ "ff");
462 }else if (list
.isEmpty()){
463 state
.getResult().addError("A tag list was empty. This should not happen and is a programming code error");
472 private ReferenceType
makeReferenceType(RisReferenceImportState state
,
473 Map
<RisReferenceTag
, List
<RisValue
>> record
) {
474 RisReferenceTag tyTag
= RisReferenceTag
.TY
;
475 RisValue value
= this.getSingleValue(state
, record
, tyTag
, false);
476 String typeStr
= value
.value
;
477 RisRecordType type
= RisRecordType
.valueOf(typeStr
);
478 ReferenceType cdmType
= type
.getCdmReferenceType();
// Check hook of this import, receiving the import state.
// NOTE(review): only the signature is visible in this extract (stray leading line number
// included); the body, the closing brace and any annotation are missing - restore them
// before compiling.
486 protected boolean doCheck(RisReferenceImportState state
) {
494 protected boolean isIgnore(RisReferenceImportState state
) {