Project

General

Profile

Revision 98ad8fb7

ID 98ad8fb78d988f72e25844458fb1b133dec2378e
Parent b93248b0
Child 5a15b4d9

Added by Andreas Müller about 3 years ago

ref #6636 further improvements to RIS import

View differences:

cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/ImportResult.java
200 200
        }
201 201
    }
202 202

  
203

  
204 203
}
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/common/IoResultBase.java
84 84
        exceptions.add(new IoInfo(message, e));
85 85
        setExceptionState();
86 86
    }
87
    public void addException(Exception e, String message, String location) {
88
        exceptions.add(new IoInfo(message, e, location));
89
        setExceptionState();
90
    }
87 91

  
88 92
    protected abstract void setExceptionState();
89 93

  
......
136 140
                String message = ioInfo.message != null ? ioInfo.message : ioInfo.exception != null ? ioInfo.exception.getMessage() : "";
137 141
                message = StringUtils.isBlank(message)? "no message" : message;
138 142
                Object stacktrace = ioInfo.exception == null? null : ioInfo.exception.getStackTrace();
139
                String available = (stacktrace == null ? " not" : "");
140
                report.append("\n" + location + message + "(stacktrace" + available + " available)");
143
                String available = (stacktrace != null ? " (stacktrace available)" : "");
144
                report.append("\n" + location + message + available);
141 145
            }
142 146
        }
143 147
    }
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/reference/ris/in/RisRecordReader.java
78 78
                       addTaggedValue(type, result, line, lineNo);
79 79
                       count++;
80 80
                   }else if (isErLine(line)){
81
                       addTaggedValue(RisReferenceTag.ER, result, line, lineNo);
81 82
                       return result;
82 83
                   }else{
83 84
                       //TODO
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/reference/ris/in/RisReferenceImport.java
64 64
            Map<RisReferenceTag, List<RisValue>> next = risReader.readRecord();
65 65
            while (next != RisRecordReader.EOF){
66 66
                Reference ref;
67
                String location = "";
67 68
                try {
69
                    location = recordLocation(state, next);
68 70
                    ref = makeReference(state, next);
69 71
                    referencesToSave.add(ref);
70 72
                    if (ref.getInReference() != null){
......
72 74
                    }
73 75
                } catch (Exception e) {
74 76
                    String message = "Unexpected exception during RIS Reference Import";
75
                    state.getResult().addException(e, message);
77
                    state.getResult().addException(e, message, location);
76 78
                }
77 79

  
78 80
                next = risReader.readRecord();
......
85 87
            String message = "Unexpected exception during RIS Reference Import";
86 88
            state.getResult().addException(e, message);
87 89
        }
90

  
91
        //unhandled
92
        Map<RisReferenceTag, Integer> unhandled = state.getUnhandled();
93
        for (RisReferenceTag tag : unhandled.keySet()){
94
            String message = "RIS tag %s (%s) not yet handled. n = %d";
95
            message = String .format(message, tag.name(), tag.getDescription(), unhandled.get(tag));
96
            state.getResult().addWarning(message);
97
        }
88 98
    }
89 99

  
90 100
    /**
......
163 173
        if (doiVal != null){
164 174
            DOI doi;
165 175
            try {
166
                doi = DOI.fromString(doiVal.value);
176
                String doiStr = doiVal.value;
177
                if (doiStr.toLowerCase().startsWith("doi ")){
178
                    doiStr = doiStr.substring(4).trim();
179
                }
180
                doi = DOI.fromString(doiStr);
167 181
                ref.setDoi(doi);
168 182
            } catch (IllegalArgumentException e) {
169 183
                String message = "DOI could not be recognized: " + doiVal.value;
......
207 221
            }
208 222
        }
209 223

  
210
        //
224
        //ID
225
        RisValue id = getSingleValue(state, record, RisReferenceTag.ID);
226
        String idStr = id != null? id.value: null;
227
        String recLoc = recordLocation(state, record);
228
        ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
229
        if (inRef != null){
230
            ref.addImportSource(idStr, null, state.getConfig().getSourceReference(), recLoc);
231

  
232
        }
233

  
234
        //remove
235
        record.remove(RisReferenceTag.ER);
236
        record.remove(RisReferenceTag.TY);
211 237

  
212 238
        for (RisReferenceTag tag : record.keySet()){
213
            //TODO we may want to count them for the whole file and not mention each time when not available
214
            String message = "RIS Tag " + tag.name() +  " not yet handled";
215
            state.getResult().addWarning(message, record.get(tag).get(0).location);
239
//            String message = "RIS Tag " + tag.name() +  " not yet handled";
240
//            state.getResult().addWarning(message, record.get(tag).get(0).location);
241
            state.addUnhandled(tag);
216 242

  
217 243
            //TODO add as annotation or extension
218 244
        }
......
223 249

  
224 250
    /**
225 251
     * @param state
252
     * @param record
253
     * @return
254
     */
255
    private String recordLocation(RisReferenceImportState state,
256
            Map<RisReferenceTag, List<RisValue>> record) {
257
        RisValue typeTag = this.getSingleValue(state, record, RisReferenceTag.TY, false);
258
        RisValue erTag = this.getSingleValue(state, record, RisReferenceTag.ER, false);
259

  
260
        String start = typeTag == null ? "??" : typeTag.location;
261
        String end = erTag == null ? "??" : erTag.location;
262

  
263
        String result = "line " + CdmUtils.concat("-", start, end);
264

  
265
        return result;
266
    }
267

  
268
    /**
269
     * @param state
226 270
     * @param year
227 271
     * @param date
228 272
     */
......
343 387
    private RisValue getSingleValue(RisReferenceImportState state,
344 388
            Map<RisReferenceTag, List<RisValue>> record,
345 389
            RisReferenceTag tag) {
390
        return getSingleValue(state, record, tag, true);
391
    }
392

  
393
    /**
394
     * Returns the single value for the given tag
395
     * and removes the tag from the record.
396
     * If more than 1 value exists this is logged
397
     * as a warning.
398
     */
399
    private RisValue getSingleValue(RisReferenceImportState state,
400
            Map<RisReferenceTag, List<RisValue>> record,
401
            RisReferenceTag tag, boolean remove) {
346 402
        List<RisValue> list = record.get(tag);
347 403
        if (list == null){
348 404
            return null;
349 405
        }
350 406
        assertSingle(state, list, tag);
351
        record.remove(tag);
407
        if (remove){
408
            record.remove(tag);
409
        }
352 410
        return list.get(0);
353 411
    }
354 412

  
......
386 444
    private ReferenceType makeReferenceType(RisReferenceImportState state,
387 445
            Map<RisReferenceTag, List<RisValue>> record) {
388 446
        RisReferenceTag tyTag = RisReferenceTag.TY;
389
        RisValue value = this.getSingleValue(state, record, tyTag);
447
        RisValue value = this.getSingleValue(state, record, tyTag, false);
390 448
        String typeStr = value.value;
391 449
        RisRecordType type = RisRecordType.valueOf(typeStr);
392 450
        ReferenceType cdmType = type.getCdmReferenceType();
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/reference/ris/in/RisReferenceImportState.java
8 8
*/
9 9
package eu.etaxonomy.cdm.io.reference.ris.in;
10 10

  
11
import java.util.HashMap;
12
import java.util.Map;
13

  
11 14
import eu.etaxonomy.cdm.io.common.ImportStateBase;
12 15

  
13 16
/**
......
18 21
public class RisReferenceImportState
19 22
        extends ImportStateBase<RisReferenceImportConfigurator, RisReferenceImport>{
20 23

  
24
    private Map<RisReferenceTag, Integer> unhandled = new HashMap<>();
25

  
26

  
21 27
    /**
22 28
     * @param config
23 29
     */
......
25 31
        super(config);
26 32
    }
27 33

  
34
    public void addUnhandled(RisReferenceTag tag) {
35
        Integer x = unhandled.get(tag);
36
        unhandled.put(tag, x == null ? 1 : ++x);
37
    }
38

  
39
    public Map<RisReferenceTag, Integer> getUnhandled() {
40
        return unhandled;
41
    }
42

  
28 43
}
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/reference/ris/in/RisReferenceTag.java
16 16
public enum RisReferenceTag {
17 17
    TY("Type of reference"),
18 18
    A1("First Author"),
19
    A2("Secondary Author (each author on its own line preceded by the tag)"),
20
    A3("Tertiary Author (each author on its own line preceded by the tag)"),
21
    A4("Subsidiary Author (each author on its own line preceded by the tag)"),
19
    A2("Secondary Author","each author on its own line preceded by the tag"),
20
    A3("Tertiary Author","each author on its own line preceded by the tag"),
21
    A4("Subsidiary Author","each author on its own line preceded by the tag"),
22 22
    AB("Abstract"),
23 23
    AD("Author Address"),
24 24
    AN("Accession Number"),
25
    AU("Author (each author on its own line preceded by the tag)"),
25
    AU("Author","each author on its own line preceded by the tag"),
26 26
    AV("Location in Archives"),
27 27
    BT("This field can contain alphanumeric characters. There is no practical limit to the length of this field."),
28 28
    C1("Custom 1"),
......
47 47
    ET("Edition"),
48 48
    ID("Reference ID"),
49 49
    IS("Issue number"),
50
    J1("Periodical name: user abbreviation 1. This is an alphanumeric field of up to 255 characters."),
51
    J2("Alternate Title (this field is used for the abbreviated title of a book or journal name, the latter mapped to T2)"),
50
    J1("Periodical name: user abbreviation 1","This is an alphanumeric field of up to 255 characters."),
51
    J2("Alternate Title (abbreviated title of a book or journal name, the latter mapped to T2)"),
52 52
    JA("Periodical name: standard abbreviation. This is the periodical in which the article was (or is to be, in the case of in-press references) published. This is an alphanumeric field of up to 255 characters."),
53
    JF("Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters."),
54
    JO("Journal/Periodical name: full format. This is an alphanumeric field of up to 255 characters."),
55
    KW("Keywords (keywords should be entered each on its own line preceded by the tag)"),
56
    L1("Link to PDF. There is no practical limit to the length of this field. URL addresses can be entered individually, one per tag or multiple addresses can be entered on one line using a semi-colon as a separator."),
57
    L2("Link to Full-text. There is no practical limit to the length of this field. URL addresses can be entered individually, one per tag or multiple addresses can be entered on one line using a semi-colon as a separator."),
58
    L3("Related Records. There is no practical limit to the length of this field."),
59
    L4("Image(s). There is no practical limit to the length of this field."),
53
    JF("Journal/Periodical name: full format","This is an alphanumeric field of up to 255 characters."),
54
    JO("Journal/Periodical name: full format","This is an alphanumeric field of up to 255 characters."),
55
    KW("Keywords","(keywords should be entered each on its own line preceded by the tag)"),
56
    L1("Link to PDF"," There is no practical limit to the length of this field. URL addresses can be entered individually, one per tag or multiple addresses can be entered on one line using a semi-colon as a separator."),
57
    L2("Link to Full-text","There is no practical limit to the length of this field. URL addresses can be entered individually, one per tag or multiple addresses can be entered on one line using a semi-colon as a separator."),
58
    L3("Related Records","There is no practical limit to the length of this field."),
59
    L4("Image(s)","There is no practical limit to the length of this field."),
60 60
    LA("Language"),
61 61
    LB("Label"),
62 62
    LK("Website Link"),
63 63
    M1("Number"),
64
    M2("Miscellaneous 2. This is an alphanumeric field and there is no practical limit to the length of this field."),
64
    M2("Miscellaneous 2","This is an alphanumeric field and there is no practical limit to the length of this field."),
65 65
    M3("Type of Work"),
66 66
    N1("Notes"),
67
    N2("Abstract. This is a free text field and can contain alphanumeric characters. There is no practical length limit to this field."),
67
    N2("Abstract","This is a free text field and can contain alphanumeric characters. There is no practical length limit to this field."),
68 68
    NV("Number of Volumes"),
69 69
    OP("Original Publication"),
70 70
    PB("Publisher"),
71 71
    PP("Publishing Place"),
72
    PY("Publication year (YYYY/MM/DD)"),
72
    PY("Publication year","(YYYY/MM/DD)"),
73 73
    RI("Reviewed Item"),
74 74
    RN("Research Notes"),
75 75
    RP("Reprint Edition"),
......
78 78
    SP("Start Page"),
79 79
    ST("Short Title"),
80 80
    T1("Primary Title"),
81
    T2("Secondary Title (journal title, if applicable)"),
81
    T2("Secondary Title","journal title, if applicable"),
82 82
    T3("Tertiary Title"),
83 83
    TA("Translated Author"),
84 84
    TI("Title"),
85 85
    TT("Translated Title"),
86
    U1("User definable 1. This is an alphanumeric field and there is no practical limit to the length of this field."),
87
    U2("User definable 2. This is an alphanumeric field and there is no practical limit to the length of this field."),
88
    U3("User definable 3. This is an alphanumeric field and there is no practical limit to the length of this field."),
89
    U4("User definable 4. This is an alphanumeric field and there is no practical limit to the length of this field."),
90
    U5("User definable 5. This is an alphanumeric field and there is no practical limit to the length of this field."),
86
    U1("User definable 1","This is an alphanumeric field and there is no practical limit to the length of this field."),
87
    U2("User definable 2","This is an alphanumeric field and there is no practical limit to the length of this field."),
88
    U3("User definable 3","This is an alphanumeric field and there is no practical limit to the length of this field."),
89
    U4("User definable 4","This is an alphanumeric field and there is no practical limit to the length of this field."),
90
    U5("User definable 5","This is an alphanumeric field and there is no practical limit to the length of this field."),
91 91
    UR("URL"),
92 92
    VL("Volume number"),
93 93
    VO("Published Standard number"),
......
97 97
    ;
98 98

  
99 99
    private String description;
100
    private String description2;
100 101

  
101 102
    private RisReferenceTag(String description){
102 103
        this.description = description;
103 104
    }
105
    private RisReferenceTag(String description, String description2){
106
        this.description = description;
107
        this.description2 = description2;
108
    }
104 109

  
105 110
    /**
106 111
     * @return the description
......
109 114
        return description;
110 115
    }
111 116

  
117
    /**
118
     * @return the description2
119
     */
120
    public String getDescription2() {
121
        return description2;
122
    }
123

  
112 124

  
113 125
}

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)