Project

General

Profile

Revision 0c683ea3

ID0c683ea35437e97349002e7d3502381eeb8e5bae
Parent 204aa9a6
Child 5cf44b8a

Added by Andreas Müller about 2 years ago

fix #6793 implement csv import framework

View differences:

cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/csv/CsvImportBase.java
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.csv;
10

  
11
import java.io.IOException;
12
import java.io.InputStreamReader;
13
import java.util.Arrays;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.Map;
17

  
18
import org.codehaus.plexus.util.StringUtils;
19
import org.springframework.transaction.TransactionStatus;
20

  
21
import au.com.bytecode.opencsv.CSVReader;
22
import eu.etaxonomy.cdm.io.common.CdmImportBase;
23
import eu.etaxonomy.cdm.io.common.ImportResult;
24

  
25
/**
26
 * A base class for <b>simple</b> CSV imports.
27
 * Simple means less than 10.000 lines and only one file
28
 * with a flat structure.<BR><BR>
29
 * See DwC-A import for more complex structures.<BR><BR>
30
 * Update: as it supports transactions now, also longer files
31
 * are possible.
32
 *
33
 * @author a.mueller
34
 * @date 08.07.2017
35
 *
36
 */
37
public abstract class CsvImportBase<CONFIG extends CsvImportConfiguratorBase, STATE extends CsvImportState<CONFIG>, T>
38
        extends CdmImportBase<CONFIG, STATE>{
39

  
40
    private static final long serialVersionUID = 3052198644463797541L;
41

  
42

  
43
    @Override
44
    protected void doInvoke(STATE state) {
45
        int txNLimit = state.getConfig().getTransactionLineCount();
46
        ImportResult result = state.getResult();
47
        try {
48
            InputStreamReader inputReader = state.getConfig().getSource();
49
            CSVReader csvReader = new CSVReader(inputReader, state.getConfig().getFieldSeparator());
50
            String[] headerStr = csvReader.readNext();
51
            String[] next = csvReader.readNext();
52

  
53
            if (headerStr == null){
54
                String message = "Import file is empty";
55
                result.addWarning(message);
56
            }else if (next == null){
57
                String message = "No data. Only header line exists";
58
                result.addWarning(message);
59
            }else{
60
                List<String> header = Arrays.asList(headerStr);
61
                TransactionStatus tx = this.startTransaction();
62
                int row = 2;
63
                int txN = 0;
64
                while (next != null){
65
                    try {
66
                        Map<String, String> record = lineToMap(header, next, row, result);
67
                        state.setCurrentRecord(record);
68
                        state.setRow(row);
69
                        handleSingleLine(state);
70
                        next = csvReader.readNext();
71
                        row++;
72
                        txN++;
73
                        if (txN >= txNLimit && txNLimit > 0){
74
                            tx = startNewTransaction(state, tx);
75
                            txN = 0;
76
                        }
77
                    } catch (Exception e) {
78
                        String message = "Exception when handling csv row: " + e.getMessage();
79
                        state.getResult().addException(e, message, null, state.getLine());
80
                    }
81
                }
82
                this.commitTransaction(tx);
83

  
84
            }
85
            csvReader.close();
86

  
87
            return ;
88

  
89
        } catch (IOException e) {
90
            throw new RuntimeException(e);
91
        }
92

  
93
    }
94

  
95
    private TransactionStatus startNewTransaction(STATE state, TransactionStatus tx) {
96
        try {
97
            this.commitTransaction(tx);
98
        } catch (Exception e) {
99
            String message = "Exception when commiting transaction: " + e.getMessage();
100
            state.getResult().addException(e, message, null, state.getLine());
101
        }
102
        tx = this.startTransaction();
103
        try {
104
            refreshTransactionStatus(state);
105
        } catch (Exception e) {
106
            String message = "Exception when refreshing transaction: " + e.getMessage();
107
            state.getResult().addException(e, message, null, state.getLine());
108
        }
109
        return tx;
110
    }
111

  
112
    /**
113
     * To be implemented by subclasses if required
114
     * @param state
115
     */
116
    protected void refreshTransactionStatus(STATE state) {}
117

  
118
    /**
119
     * @param header
120
     * @param line
121
     * @return
122
     */
123
    private Map<String, String> lineToMap(List<String> header, String[] line, int row, ImportResult importResult) {
124
        Map<String, String> result = new HashMap<>();
125
        if (header.size() > line.length){
126
            String message = "CSV line has less fields than header";
127
            importResult.addError(message, row);
128
        }else if (header.size() < line.length){
129
            String message = "CSV line has more fields than header";
130
            importResult.addError(message, row);
131
        }
132
        for (int i = 0; i<header.size(); i++){
133
            String value = line.length < i ? null : line[i];
134
            if (StringUtils.isBlank(value)|| "NULL".equalsIgnoreCase(value)) {
135
                value = null;
136
            }
137
            result.put(header.get(i), value);
138
        }
139
        return result;
140
    }
141

  
142
    /**
143
     * @param state
144
     * @param result
145
     */
146
    protected abstract void handleSingleLine(STATE state);
147

  
148
    /**
149
     * {@inheritDoc}
150
     */
151
    @Override
152
    protected boolean doCheck(STATE state) {
153
        return false;
154
    }
155

  
156
    /**
157
     * {@inheritDoc}
158
     */
159
    @Override
160
    protected boolean isIgnore(STATE state) {
161
        return false;
162
    }
163
}
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/csv/CsvImportConfiguratorBase.java
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.csv;
10

  
11
import java.io.IOException;
12
import java.io.InputStream;
13
import java.io.InputStreamReader;
14
import java.net.URI;
15
import java.net.URL;
16

  
17
import eu.etaxonomy.cdm.database.ICdmDataSource;
18
import eu.etaxonomy.cdm.io.common.ImportConfiguratorBase;
19
import eu.etaxonomy.cdm.io.common.mapping.IInputTransformer;
20
import eu.etaxonomy.cdm.model.reference.Reference;
21
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
22

  
23
/**
24
 * Base class for {@link CsvImportBase csv import} configuration.
25
 *
26
 * @author a.mueller
27
 * @date 08.07.2017
28

  
29
 */
30
public abstract class CsvImportConfiguratorBase
31
        extends ImportConfiguratorBase<CsvImportState, InputStreamReader>{
32

  
33

  
34
    private static final long serialVersionUID = -6735627744555323225L;
35

  
36
    private char fieldSeparator = ',';
37

  
38
    private int transactionLineCount = 1000;
39

  
40

  
41
    // ****************** CONSTRUCTOR *****************************/
42
    protected CsvImportConfiguratorBase(InputStreamReader inputStream,
43
            ICdmDataSource cdmDestination){
44
        super(null);
45
        setSource(inputStream);
46
        setDestination(cdmDestination);
47
    }
48

  
49
    protected CsvImportConfiguratorBase(InputStreamReader inputStream,
50
            ICdmDataSource cdmDestination, IInputTransformer transformer){
51
        super(transformer);
52
        setSource(inputStream);
53
        setDestination(cdmDestination);
54
    }
55

  
56
    protected CsvImportConfiguratorBase(URI uri,
57
            ICdmDataSource cdmDestination, IInputTransformer transformer) throws IOException{
58
        super(transformer);
59
        setSource(toStream(uri));
60
        setDestination(cdmDestination);
61
    }
62

  
63
    /**
64
     * @param uri
65
     * @return
66
     * @throws IOException
67
     */
68
    private static InputStreamReader toStream(URI uri) throws IOException {
69
        URL url = uri.toURL();
70
        InputStream stream = url.openStream();
71
        InputStreamReader inputStreamReader = new InputStreamReader(stream, "UTF8");
72
        return inputStreamReader;
73
    }
74

  
75

  
76

  
77
    @Override
78
    public Reference getSourceReference() {
79
        if (this.sourceReference == null){
80
            sourceReference = ReferenceFactory.newGeneric();
81
            if (this.getSource() == null){
82
                sourceReference.setTitleCache("CSV Import " + getDateString(), true);
83
            }else{
84
                sourceReference.setTitleCache(getSource().toString(), true);
85
            }
86
        }
87
        return sourceReference;
88
    }
89

  
90
    /**
91
     * {@inheritDoc}
92
     */
93
    @Override
94
    public CsvImportState getNewState() {
95
        return new CsvImportState<CsvImportConfiguratorBase>(this);
96
    }
97

  
98
    /**
99
     * Returns the field separator. Default is ','.
100
     * In future we may add other types like
101
     */
102
    public char getFieldSeparator() {
103
        return fieldSeparator;
104
    }
105

  
106
    public void setFieldSeparator(char fieldSeparator) {
107
        this.fieldSeparator = fieldSeparator;
108
    }
109

  
110
    /**
111
     * @return the transactionLineCount
112
     */
113
    public int getTransactionLineCount() {
114
        return transactionLineCount;
115
    }
116

  
117
    /**
118
     * @param transactionLineCount the transactionLineCount to set
119
     */
120
    public void setTransactionLineCount(int transactionLineCount) {
121
        this.transactionLineCount = transactionLineCount;
122
    }
123
}
cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/csv/CsvImportState.java
1
/**
2
* Copyright (C) 2017 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.csv;
10

  
11
import java.util.Map;
12

  
13
import eu.etaxonomy.cdm.io.common.ImportStateBase;
14

  
15
/**
16
 * State class for {@link CsvImportBase csv imports}. This
17
 * class can either be used directly or being subclassed
18
 * according to the needs of a certain import.
19
 *
20
 * @author a.mueller
21
 * @date 08.07.2017
22
 *
23
 */
24
public class CsvImportState<CONFIG extends CsvImportConfiguratorBase>
25
    extends ImportStateBase<CONFIG, CsvImportBase>{
26

  
27
    private Map<String, String> currentRecord;
28
    private int row;
29

  
30

  
31
    protected CsvImportState(CONFIG config) {
32
        super(config);
33
    }
34

  
35

  
36
    public Map<String, String> getCurrentRecord() {
37
        return currentRecord;
38
    }
39
    public void setCurrentRecord(Map<String, String> currentRecord) {
40
        this.currentRecord = currentRecord;
41
    }
42

  
43
    public void setRow(int row) {
44
        this.row = row;
45
    }
46
    public int getRow() {
47
        return row;
48
    }
49
    public String getLine(){
50
        return String.valueOf(row);
51
    }
52

  
53
}

Also available in: Unified diff

Add picture from clipboard (Maximum size: 40 MB)