Project

General

Profile

Download (8.06 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.common;
10

    
11
import java.io.FileNotFoundException;
12
import java.io.InputStream;
13
import java.text.DateFormat;
14
import java.text.Format;
15
import java.util.ArrayList;
16
import java.util.Date;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Locale;
20
import java.util.Map;
21

    
22
import org.apache.logging.log4j.LogManager;
23
import org.apache.logging.log4j.Logger;
24
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
25
import org.apache.poi.ss.usermodel.Cell;
26
import org.apache.poi.ss.usermodel.CellType;
27
import org.apache.poi.ss.usermodel.Row;
28
import org.apache.poi.ss.usermodel.Sheet;
29
import org.apache.poi.ss.usermodel.Workbook;
30
import org.apache.poi.ss.usermodel.WorkbookFactory;
31

    
32
/**
33
 * @author n.hoffmann
34
 * @since 18.11.2008
35
 */
36
public class ExcelUtils {
37

    
38
	private static final Logger logger = LogManager.getLogger();
39

    
40
    /** Reads all rows of an Excel worksheet */
41
    public static List<Map<String, String>> parseXLS(URI uri) throws FileNotFoundException {
42
    	return parseXLS(uri, null);
43
    }
44

    
45
	/** Reads all rows of an Excel worksheet */
46
    public static List<Map<String, String>> parseXLS(URI uri, String worksheetName) throws FileNotFoundException {
47
        try {
48
            InputStream stream = UriUtils.getInputStream(uri);
49
            return parseXLS(stream, worksheetName);
50
        } catch(FileNotFoundException fne) {
51
            throw new FileNotFoundException(uri.toString());
52
        } catch(Exception ioe) {
53
            String message = "Error reading the Excel file." + uri.toString();
54
            logger.error(message);
55
            ioe.printStackTrace();
56
            throw new RuntimeException(message);
57
        }
58
    }
59

    
60
    /** Reads all rows of an Excel worksheet */
61
    public static List<Map<String, String>> parseXLS(InputStream stream, String worksheetName) {
62

    
63
    	List<Map<String, String>> recordList = new ArrayList<>();
64

    
65
    	try {
66
//    		POIFSFileSystem fs = new POIFSFileSystem(UriUtils.getInputStream(uri));
67
//    		HSSFWorkbook wb = new HSSFWorkbook(fs);
68

    
69
    		Workbook wb = WorkbookFactory.create(stream);
70

    
71
    		Sheet sheet;
72
    		if (worksheetName == null){
73
    			sheet = wb.getSheetAt(0);
74
    		}else{
75
    			sheet = wb.getSheet(worksheetName);
76
    		}
77

    
78
    		if (sheet== null){
79
    			if (worksheetName != null){
80
    			    //TODO report error
81
    				logger.debug(worksheetName + " not provided!");
82
    			}
83
    		}else{
84
	    		Row row;
85
	    		Cell cell;
86

    
87
	    		int rows; // Number of rows
88
	    		rows = sheet.getPhysicalNumberOfRows();
89
				if(logger.isDebugEnabled()) { logger.debug("Number of rows: " + rows); }
90

    
91
	    		int cols = 0; // Number of columns
92
	    		int tmp = 0;
93

    
94
	    		// This trick ensures that we get the data properly even if it doesn't start from first few rows
95
	    		for(int i = 0; i < 10 || i < rows; i++) {
96
	    			row = sheet.getRow(i);
97
	     			if(row != null) {
98
	    				tmp = sheet.getRow(i).getPhysicalNumberOfCells();
99
	    				if(tmp > cols){
100
	    					cols = tmp;
101
	    				}
102
	    			}
103
	    		}
104

    
105
	    		//first row
106
	    		List<String> columns = new ArrayList<>();
107
	    		row = sheet.getRow(0);
108
	    		for (int c = 0; c < cols; c++){
109
	    			cell = row.getCell(c);
110
					if(cell != null) {
111
					    String str = cell.toString();
112
					    str = (str == null)? null : str.trim();
113
					    //TODO better make case sensitive, but need to adapt all existing imports for this
114
						columns.add(str);
115
						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + ": " + str); }
116
					} else {
117
						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
118
					}
119
	    		}
120

    
121
	    		//value rows
122
	    		for(int r = 1; r < rows; r++) {
123
	    			row = sheet.getRow(r);
124
	    			Map<String, String> headers = new HashMap<>();
125
	    			boolean notEmpty = checkIsEmptyRow(row);
126
	    			if(notEmpty) {
127
	    				for(int c = 0; c < cols; c++) {
128
	    					cell = row.getCell(c);
129
	    					if(cell != null) {
130
	    						if (c >= columns.size()){
131
	    							String message = "Cell has no header. There are only %d headers but more not-null cells in approx. row %d. Cell is neglected.";
132
	    							message = String.format(message, columns.size(),row.getRowNum());
133
	    							logger.warn(message);
134
	    						}else{
135
	    							if(logger.isDebugEnabled()) { logger.debug(String.format("Cell #%d: %s", c, cell.toString())); }
136
	    							headers.put(columns.get(c), getCellValue(cell));
137
	    						}
138
	    					} else {
139
	    						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
140
	    					}
141
	    				}
142
	    			}
143
	    			recordList.add(headers);
144
	    		}
145
    		}
146
    	} catch(Exception ioe) {
147
    		logger.error("Error reading the Excel file.");
148
    		ioe.printStackTrace();
149
    	}
150
    	return recordList;
151
    }
152

    
153
	public static String getCellValue(Cell cell) {
154
		try {
155
			if (cell.getCellType() == CellType.STRING){
156
				return cell.getStringCellValue();
157
			}else if (cell.getCellType() == CellType.BLANK){
158
				return "";
159
			}else if (cell.getCellType() == CellType.NUMERIC){
160
				return getNumericCellValue(cell);
161
			}else if (cell.getCellType() == CellType.BOOLEAN){
162
				Boolean cellValue = cell.getBooleanCellValue();
163
				String value = String.valueOf(cellValue);
164
				return value;
165
			}else if (cell.getCellType() == CellType.ERROR){
166
				return "-error-";
167
			}else if (cell.getCellType() == CellType.FORMULA){
168
				try {
169
					String strValue = cell.getStringCellValue();
170
					if ("".equals(strValue)){
171
						strValue = getNumericCellValue(cell);
172
					}
173
					return strValue;
174
				} catch (Exception e) {
175
					String message = "Formula cell (%s) can't be transformed to string";
176
					message = String.format(message, getExcelCellString(cell));
177
					throw new RuntimeException(message, e);
178
				}
179
			}else{
180
				return cell.toString();
181
			}
182
		} catch (Exception e) {
183
			String message = "Error occurred while reading Excel cell '%s' . Use toString() instead. Error: %s";
184
			message = String.format(message,getExcelCellString(cell) ,e.getLocalizedMessage());
185
			logger.warn(message);
186
			return cell.toString();
187
		}
188
	}
189

    
190
	public static String getExcelCellString(Cell cell){
191
		String result = "%s%s";
192
		result = String.format(result, getExcelColString(cell.getColumnIndex()), cell.getRowIndex());
193
		return result;
194
	}
195

    
196
	private static String getExcelColString(int colNr){
197
		int first = colNr / 26;
198
		int second = colNr % 26;
199
		String firstStr = String.valueOf((first > 0 ? (char)(first +64) :""));
200
		String secondStr = String.valueOf((char)(second + 64));
201
		return firstStr +  secondStr;
202
	}
203

    
204
	/**
205
	 * Returns the numeric cell value. In case the cell is formatted as
206
	 * a date it returns a date (using the dates toString() method.
207
	 */
208
	private static String getNumericCellValue(Cell cell) {
209
		Double number = cell.getNumericCellValue();
210
//		HSSFCellStyle style = cell.getCellStyle();
211
//		String dataFormatString = style.getDataFormatString();
212
//		int index = style.getIndex();
213
		HSSFDataFormatter formatter = new HSSFDataFormatter();
214
//		Format defFormat = formatter.getDefaultFormat(cell);
215
		Format format = formatter.createFormat(cell);
216
//		String v = formatter.formatCellValue(cell);
217
		if (format != null && format instanceof DateFormat){
218
			//TODO use ISO or similar format once TimePeriod knows how to parse this
219
//			String result = formatter.formatCellValue(cell);
220
			Date date = cell.getDateCellValue();
221
			Locale locale = Locale.GERMAN;
222
			DateFormat df = DateFormat.getDateInstance(2,locale);
223
			String result = df.format(date); //result of type dd.mm.yyyy
224
//			String result = date.toString();
225

    
226
			return result;
227
		}
228

    
229
		if (number.intValue() == number){
230
			return String.valueOf(number.intValue());
231
		}else{
232
			return String.valueOf(number);
233
		}
234
	}
235

    
236
	/**
237
	 * Returns false, if row is null or has no values
238
	 */
239
	private static boolean checkIsEmptyRow(Row row) {
240
		if (row == null){
241
			return false;
242
		}
243
		boolean notEmpty = false;
244
		for (int j = 0; j<row.getLastCellNum(); j++){
245
			if (row.getCell(j) != null){
246
				notEmpty = true;
247
			}
248
		}
249
		return notEmpty;
250
	}
251

    
252
}
(9-9/26)