Project

General

Profile

Download (7.99 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.common;
10

    
11
import java.io.FileNotFoundException;
12
import java.io.InputStream;
13
import java.text.DateFormat;
14
import java.text.Format;
15
import java.util.ArrayList;
16
import java.util.Date;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Locale;
20
import java.util.Map;
21

    
22
import org.apache.log4j.Logger;
23
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
24
import org.apache.poi.ss.usermodel.Cell;
25
import org.apache.poi.ss.usermodel.CellType;
26
import org.apache.poi.ss.usermodel.Row;
27
import org.apache.poi.ss.usermodel.Sheet;
28
import org.apache.poi.ss.usermodel.Workbook;
29
import org.apache.poi.ss.usermodel.WorkbookFactory;
30

    
31
/**
32
 * @author n.hoffmann
33
 * @since 18.11.2008
34
 */
35
public class ExcelUtils {
36
	private static final Logger logger = Logger.getLogger(ExcelUtils.class);
37

    
38
    /** Reads all rows of an Excel worksheet */
39
    public static List<Map<String, String>> parseXLS(URI uri) throws FileNotFoundException {
40
    	return parseXLS(uri, null);
41
    }
42

    
43
	/** Reads all rows of an Excel worksheet */
44
    public static List<Map<String, String>> parseXLS(URI uri, String worksheetName) throws FileNotFoundException {
45
        try {
46
            InputStream stream = UriUtils.getInputStream(uri);
47
            return parseXLS(stream, worksheetName);
48
        } catch(FileNotFoundException fne) {
49
            throw new FileNotFoundException(uri.toString());
50
        } catch(Exception ioe) {
51
            String message = "Error reading the Excel file." + uri.toString();
52
            logger.error(message);
53
            ioe.printStackTrace();
54
            throw new RuntimeException(message);
55
        }
56
    }
57

    
58
    /** Reads all rows of an Excel worksheet */
59
    public static List<Map<String, String>> parseXLS(InputStream stream, String worksheetName) {
60

    
61
    	List<Map<String, String>> recordList = new ArrayList<>();
62

    
63
    	try {
64
//    		POIFSFileSystem fs = new POIFSFileSystem(UriUtils.getInputStream(uri));
65
//    		HSSFWorkbook wb = new HSSFWorkbook(fs);
66

    
67
    		Workbook wb = WorkbookFactory.create(stream);
68

    
69
    		Sheet sheet;
70
    		if (worksheetName == null){
71
    			sheet = wb.getSheetAt(0);
72
    		}else{
73
    			sheet = wb.getSheet(worksheetName);
74
    		}
75

    
76
    		if (sheet== null){
77
    			if (worksheetName != null){
78
    				logger.debug(worksheetName + " not provided!");
79
    			}
80
    		}else{
81
	    		Row row;
82
	    		Cell cell;
83

    
84
	    		int rows; // Number of rows
85
	    		rows = sheet.getPhysicalNumberOfRows();
86
				if(logger.isDebugEnabled()) { logger.debug("Number of rows: " + rows); }
87

    
88
	    		int cols = 0; // Number of columns
89
	    		int tmp = 0;
90

    
91
	    		// This trick ensures that we get the data properly even if it doesn't start from first few rows
92
	    		for(int i = 0; i < 10 || i < rows; i++) {
93
	    			row = sheet.getRow(i);
94
	     			if(row != null) {
95
	    				tmp = sheet.getRow(i).getPhysicalNumberOfCells();
96
	    				if(tmp > cols){
97
	    					cols = tmp;
98
	    				}
99
	    			}
100
	    		}
101

    
102
	    		//first row
103
	    		List<String> columns = new ArrayList<>();
104
	    		row = sheet.getRow(0);
105
	    		for (int c = 0; c < cols; c++){
106
	    			cell = row.getCell(c);
107
					if(cell != null) {
108
					    String str = cell.toString();
109
					    str = (str == null)? null : str.trim();
110
					    //TODO better make case sensitive, but need to adapt all existing imports for this
111
						columns.add(str);
112
						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + ": " + str); }
113
					} else {
114
						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
115
					}
116
	    		}
117

    
118
	    		//value rows
119
	    		for(int r = 1; r < rows; r++) {
120
	    			row = sheet.getRow(r);
121
	    			Map<String, String> headers = new HashMap<>();
122
	    			boolean notEmpty = checkIsEmptyRow(row);
123
	    			if(notEmpty) {
124
	    				for(int c = 0; c < cols; c++) {
125
	    					cell = row.getCell(c);
126
	    					if(cell != null) {
127
	    						if (c >= columns.size()){
128
	    							String message = "Cell has no header. There are only %d headers but more not-null cells in approx. row %d. Cell is neglected.";
129
	    							message = String.format(message, columns.size(),row.getRowNum());
130
	    							logger.warn(message);
131
	    						}else{
132
	    							if(logger.isDebugEnabled()) { logger.debug(String.format("Cell #%d: %s", c, cell.toString())); }
133
	    							headers.put(columns.get(c), getCellValue(cell));
134
	    						}
135
	    					} else {
136
	    						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
137
	    					}
138
	    				}
139
	    			}
140
	    			recordList.add(headers);
141
	    		}
142
    		}
143
    	} catch(Exception ioe) {
144
    		logger.error("Error reading the Excel file.");
145
    		ioe.printStackTrace();
146
    	}
147
    	return recordList;
148
    }
149

    
150
	public static String getCellValue(Cell cell) {
151
		try {
152
			if (cell.getCellType() == CellType.STRING){
153
				return cell.getStringCellValue();
154
			}else if (cell.getCellType() == CellType.BLANK){
155
				return "";
156
			}else if (cell.getCellType() == CellType.NUMERIC){
157
				return getNumericCellValue(cell);
158
			}else if (cell.getCellType() == CellType.BOOLEAN){
159
				Boolean cellValue = cell.getBooleanCellValue();
160
				String value = String.valueOf(cellValue);
161
				return value;
162
			}else if (cell.getCellType() == CellType.ERROR){
163
				return "-error-";
164
			}else if (cell.getCellType() == CellType.FORMULA){
165
				try {
166
					String strValue = cell.getStringCellValue();
167
					if ("".equals(strValue)){
168
						strValue = getNumericCellValue(cell);
169
					}
170
					return strValue;
171
				} catch (Exception e) {
172
					String message = "Formula cell (%s) can't be transformed to string";
173
					message = String.format(message, getExcelCellString(cell));
174
					throw new RuntimeException(message, e);
175
				}
176
			}else{
177
				return cell.toString();
178
			}
179
		} catch (Exception e) {
180
			String message = "Error occurred while reading Excel cell '%s' . Use toString() instead. Error: %s";
181
			message = String.format(message,getExcelCellString(cell) ,e.getLocalizedMessage());
182
			logger.warn(message);
183
			return cell.toString();
184
		}
185
	}
186

    
187
	public static String getExcelCellString(Cell cell){
188
		String result = "%s%s";
189
		result = String.format(result, getExcelColString(cell.getColumnIndex()), cell.getRowIndex());
190
		return result;
191
	}
192

    
193
	private static String getExcelColString(int colNr){
194
		int first = colNr / 26;
195
		int second = colNr % 26;
196
		String firstStr = String.valueOf((first > 0 ? (char)(first +64) :""));
197
		String secondStr = String.valueOf((char)(second + 64));
198
		return firstStr +  secondStr;
199
	}
200

    
201
	/**
202
	 * Returns the numeric cell value. In case the cell is formatted as
203
	 * a date it returns a date (using the dates toString() method.
204
	 */
205
	private static String getNumericCellValue(Cell cell) {
206
		Double number = cell.getNumericCellValue();
207
//		HSSFCellStyle style = cell.getCellStyle();
208
//		String dataFormatString = style.getDataFormatString();
209
//		int index = style.getIndex();
210
		HSSFDataFormatter formatter = new HSSFDataFormatter();
211
//		Format defFormat = formatter.getDefaultFormat(cell);
212
		Format format = formatter.createFormat(cell);
213
//		String v = formatter.formatCellValue(cell);
214
		if (format != null && format instanceof DateFormat){
215
			//TODO use ISO or similar format once TimePeriod knows how to parse this
216
//			String result = formatter.formatCellValue(cell);
217
			Date date = cell.getDateCellValue();
218
			Locale locale = Locale.GERMAN;
219
			DateFormat df = DateFormat.getDateInstance(2,locale);
220
			String result = df.format(date); //result of type dd.mm.yyyy
221
//			String result = date.toString();
222

    
223
			return result;
224
		}
225

    
226
		if (number.intValue() == number){
227
			return String.valueOf(number.intValue());
228
		}else{
229
			return String.valueOf(number);
230
		}
231
	}
232

    
233
	/**
234
	 * Returns false, if row is null or has no values
235
	 */
236
	private static boolean checkIsEmptyRow(Row row) {
237
		if (row == null){
238
			return false;
239
		}
240
		boolean notEmpty = false;
241
		for (int j = 0; j<row.getLastCellNum(); j++){
242
			if (row.getCell(j) != null){
243
				notEmpty = true;
244
			}
245
		}
246
		return notEmpty;
247
	}
248

    
249
}
(9-9/23)