Project

General

Profile

Download (8.12 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9

    
10
package eu.etaxonomy.cdm.common;
11

    
12
import java.io.FileNotFoundException;
13
import java.io.InputStream;
14
import java.net.URI;
15
import java.text.DateFormat;
16
import java.text.Format;
17
import java.util.ArrayList;
18
import java.util.Date;
19
import java.util.HashMap;
20
import java.util.Locale;
21

    
22
import org.apache.log4j.Logger;
23
import org.apache.poi.hssf.usermodel.HSSFCell;
24
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
25
import org.apache.poi.ss.usermodel.Cell;
26
import org.apache.poi.ss.usermodel.Row;
27
import org.apache.poi.ss.usermodel.Sheet;
28
import org.apache.poi.ss.usermodel.Workbook;
29
import org.apache.poi.ss.usermodel.WorkbookFactory;
30

    
31
/**
32
 * @author n.hoffmann
33
 * @created 18.11.2008
34
 * @version 1.0
35
 */
36
public class ExcelUtils {
37
	private static final Logger logger = Logger.getLogger(ExcelUtils.class);
38

    
39
    /** Reads all rows of an Excel worksheet */
40
    public static ArrayList<HashMap<String, String>> parseXLS(URI uri) throws FileNotFoundException {
41
    	return parseXLS(uri, null);
42
    }
43

    
44

    
45
	/** Reads all rows of an Excel worksheet */
46
    public static ArrayList<HashMap<String, String>> parseXLS(URI uri, String worksheetName) throws FileNotFoundException {
47
        InputStream stream;
48
        try {
49
            stream = UriUtils.getInputStream(uri);
50
            return parseXLS(stream, worksheetName);
51
        } catch(FileNotFoundException fne) {
52
            throw new FileNotFoundException(uri.toString());
53
        } catch(Exception ioe) {
54
            logger.error("Error reading the Excel file." + uri.toString());
55
            ioe.printStackTrace();
56
        }
57
        return null;
58

    
59
    }
60
        /** Reads all rows of an Excel worksheet */
61
        public static ArrayList<HashMap<String, String>> parseXLS(InputStream stream, String worksheetName) throws FileNotFoundException {
62

    
63

    
64
    	ArrayList<HashMap<String, String>> recordList = new ArrayList<HashMap<String, String>>();
65

    
66
    	try {
67
//    		POIFSFileSystem fs = new POIFSFileSystem(UriUtils.getInputStream(uri));
68
//    		HSSFWorkbook wb = new HSSFWorkbook(fs);
69

    
70

    
71
    		Workbook wb = WorkbookFactory.create(stream);
72

    
73

    
74
    		Sheet sheet;
75
    		if (worksheetName == null){
76
    			sheet = wb.getSheetAt(0);
77
    		}else{
78
    			sheet = wb.getSheet(worksheetName);
79
    		}
80

    
81
    		if (sheet== null){
82
    			if (worksheetName != null){
83
    				logger.debug(worksheetName + " not provided!");
84
    			}
85
    		}else{
86
	    		Row row;
87
	    		Cell cell;
88

    
89
	    		int rows; // Number of rows
90
	    		rows = sheet.getPhysicalNumberOfRows();
91
				if(logger.isDebugEnabled()) { logger.debug("Number of rows: " + rows); }
92

    
93
	    		int cols = 0; // Number of columns
94
	    		int tmp = 0;
95

    
96
	    		// This trick ensures that we get the data properly even if it doesn't start from first few rows
97
	    		for(int i = 0; i < 10 || i < rows; i++) {
98
	    			row = sheet.getRow(i);
99
	     			if(row != null) {
100
	    				tmp = sheet.getRow(i).getPhysicalNumberOfCells();
101
	    				if(tmp > cols){
102
	    					cols = tmp;
103
	    				}
104
	    			}
105
	    		}
106

    
107

    
108
	    		//first row
109
	    		ArrayList<String> columns = new ArrayList<String>();
110
	    		row = sheet.getRow(0);
111
	    		for (int c = 0; c < cols; c++){
112
	    			cell = row.getCell(c);
113
					if(cell != null) {
114
						columns.add(cell.toString());
115
						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + ": " + cell.toString()); }
116
					} else {
117
						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
118
					}
119
	    		}
120

    
121
	    		//value rows
122
	    		for(int r = 1; r < rows; r++) {
123
	    			row = sheet.getRow(r);
124
	    			HashMap<String, String> headers = new HashMap<String, String>();
125
	    			boolean notEmpty = checkIsEmptyRow(row);
126
	    			if(notEmpty) {
127
	    				for(int c = 0; c < cols; c++) {
128
	    					cell = row.getCell(c);
129
	    					if(cell != null) {
130
	    						if (c >= columns.size()){
131
	    							String message = "Cell has no header. There are only %d headers but more not-null cells in approx. row %d. Cell is neglected.";
132
	    							message = String.format(message, columns.size(),row.getRowNum());
133
	    							logger.warn(message);
134
	    						}else{
135
	    							if(logger.isDebugEnabled()) { logger.debug(String.format("Cell #%d: %s", c, cell.toString())); }
136
	    							headers.put(columns.get(c), getCellValue(cell));
137
	    						}
138
	    					} else {
139
	    						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
140
	    					}
141
	    				}
142
	    			}
143
	    			recordList.add(headers);
144
	    		}
145
    		}
146

    
147
    	} catch(Exception ioe) {
148
    		logger.error("Error reading the Excel file.");
149
    		ioe.printStackTrace();
150
    	}
151
    	return recordList;
152
    }
153

    
154

    
155
	public static String getCellValue(Cell cell) {
156
		try {
157
			if (cell.getCellType() == HSSFCell.CELL_TYPE_STRING ){
158
				return cell.getStringCellValue();
159
			}else if (cell.getCellType() == HSSFCell.CELL_TYPE_BLANK){
160
				return "";
161
			}else if (cell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC){
162
				return getNumericCellValue(cell);
163
			}else if (cell.getCellType() == HSSFCell.CELL_TYPE_BOOLEAN){
164
				Boolean cellValue = cell.getBooleanCellValue();
165
				String value = String.valueOf(cellValue);
166
				return value;
167
			}else if (cell.getCellType() == HSSFCell.CELL_TYPE_ERROR){
168
				return "-error-";
169
			}else if (cell.getCellType() == HSSFCell.CELL_TYPE_FORMULA){
170
				try {
171
					String strValue = cell.getStringCellValue();
172
					if ("".equals(strValue)){
173
						strValue = getNumericCellValue(cell);
174
					}
175
					return strValue;
176
				} catch (Exception e) {
177
					String message = "Formula cell (%s) can't be transformed to string";
178
					message = String.format(message, getExcelCellString(cell));
179
					throw new RuntimeException(message, e);
180
				}
181
			}else{
182
				return cell.toString();
183
			}
184
		} catch (Exception e) {
185
			String message = "Error occurred while reading Excel cell '%s' . Use toString() instead. Error: %s";
186
			message = String.format(message,getExcelCellString(cell) ,e.getLocalizedMessage());
187
			logger.warn(message);
188
			return cell.toString();
189
		}
190
	}
191

    
192
	public static String getExcelCellString(Cell cell){
193
		String result = "%s%s";
194
		result = String.format(result, getExcelColString(cell.getColumnIndex()), cell.getRowIndex());
195
		return result;
196
	}
197

    
198
	private static String getExcelColString(int colNr){
199
		int first = colNr / 26;
200
		int second = colNr % 26;
201
//		char a = Integer.valueOf(first).shortValue();
202
		String firstStr = String.valueOf((first > 0 ? (char)(first +64) :""));
203
		String secondStr = String.valueOf((char)(second + 64));
204
		return firstStr +  secondStr;
205
	}
206

    
207

    
208
	/**
209
	 * Returns the numeric cell value. In case the cell is formatted as
210
	 * a date it returns a date (using the dates toString() method.
211
	 * @param cell
212
	 * @return
213
	 */
214
	private static String getNumericCellValue(Cell cell) {
215
		Double number = cell.getNumericCellValue();
216
//		HSSFCellStyle style = cell.getCellStyle();
217
//		String dataFormatString = style.getDataFormatString();
218
//		int index = style.getIndex();
219
		HSSFDataFormatter formatter = new HSSFDataFormatter();
220
//		Format defFormat = formatter.getDefaultFormat(cell);
221
		Format format = formatter.createFormat(cell);
222
//		String v = formatter.formatCellValue(cell);
223
		if (format != null && format instanceof DateFormat){
224
			//TODO use ISO or similar format once TimePeriod knows how to parse this
225
//			String result = formatter.formatCellValue(cell);
226
			Date date = cell.getDateCellValue();
227
			Locale locale = Locale.GERMAN;
228
			DateFormat df = DateFormat.getDateInstance(2,locale);
229
			String result = df.format(date); //result of type dd.mm.yyyy
230
//			String result = date.toString();
231

    
232
			return result;
233
		}
234
//		System.out.println(d);
235

    
236
		if (number.intValue() == number){
237
			return String.valueOf(number.intValue());
238
		}else{
239
			return String.valueOf(number);
240
		}
241
	}
242

    
243

    
244
	/**
245
	 * Returns false, if row is null or has no values
246
	 * @param row
247
	 * @param notEmpty
248
	 * @return
249
	 */
250
	private static boolean checkIsEmptyRow(Row row) {
251
		if (row == null){
252
			return false;
253
		}
254
		boolean notEmpty = false;
255
		for (int j = 0; j<row.getLastCellNum(); j++){
256
			if (row.getCell(j) != null){
257
				notEmpty = true;
258
			}
259
		}
260
		return notEmpty;
261
	}
262

    
263
}
(8-8/20)