Project

General

Profile

Download (8.01 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.common;
10

    
11
import java.io.FileNotFoundException;
12
import java.io.InputStream;
13
import java.net.URI;
14
import java.text.DateFormat;
15
import java.text.Format;
16
import java.util.ArrayList;
17
import java.util.Date;
18
import java.util.HashMap;
19
import java.util.List;
20
import java.util.Locale;
21
import java.util.Map;
22

    
23
import org.apache.log4j.Logger;
24
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
25
import org.apache.poi.ss.usermodel.Cell;
26
import org.apache.poi.ss.usermodel.CellType;
27
import org.apache.poi.ss.usermodel.Row;
28
import org.apache.poi.ss.usermodel.Sheet;
29
import org.apache.poi.ss.usermodel.Workbook;
30
import org.apache.poi.ss.usermodel.WorkbookFactory;
31

    
32
/**
33
 * @author n.hoffmann
34
 * @since 18.11.2008
35
 */
36
public class ExcelUtils {
37
	private static final Logger logger = Logger.getLogger(ExcelUtils.class);
38

    
39
    /** Reads all rows of an Excel worksheet */
40
    public static List<Map<String, String>> parseXLS(URI uri) throws FileNotFoundException {
41
    	return parseXLS(uri, null);
42
    }
43

    
44
	/** Reads all rows of an Excel worksheet */
45
    public static List<Map<String, String>> parseXLS(URI uri, String worksheetName) throws FileNotFoundException {
46
        try {
47
            InputStream stream = UriUtils.getInputStream(uri);
48
            return parseXLS(stream, worksheetName);
49
        } catch(FileNotFoundException fne) {
50
            throw new FileNotFoundException(uri.toString());
51
        } catch(Exception ioe) {
52
            String message = "Error reading the Excel file." + uri.toString();
53
            logger.error(message);
54
            ioe.printStackTrace();
55
            throw new RuntimeException(message);
56
        }
57
    }
58

    
59
    /** Reads all rows of an Excel worksheet */
60
    public static List<Map<String, String>> parseXLS(InputStream stream, String worksheetName) {
61

    
62
    	List<Map<String, String>> recordList = new ArrayList<>();
63

    
64
    	try {
65
//    		POIFSFileSystem fs = new POIFSFileSystem(UriUtils.getInputStream(uri));
66
//    		HSSFWorkbook wb = new HSSFWorkbook(fs);
67

    
68
    		Workbook wb = WorkbookFactory.create(stream);
69

    
70
    		Sheet sheet;
71
    		if (worksheetName == null){
72
    			sheet = wb.getSheetAt(0);
73
    		}else{
74
    			sheet = wb.getSheet(worksheetName);
75
    		}
76

    
77
    		if (sheet== null){
78
    			if (worksheetName != null){
79
    				logger.debug(worksheetName + " not provided!");
80
    			}
81
    		}else{
82
	    		Row row;
83
	    		Cell cell;
84

    
85
	    		int rows; // Number of rows
86
	    		rows = sheet.getPhysicalNumberOfRows();
87
				if(logger.isDebugEnabled()) { logger.debug("Number of rows: " + rows); }
88

    
89
	    		int cols = 0; // Number of columns
90
	    		int tmp = 0;
91

    
92
	    		// This trick ensures that we get the data properly even if it doesn't start from first few rows
93
	    		for(int i = 0; i < 10 || i < rows; i++) {
94
	    			row = sheet.getRow(i);
95
	     			if(row != null) {
96
	    				tmp = sheet.getRow(i).getPhysicalNumberOfCells();
97
	    				if(tmp > cols){
98
	    					cols = tmp;
99
	    				}
100
	    			}
101
	    		}
102

    
103
	    		//first row
104
	    		List<String> columns = new ArrayList<>();
105
	    		row = sheet.getRow(0);
106
	    		for (int c = 0; c < cols; c++){
107
	    			cell = row.getCell(c);
108
					if(cell != null) {
109
					    String str = cell.toString();
110
					    str = (str == null)? null : str.trim();
111
					    //TODO better make case sensitive, but need to adapt all existing imports for this
112
						columns.add(str);
113
						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + ": " + str); }
114
					} else {
115
						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
116
					}
117
	    		}
118

    
119
	    		//value rows
120
	    		for(int r = 1; r < rows; r++) {
121
	    			row = sheet.getRow(r);
122
	    			Map<String, String> headers = new HashMap<>();
123
	    			boolean notEmpty = checkIsEmptyRow(row);
124
	    			if(notEmpty) {
125
	    				for(int c = 0; c < cols; c++) {
126
	    					cell = row.getCell(c);
127
	    					if(cell != null) {
128
	    						if (c >= columns.size()){
129
	    							String message = "Cell has no header. There are only %d headers but more not-null cells in approx. row %d. Cell is neglected.";
130
	    							message = String.format(message, columns.size(),row.getRowNum());
131
	    							logger.warn(message);
132
	    						}else{
133
	    							if(logger.isDebugEnabled()) { logger.debug(String.format("Cell #%d: %s", c, cell.toString())); }
134
	    							headers.put(columns.get(c), getCellValue(cell));
135
	    						}
136
	    					} else {
137
	    						if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
138
	    					}
139
	    				}
140
	    			}
141
	    			recordList.add(headers);
142
	    		}
143
    		}
144
    	} catch(Exception ioe) {
145
    		logger.error("Error reading the Excel file.");
146
    		ioe.printStackTrace();
147
    	}
148
    	return recordList;
149
    }
150

    
151
	public static String getCellValue(Cell cell) {
152
		try {
153
			if (cell.getCellType() == CellType.STRING){
154
				return cell.getStringCellValue();
155
			}else if (cell.getCellType() == CellType.BLANK){
156
				return "";
157
			}else if (cell.getCellType() == CellType.NUMERIC){
158
				return getNumericCellValue(cell);
159
			}else if (cell.getCellType() == CellType.BOOLEAN){
160
				Boolean cellValue = cell.getBooleanCellValue();
161
				String value = String.valueOf(cellValue);
162
				return value;
163
			}else if (cell.getCellType() == CellType.ERROR){
164
				return "-error-";
165
			}else if (cell.getCellType() == CellType.FORMULA){
166
				try {
167
					String strValue = cell.getStringCellValue();
168
					if ("".equals(strValue)){
169
						strValue = getNumericCellValue(cell);
170
					}
171
					return strValue;
172
				} catch (Exception e) {
173
					String message = "Formula cell (%s) can't be transformed to string";
174
					message = String.format(message, getExcelCellString(cell));
175
					throw new RuntimeException(message, e);
176
				}
177
			}else{
178
				return cell.toString();
179
			}
180
		} catch (Exception e) {
181
			String message = "Error occurred while reading Excel cell '%s' . Use toString() instead. Error: %s";
182
			message = String.format(message,getExcelCellString(cell) ,e.getLocalizedMessage());
183
			logger.warn(message);
184
			return cell.toString();
185
		}
186
	}
187

    
188
	public static String getExcelCellString(Cell cell){
189
		String result = "%s%s";
190
		result = String.format(result, getExcelColString(cell.getColumnIndex()), cell.getRowIndex());
191
		return result;
192
	}
193

    
194
	private static String getExcelColString(int colNr){
195
		int first = colNr / 26;
196
		int second = colNr % 26;
197
		String firstStr = String.valueOf((first > 0 ? (char)(first +64) :""));
198
		String secondStr = String.valueOf((char)(second + 64));
199
		return firstStr +  secondStr;
200
	}
201

    
202
	/**
203
	 * Returns the numeric cell value. In case the cell is formatted as
204
	 * a date it returns a date (using the dates toString() method.
205
	 */
206
	private static String getNumericCellValue(Cell cell) {
207
		Double number = cell.getNumericCellValue();
208
//		HSSFCellStyle style = cell.getCellStyle();
209
//		String dataFormatString = style.getDataFormatString();
210
//		int index = style.getIndex();
211
		HSSFDataFormatter formatter = new HSSFDataFormatter();
212
//		Format defFormat = formatter.getDefaultFormat(cell);
213
		Format format = formatter.createFormat(cell);
214
//		String v = formatter.formatCellValue(cell);
215
		if (format != null && format instanceof DateFormat){
216
			//TODO use ISO or similar format once TimePeriod knows how to parse this
217
//			String result = formatter.formatCellValue(cell);
218
			Date date = cell.getDateCellValue();
219
			Locale locale = Locale.GERMAN;
220
			DateFormat df = DateFormat.getDateInstance(2,locale);
221
			String result = df.format(date); //result of type dd.mm.yyyy
222
//			String result = date.toString();
223

    
224
			return result;
225
		}
226

    
227
		if (number.intValue() == number){
228
			return String.valueOf(number.intValue());
229
		}else{
230
			return String.valueOf(number);
231
		}
232
	}
233

    
234
	/**
235
	 * Returns false, if row is null or has no values
236
	 */
237
	private static boolean checkIsEmptyRow(Row row) {
238
		if (row == null){
239
			return false;
240
		}
241
		boolean notEmpty = false;
242
		for (int j = 0; j<row.getLastCellNum(); j++){
243
			if (row.getCell(j) != null){
244
				notEmpty = true;
245
			}
246
		}
247
		return notEmpty;
248
	}
249

    
250
}
(9-9/22)