1
|
// $Id$
|
2
|
/**
|
3
|
* Copyright (C) 2007 EDIT
|
4
|
* European Distributed Institute of Taxonomy
|
5
|
* http://www.e-taxonomy.eu
|
6
|
*
|
7
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
8
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
9
|
*/
|
10
|
|
11
|
package eu.etaxonomy.cdm.common;
|
12
|
|
13
|
import java.io.FileNotFoundException;
|
14
|
import java.net.URI;
|
15
|
import java.util.ArrayList;
|
16
|
import java.util.HashMap;
|
17
|
|
18
|
import org.apache.log4j.Logger;
|
19
|
import org.apache.poi.hssf.usermodel.HSSFCell;
|
20
|
import org.apache.poi.hssf.usermodel.HSSFRow;
|
21
|
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
22
|
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
23
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
24
|
|
25
|
/**
|
26
|
* @author n.hoffmann
|
27
|
* @created 18.11.2008
|
28
|
* @version 1.0
|
29
|
*/
|
30
|
public class ExcelUtils {
|
31
|
private static final Logger logger = Logger.getLogger(ExcelUtils.class);
|
32
|
|
33
|
/** Reads all rows of an Excel worksheet */
|
34
|
public static ArrayList<HashMap<String, String>> parseXLS(URI uri) throws FileNotFoundException {
|
35
|
return parseXLS(uri, null);
|
36
|
}
|
37
|
|
38
|
|
39
|
/** Reads all rows of an Excel worksheet */
|
40
|
public static ArrayList<HashMap<String, String>> parseXLS(URI uri, String worksheetName) throws FileNotFoundException {
|
41
|
|
42
|
ArrayList<HashMap<String, String>> recordList = new ArrayList<HashMap<String, String>>();
|
43
|
|
44
|
try {
|
45
|
POIFSFileSystem fs = new POIFSFileSystem(UriUtils.getInputStream(uri));
|
46
|
HSSFWorkbook wb = new HSSFWorkbook(fs);
|
47
|
HSSFSheet sheet;
|
48
|
if (worksheetName == null){
|
49
|
sheet = wb.getSheetAt(0);
|
50
|
}else{
|
51
|
sheet = wb.getSheet(worksheetName);
|
52
|
}
|
53
|
HSSFRow row;
|
54
|
HSSFCell cell;
|
55
|
|
56
|
int rows; // No of rows
|
57
|
rows = sheet.getPhysicalNumberOfRows();
|
58
|
if(logger.isDebugEnabled()) { logger.debug("Number of rows: " + rows); }
|
59
|
|
60
|
int cols = 0; // No of columns
|
61
|
int tmp = 0;
|
62
|
|
63
|
// This trick ensures that we get the data properly even if it doesn't start from first few rows
|
64
|
for(int i = 0; i < 10 || i < rows; i++) {
|
65
|
row = sheet.getRow(i);
|
66
|
|
67
|
|
68
|
if(row != null) {
|
69
|
tmp = sheet.getRow(i).getPhysicalNumberOfCells();
|
70
|
if(tmp > cols){
|
71
|
cols = tmp;
|
72
|
}
|
73
|
}
|
74
|
}
|
75
|
ArrayList<String> columns = new ArrayList<String>();
|
76
|
row = sheet.getRow(0);
|
77
|
for (int c = 0; c < cols; c++){
|
78
|
cell = row.getCell(c);
|
79
|
if(cell != null) {
|
80
|
columns.add(cell.toString());
|
81
|
if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + ": " + cell.toString()); }
|
82
|
} else {
|
83
|
if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
|
84
|
}
|
85
|
}
|
86
|
for(int r = 1; r < rows; r++) {
|
87
|
row = sheet.getRow(r);
|
88
|
HashMap<String, String> headers = new HashMap<String, String>();
|
89
|
boolean notEmpty = false;
|
90
|
for (int j = 0; j<row.getLastCellNum(); j++){
|
91
|
if (row.getCell(j) != null){
|
92
|
notEmpty = true;
|
93
|
}
|
94
|
}
|
95
|
if(row != null && notEmpty) {
|
96
|
for(int c = 0; c < cols; c++) {
|
97
|
cell = row.getCell((short)c);
|
98
|
if(cell != null) {
|
99
|
if (c >= columns.size()){
|
100
|
logger.warn("Cell has no header. There are only " + columns.size() + " headers but more not-null cells in approx. row " + row.getRowNum() + ". Cell is neglected.");
|
101
|
}else{
|
102
|
if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + ": " + cell.toString()); }
|
103
|
headers.put(columns.get(c), cell.toString());
|
104
|
}
|
105
|
} else {
|
106
|
if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
|
107
|
}
|
108
|
}
|
109
|
}
|
110
|
recordList.add(headers);
|
111
|
}
|
112
|
} catch(FileNotFoundException fne) {
|
113
|
throw new FileNotFoundException(uri.toString());
|
114
|
} catch(Exception ioe) {
|
115
|
logger.error("Error reading the Excel file.");
|
116
|
ioe.printStackTrace();
|
117
|
}
|
118
|
return recordList;
|
119
|
}
|
120
|
|
121
|
}
|