added source type generic to import configurator base class
[cdmlib.git] / cdmlib-commons / src / main / java / eu / etaxonomy / cdm / common / ExcelUtils.java
1 // $Id$
2 /**
3 * Copyright (C) 2007 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10
11 package eu.etaxonomy.cdm.common;
12
13 import java.io.FileInputStream;
14 import java.util.ArrayList;
15 import java.util.HashMap;
16 import java.io.FileNotFoundException;
17
18 import org.apache.log4j.Logger;
19 import org.apache.poi.hssf.usermodel.HSSFCell;
20 import org.apache.poi.hssf.usermodel.HSSFRow;
21 import org.apache.poi.hssf.usermodel.HSSFSheet;
22 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
23 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
24
25 /**
26 * @author n.hoffmann
27 * @created 18.11.2008
28 * @version 1.0
29 */
30 public class ExcelUtils {
31 private static final Logger logger = Logger.getLogger(ExcelUtils.class);
32
33 /** Reads all rows of an Excel worksheet */
34 public static ArrayList<HashMap<String, String>> parseXLS(String fileName) throws FileNotFoundException {
35
36 ArrayList<HashMap<String, String>> recordList = new ArrayList<HashMap<String, String>>();
37
38 try {
39 POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(fileName));
40 HSSFWorkbook wb = new HSSFWorkbook(fs);
41 HSSFSheet sheet = wb.getSheetAt(0);
42 HSSFRow row;
43 HSSFCell cell;
44
45 int rows; // No of rows
46 rows = sheet.getPhysicalNumberOfRows();
47 if(logger.isDebugEnabled()) { logger.debug("Number of rows: " + rows); }
48
49 int cols = 0; // No of columns
50 int tmp = 0;
51
52 // This trick ensures that we get the data properly even if it doesn't start from first few rows
53 for(int i = 0; i < 10 || i < rows; i++) {
54 row = sheet.getRow(i);
55
56
57 if(row != null) {
58 tmp = sheet.getRow(i).getPhysicalNumberOfCells();
59 if(tmp > cols){
60 cols = tmp;
61 }
62 }
63 }
64 HashMap<String, String> headers = null;
65 ArrayList<String> columns = new ArrayList<String>();
66 row = sheet.getRow(0);
67 for (int c = 0; c < cols; c++){
68 cell = row.getCell(c);
69 if(cell != null) {
70 columns.add(cell.toString());
71 if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + ": " + cell.toString()); }
72 } else {
73 if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
74 }
75 }
76 for(int r = 1; r < rows; r++) {
77 row = sheet.getRow(r);
78 headers = new HashMap<String, String>();
79 boolean notEmpty = false;
80 for (int j = 0; j<row.getRowNum(); j++){
81 if (row.getCell(j) != null){
82 notEmpty = true;
83 break;
84 }
85 }
86 if(row != null && notEmpty) {
87 for(int c = 0; c < cols; c++) {
88 cell = row.getCell((short)c);
89 if(cell != null) {
90 if (c >= columns.size()){
91 logger.warn("Cell has no header. There are only " + columns.size() + " headers but more not-null cells in approx. row " + row.getRowNum() + ". Cell is neglected.");
92 }else{
93 if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + ": " + cell.toString()); }
94 headers.put(columns.get(c), cell.toString());
95 }
96 } else {
97 if(logger.isDebugEnabled()) { logger.debug("Cell #" + c + " is null"); }
98 }
99 }
100 }
101 recordList.add(headers);
102 }
103 } catch(FileNotFoundException fne) {
104 throw new FileNotFoundException(fileName);
105 } catch(Exception ioe) {
106 logger.error("Error reading the Excel file.");
107 ioe.printStackTrace();
108 }
109 return recordList;
110 }
111
112 }