1
|
// $Id$
|
2
|
/**
|
3
|
* Copyright (C) 2009 EDIT
|
4
|
* European Distributed Institute of Taxonomy
|
5
|
* http://www.e-taxonomy.eu
|
6
|
*
|
7
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
8
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
9
|
*/
|
10
|
package eu.etaxonomy.cdm.io.excel.stream;
|
11
|
|
12
|
import java.io.IOException;
|
13
|
import java.net.URI;
|
14
|
import java.util.ArrayList;
|
15
|
import java.util.HashMap;
|
16
|
import java.util.List;
|
17
|
import java.util.Map;
|
18
|
|
19
|
import org.apache.commons.lang.StringUtils;
|
20
|
import org.apache.http.HttpException;
|
21
|
import org.apache.log4j.Logger;
|
22
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
23
|
import org.apache.poi.ss.usermodel.Sheet;
|
24
|
import org.apache.poi.ss.usermodel.Workbook;
|
25
|
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
26
|
|
27
|
import eu.etaxonomy.cdm.common.UriUtils;
|
28
|
import eu.etaxonomy.cdm.common.monitor.IProgressMonitor;
|
29
|
import eu.etaxonomy.cdm.io.dwca.TermUri;
|
30
|
import eu.etaxonomy.cdm.io.dwca.in.IReader;
|
31
|
import eu.etaxonomy.cdm.io.dwca.in.ListReader;
|
32
|
|
33
|
/**
|
34
|
* This class transforms excel archive in to a InputStream.
|
35
|
*
|
36
|
* @author a.oppermann
|
37
|
* @date 16.05.2013
|
38
|
*
|
39
|
*/
|
40
|
public class ExcelToStreamConverter<STATE extends ExcelStreamImportState> {
|
41
|
|
42
|
private static Logger logger = Logger.getLogger(ExcelToStreamConverter.class);
|
43
|
|
44
|
private URI source;
|
45
|
|
46
|
/**
|
47
|
*
|
48
|
* Factory
|
49
|
* @param source
|
50
|
* @return
|
51
|
*/
|
52
|
public static ExcelToStreamConverter<ExcelStreamImportState> NewInstance(URI source) {
|
53
|
return new ExcelToStreamConverter<ExcelStreamImportState>(source);
|
54
|
}
|
55
|
|
56
|
/**
|
57
|
* Constructor
|
58
|
* @param source
|
59
|
*/
|
60
|
public ExcelToStreamConverter(URI source){
|
61
|
this.source = source;
|
62
|
}
|
63
|
|
64
|
/**
|
65
|
* @param state
|
66
|
* @return
|
67
|
* @throws HttpException
|
68
|
* @throws IOException
|
69
|
* @throws InvalidFormatException
|
70
|
*/
|
71
|
public IReader<ExcelRecordStream> getWorksheetStream(STATE state) throws IOException, HttpException, InvalidFormatException{
|
72
|
// POIFSFileSystem fs = new POIFSFileSystem(UriUtils.getInputStream(source));
|
73
|
// HSSFWorkbook wb = new HSSFWorkbook(fs);
|
74
|
Workbook wb = WorkbookFactory.create(UriUtils.getInputStream(source));
|
75
|
|
76
|
Map<TermUri, Integer> map = new HashMap<TermUri, Integer>();
|
77
|
for (int i = 0 ; i < wb.getNumberOfSheets(); i++){
|
78
|
String wsName = wb.getSheetName(i);
|
79
|
TermUri termUri = convertSheetName2TermUri(wsName);
|
80
|
if (map.get(termUri) != null){
|
81
|
String message = "Worksheet type exists more then once: %s";
|
82
|
//TODO fire event
|
83
|
logger.warn(String.format(message, termUri.toString()));
|
84
|
}
|
85
|
map.put(termUri, i);
|
86
|
}
|
87
|
|
88
|
//core
|
89
|
List<ExcelRecordStream> streamList = new ArrayList<ExcelRecordStream>();
|
90
|
TermUri term= TermUri.DWC_TAXON;
|
91
|
Integer i = map.get(term);
|
92
|
if (i != null){
|
93
|
Sheet ws = wb.getSheetAt(i);
|
94
|
ExcelRecordStream excelRecordStream = new ExcelRecordStream(state, ws, term);
|
95
|
streamList.add(excelRecordStream); //for taxa and names
|
96
|
}else{
|
97
|
String message = "Taxon worksheet not available for %s";
|
98
|
logger.warn(String.format(message, "taxa"));
|
99
|
state.setSuccess(false);
|
100
|
}
|
101
|
|
102
|
//core relationships
|
103
|
i = map.get(term);
|
104
|
if (i != null){
|
105
|
Sheet ws = wb.getSheetAt(i);
|
106
|
ExcelRecordStream excelRecordStream = new ExcelRecordStream(state, ws, term);
|
107
|
streamList.add(excelRecordStream); //for relationships
|
108
|
}else{
|
109
|
String message = "Taxon worksheet not available for %s";
|
110
|
logger.warn(String.format(message, "taxon relations"));
|
111
|
state.setSuccess(false);
|
112
|
}
|
113
|
|
114
|
return new ListReader<ExcelRecordStream>(streamList);
|
115
|
}
|
116
|
|
117
|
|
118
|
/**
|
119
|
* @param wsName
|
120
|
* @return
|
121
|
*/
|
122
|
private TermUri convertSheetName2TermUri(String wsName) {
|
123
|
if (StringUtils.isBlank(wsName)){
|
124
|
throw new IllegalArgumentException("Worksheet name must not be null or empty");
|
125
|
//FIXME: Hard coded worksheet name should be avoided
|
126
|
}else if(wsName.equalsIgnoreCase("Sheet1")){
|
127
|
return TermUri.DWC_TAXON;
|
128
|
}else{
|
129
|
String message = "Worksheet name %s not yet handled by %s";
|
130
|
throw new IllegalArgumentException(String.format(message, wsName, this.getClass().getSimpleName()));
|
131
|
}
|
132
|
}
|
133
|
|
134
|
public void warnProgress(STATE state, String message, Throwable e) {
|
135
|
if(state.getConfig().getProgressMonitor() != null){
|
136
|
IProgressMonitor monitor = state.getConfig().getProgressMonitor();
|
137
|
if (e == null) {
|
138
|
monitor.warning(message);
|
139
|
}else{
|
140
|
monitor.warning(message, e);
|
141
|
}
|
142
|
}
|
143
|
}
|
144
|
|
145
|
|
146
|
|
147
|
}
|