cleanup
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / common / ExcelImportBase.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.excel.common;
11
12 import java.io.ByteArrayInputStream;
13 import java.io.FileNotFoundException;
14 import java.net.URI;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.UUID;
18
19 import org.apache.commons.lang.StringUtils;
20 import org.apache.log4j.Logger;
21 import org.springframework.transaction.support.DefaultTransactionStatus;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.common.ExcelUtils;
25 import eu.etaxonomy.cdm.io.common.CdmImportBase;
26 import eu.etaxonomy.cdm.io.distribution.excelupdate.ExcelDistributionUpdateConfigurator;
27 import eu.etaxonomy.cdm.io.excel.taxa.NormalExplicitImportConfigurator;
28 import eu.etaxonomy.cdm.io.excel.taxa.TaxonListImportConfigurator;
29 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30 import eu.etaxonomy.cdm.model.common.CdmBase;
31 import eu.etaxonomy.cdm.model.common.TimePeriod;
32 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
33 import eu.etaxonomy.cdm.model.reference.Reference;
34 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
35 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
36 import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
37
38 /**
39 * @author a.babadshanjan
40 * @since 17.12.2008
41 */
42 public abstract class ExcelImportBase<STATE extends ExcelImportState<CONFIG, ROW>, CONFIG extends ExcelImportConfiguratorBase, ROW extends ExcelRowBase>
43 extends CdmImportBase<CONFIG, STATE> {
44
45 private static final long serialVersionUID = 2759164811664484732L;
46 private static final Logger logger = Logger.getLogger(ExcelImportBase.class);
47
48 protected static final String SCIENTIFIC_NAME_COLUMN = "ScientificName";
49
50 private List<Map<String, String>> recordList = null;
51
52 private ExcelImportConfiguratorBase configurator = null;
53
54
55 /** Reads data from an Excel file and stores them into a CDM DB.
56 *
57 * @param config
58 * @param stores (not used)
59 */
60 @Override
61 protected void doInvoke(STATE state){
62
63 logger.debug("Importing excel data");
64
65 configurator = state.getConfig();
66
67 NomenclaturalCode nc = getConfigurator().getNomenclaturalCode();
68 if (nc == null && requiresNomenclaturalCode()) {
69 logger.error("Nomenclatural code could not be determined. Skip invoke.");
70 state.setUnsuccessfull();
71 return;
72 }
73 URI source = null;
74
75 byte[] data = null;
76 // read and save all rows of the excel worksheet
77 if ((state.getConfig() instanceof NormalExplicitImportConfigurator
78 || state.getConfig() instanceof ExcelDistributionUpdateConfigurator
79 || state.getConfig() instanceof TaxonListImportConfigurator) && (state.getConfig().getStream() != null
80 || state.getConfig().getStream() != null)){
81 data = state.getConfig().getStream();
82 } else{
83 source = state.getConfig().getSource();
84 }
85
86 String sheetName = getWorksheetName(state.getConfig());
87
88 if (data != null){
89 try {
90 ByteArrayInputStream stream = new ByteArrayInputStream(data);
91 recordList = ExcelUtils.parseXLS(stream, sheetName);
92 } catch (Exception e) {
93 throw new RuntimeException(e);
94 }
95 }else{
96 try {
97 recordList = ExcelUtils.parseXLS(source, sheetName);
98 } catch (FileNotFoundException e) {
99 String message = "File not found: " + source;
100 warnProgress(state, message, e);
101 logger.error(message);
102 state.setUnsuccessfull();
103 return;
104 }
105 }
106
107 handleRecordList(state, source);
108 logger.debug("End excel data import");
109 return;
110 }
111
112 protected boolean requiresNomenclaturalCode() {
113 return true;
114 }
115
116 private void handleRecordList(STATE state, URI source) {
117 Integer startingLine = 2;
118 if (recordList != null) {
119 Map<String,String> record = null;
120
121 state.setTransactionStatus(startTransaction());
122
123 //first pass
124 state.setCurrentLine(startingLine);
125 for (int i = 0; i < recordList.size(); i++) {
126 record = recordList.get(i);
127 analyzeRecord(record, state);
128 state.setOriginalRecord(record);
129 try {
130 firstPass(state);
131 //for debugging only
132 // if (i % 1000 == 0){
133 // try {
134 // System.out.println(i);
135 // getSession().flush();
136 // } catch (Exception e) {
137 // e.printStackTrace();
138 // }
139 // }
140 DefaultTransactionStatus defStatus = (DefaultTransactionStatus) state.getTransactionStatus();
141 if (defStatus.isRollbackOnly()){
142 logger.warn("Rollback only in line: " + i);
143 }
144 } catch (Exception e) {
145 e.printStackTrace();
146 }finally{
147 state.incCurrentLine();
148 }
149 }
150 //second pass
151 state.setCurrentLine(startingLine);
152 for (int i = 0; i < recordList.size(); i++) {
153 record = recordList.get(i);
154 analyzeRecord(record, state);
155 state.setOriginalRecord(record);
156 secondPass(state);
157 state.incCurrentLine();
158 }
159 if (configurator.isDeduplicateReferences()){
160 getReferenceService().deduplicate(Reference.class, null, null);
161 }
162 if (configurator.isDeduplicateAuthors()){
163 getAgentService().deduplicate(TeamOrPersonBase.class, null, null);
164 }
165 commitTransaction(state.getTransactionStatus());
166 }else{
167 logger.warn("No records found in " + source);
168 }
169 return;
170 }
171
172 /**
173 * To define a worksheet name other then the one defined in the configurator
174 * override this method with a non <code>null</code> return value.
175 * If <code>null</code> is returned the first worksheet is taken.
176
177 * @return worksheet name. <code>null</null> if no worksheet is defined.
178 */
179 protected String getWorksheetName(CONFIG config) {
180 return config.getWorksheetName();
181 }
182
183 @Override
184 protected boolean doCheck(STATE state) {
185 boolean result = true;
186 logger.warn("No check implemented for Excel import");
187 return result;
188 }
189
190 /**
191 *
192 *
193 * @param record
194 * @return
195 */
196 protected abstract void analyzeRecord(Map<String,String> record, STATE state);
197
198 protected abstract void firstPass(STATE state);
199 protected abstract void secondPass(STATE state);
200
201
202 public ExcelImportConfiguratorBase getConfigurator() {
203 return configurator;
204 }
205
206
207 protected int floatString2IntValue(String value) {
208 int intValue = 0;
209 try {
210 Float fobj = new Float(Float.parseFloat(value));
211 intValue = fobj.intValue();
212 if (logger.isDebugEnabled()) { logger.debug("Value formatted: " + intValue); }
213 } catch (NumberFormatException ex) {
214 logger.error(value + " is not an integer");
215 }
216 return intValue;
217 }
218
219 protected String floatString2IntStringValue(String value) {
220 int i = floatString2IntValue(value);
221 return String.valueOf(i);
222 }
223
224
225 /**
226 * @param start
227 * @param end
228 * @return
229 */
230 protected TimePeriod getTimePeriod(String start, String end) {
231 String strPeriod = CdmUtils.concat(" - ", start, end);
232 TimePeriod result = TimePeriodParser.parseString(strPeriod);
233 return result;
234 }
235
236 /**
237 * Returns the value of the record map for the given key.
238 * The value is trimmed and empty values are set to <code>null</code>.
239 * @param record
240 * @param originalKey
241 * @return the value
242 */
243 protected static String getValue(Map<String, String> record, String originalKey) {
244 String value = record.get(originalKey);
245 if (! StringUtils.isBlank(value)) {
246 if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
247 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
248 return value;
249 }else{
250 return null;
251 }
252 }
253
254 protected String getValue(STATE state, String key){
255 key = state.getConfig().replaceColumnLabel(key);
256 return getValue(state.getOriginalRecord(), key);
257 }
258
259 /**
260 * Returns the taxon for the given CDM uuid. If no taxon exists for the given id
261 * no record is returned. If a name cache, name title cache (full name) or
262 * taxon title cache column is given the name is checked against the given columns.
263 * If they don't manage it is logged as a warning in import result.
264 * <BR>If clazz is given, only objects of the given class are loaded.
265 *
266 *
267 * @param state
268 * @param colTaxonUuid taxon uuid column
269 * @param colNameCache name cache column (if exists)
270 * @param colNameTitleCache name title cache column (if exists)
271 * @param colTaxonTitleCache taxon title cache column (if exists)
272 * @param clazz the clazz null
273 * @param line the row, for debug information
274 * @return the taxon to load
275 */
276 protected <T extends TaxonBase<?>> T getTaxonByCdmId(STATE state, String colTaxonUuid,
277 String colNameCache, String colNameTitleCache, String colTaxonTitleCache,
278 Class<T> clazz, String line) {
279
280 Map<String, String> record = getRecord(state);
281 String strUuidTaxon = record.get(colTaxonUuid);
282 if (strUuidTaxon != null){
283 UUID uuidTaxon;
284 try {
285 uuidTaxon = UUID.fromString(strUuidTaxon);
286 } catch (Exception e) {
287 state.getResult().addError("Taxon uuid has incorrect format. Taxon could not be loaded. Data not imported.", null, line);
288 return null;
289 }
290 TaxonBase<?> result = getTaxonService().find(uuidTaxon);
291 //TODO load only objects of correct class
292 if (result != null && clazz != null && !result.isInstanceOf(clazz)){
293 result = null;
294 }
295
296
297 if (result == null){
298 state.getResult().addError("Taxon for uuid "+strUuidTaxon+" could not be found in database. "
299 + "Taxon could not be loaded. Data not imported.", null, line);
300 }else{
301 verifyName(state, colNameCache, colNameTitleCache, colTaxonTitleCache, line, record, result);
302 }
303 result = CdmBase.deproxy(result, clazz);
304
305
306 return CdmBase.deproxy(result, clazz);
307 }else{
308 String message = "No taxon identifier column found";
309 state.getResult().addWarning(message, null, line);
310 return null;
311 }
312 }
313
314 protected Map<String, String> getRecord(STATE state) {
315 Map<String, String> record = state.getOriginalRecord();
316 return record;
317 }
318
319
320 /**
321 * @see #getTaxonByCdmId(ExcelImportState, String, String, String, String, Class, String)
322 */
323 protected void verifyName(STATE state, String colNameCache, String colNameTitleCache, String colTaxonTitleCache,
324 String line, Map<String, String> record, TaxonBase<?> result) {
325 //nameCache
326 String strExpectedNameCache = record.get(colNameCache);
327 String nameCache = result.getName() == null ? null : result.getName().getNameCache();
328 if (isNotBlank(strExpectedNameCache) && (!strExpectedNameCache.trim().equals(nameCache))){
329 String message = "Name cache (%s) does not match expected name (%s)";
330 message = String.format(message, nameCache==null? "null":nameCache, strExpectedNameCache);
331 state.getResult().addWarning(message, null, line);
332 }
333 //name title
334 String strExpectedNameTitleCache = record.get(colNameTitleCache);
335 String nameTitleCache = result.getName() == null ? null : result.getName().getTitleCache();
336 if (isNotBlank(strExpectedNameTitleCache) && (!strExpectedNameTitleCache.trim().equals(nameTitleCache))){
337 String message = "Name title cache (%s) does not match expected name (%s)";
338 message = String.format(message, nameTitleCache==null? "null":nameTitleCache, strExpectedNameTitleCache);
339 state.getResult().addWarning(message, null, line);
340 }
341 //taxon title cache
342 String strExpectedTaxonTitleCache = record.get(colTaxonTitleCache);
343 String taxonTitleCache = result.getTitleCache();
344 if (isNotBlank(strExpectedTaxonTitleCache) && (!strExpectedTaxonTitleCache.trim().equals(taxonTitleCache))){
345 String message = "Name cache (%s) does not match expected name (%s)";
346 message = String.format(message, taxonTitleCache==null? "null":taxonTitleCache, strExpectedTaxonTitleCache);
347 state.getResult().addWarning(message, null, line);
348 }
349 }
350
351
352 /**
353 * Non transaction save method to retrieve the source reference
354 * if either existent or not in the database (uses check for uuid).
355 *
356 * @param state
357 * @return the source reference
358 */
359 protected Reference getSourceReference(STATE state) {
360
361 Reference sourceRef = state.getSourceReference();
362 if (sourceRef != null){
363 return sourceRef;
364 }
365 UUID uuid = state.getConfig().getSourceRefUuid();
366 if (uuid == null){
367 sourceRef = state.getConfig().getSourceReference();
368 if (sourceRef != null){
369 uuid = sourceRef.getUuid();
370 }
371 }
372 if (uuid != null){
373 Reference existingRef = getReferenceService().find(uuid);
374 if (existingRef != null){
375 sourceRef = existingRef;
376 }
377 // else if (sourceRef != null){
378 // getReferenceService().save(sourceRef);
379 // }
380 }
381 if (sourceRef == null){
382 sourceRef = ReferenceFactory.newGeneric();
383 String title = state.getConfig().getSourceNameString();
384 sourceRef.setTitle(title);
385 state.getConfig().setSourceReference(sourceRef);
386 }
387 state.setSourceReference(sourceRef);
388
389 return sourceRef;
390 }
391 }