2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.excel
.common
;
12 import java
.io
.ByteArrayInputStream
;
13 import java
.io
.FileNotFoundException
;
15 import java
.util
.List
;
17 import java
.util
.UUID
;
19 import org
.apache
.commons
.lang
.StringUtils
;
20 import org
.apache
.log4j
.Logger
;
21 import org
.springframework
.transaction
.support
.DefaultTransactionStatus
;
23 import eu
.etaxonomy
.cdm
.common
.CdmUtils
;
24 import eu
.etaxonomy
.cdm
.common
.ExcelUtils
;
25 import eu
.etaxonomy
.cdm
.io
.common
.CdmImportBase
;
26 import eu
.etaxonomy
.cdm
.io
.distribution
.excelupdate
.ExcelDistributionUpdateConfigurator
;
27 import eu
.etaxonomy
.cdm
.io
.excel
.taxa
.NormalExplicitImportConfigurator
;
28 import eu
.etaxonomy
.cdm
.io
.excel
.taxa
.TaxonListImportConfigurator
;
29 import eu
.etaxonomy
.cdm
.model
.agent
.TeamOrPersonBase
;
30 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
31 import eu
.etaxonomy
.cdm
.model
.common
.TimePeriod
;
32 import eu
.etaxonomy
.cdm
.model
.name
.NomenclaturalCode
;
33 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
34 import eu
.etaxonomy
.cdm
.model
.reference
.ReferenceFactory
;
35 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
36 import eu
.etaxonomy
.cdm
.strategy
.parser
.TimePeriodParser
;
39 * @author a.babadshanjan
42 public abstract class ExcelImportBase
<STATE
extends ExcelImportState
<CONFIG
, ROW
>, CONFIG
extends ExcelImportConfiguratorBase
, ROW
extends ExcelRowBase
>
43 extends CdmImportBase
<CONFIG
, STATE
> {
45 private static final long serialVersionUID
= 2759164811664484732L;
46 private static final Logger logger
= Logger
.getLogger(ExcelImportBase
.class);
48 protected static final String SCIENTIFIC_NAME_COLUMN
= "ScientificName";
50 private List
<Map
<String
, String
>> recordList
= null;
52 private ExcelImportConfiguratorBase configurator
= null;
55 /** Reads data from an Excel file and stores them into a CDM DB.
58 * @param stores (not used)
61 protected void doInvoke(STATE state
){
63 logger
.debug("Importing excel data");
65 configurator
= state
.getConfig();
67 NomenclaturalCode nc
= getConfigurator().getNomenclaturalCode();
68 if (nc
== null && requiresNomenclaturalCode()) {
69 logger
.error("Nomenclatural code could not be determined. Skip invoke.");
70 state
.setUnsuccessfull();
76 // read and save all rows of the excel worksheet
77 if ((state
.getConfig() instanceof NormalExplicitImportConfigurator
78 || state
.getConfig() instanceof ExcelDistributionUpdateConfigurator
79 || state
.getConfig() instanceof TaxonListImportConfigurator
) && (state
.getConfig().getStream() != null
80 || state
.getConfig().getStream() != null)){
81 data
= state
.getConfig().getStream();
83 source
= state
.getConfig().getSource();
86 String sheetName
= getWorksheetName(state
.getConfig());
90 ByteArrayInputStream stream
= new ByteArrayInputStream(data
);
91 recordList
= ExcelUtils
.parseXLS(stream
, sheetName
);
92 } catch (Exception e
) {
93 throw new RuntimeException(e
);
97 recordList
= ExcelUtils
.parseXLS(source
, sheetName
);
98 } catch (FileNotFoundException e
) {
99 String message
= "File not found: " + source
;
100 warnProgress(state
, message
, e
);
101 logger
.error(message
);
102 state
.setUnsuccessfull();
107 handleRecordList(state
, source
);
108 logger
.debug("End excel data import");
/**
 * Tells <code>doInvoke</code> whether a nomenclatural code is mandatory for the import:
 * if this returns <code>true</code> and the configurator supplies no nomenclatural code,
 * <code>doInvoke</code> logs an error and marks the state as unsuccessful.
 * NOTE(review): the method body is not visible in this excerpt, so the value returned
 * by this default implementation can not be confirmed here.
 */
112 protected boolean requiresNomenclaturalCode() {
// Processes the parsed worksheet rows held in recordList within one transaction.
// If recordList is null only a warning naming the source is logged.
// NOTE(review): several original lines of this method (the try opener, the calls made
// per row in each of the two loops, the catch body and the closing braces) are not
// visible in this excerpt - confirm the details against the complete file.
116 private void handleRecordList(STATE state
, URI source
) {
// Line number given to the first record; presumably 2 because row 1 of the sheet is the header - TODO confirm.
117 Integer startingLine
= 2;
118 if (recordList
!= null) {
119 Map
<String
,String
> record
= null;
// The transaction status is kept in the state; it is read again for the rollback check and for the commit below.
121 state
.setTransactionStatus(startTransaction());
// First loop over all records: the line counter is reset to the starting line.
124 state
.setCurrentLine(startingLine
);
125 for (int i
= 0; i
< recordList
.size(); i
++) {
126 record
= recordList
.get(i
);
// The record is analyzed first and then stored as the state's original record.
127 analyzeRecord(record
, state
);
128 state
.setOriginalRecord(record
);
132 // if (i % 1000 == 0){
134 // System.out.println(i);
135 // getSession().flush();
136 // } catch (Exception e) {
137 // e.printStackTrace();
// Only logs a warning if the transaction has been marked rollback-only; processing continues.
// Note that the logged number is the loop index i, not the current line of the state.
140 DefaultTransactionStatus defStatus
= (DefaultTransactionStatus
) state
.getTransactionStatus();
141 if (defStatus
.isRollbackOnly()){
142 logger
.warn("Rollback only in line: " + i
);
// NOTE(review): the handling inside this catch block is not visible in this excerpt.
144 } catch (Exception e
) {
147 state
.incCurrentLine();
// Second loop over all records: the line counter is reset and every record is analyzed again.
151 state
.setCurrentLine(startingLine
);
152 for (int i
= 0; i
< recordList
.size(); i
++) {
153 record
= recordList
.get(i
);
154 analyzeRecord(record
, state
);
155 state
.setOriginalRecord(record
);
157 state
.incCurrentLine();
// Optional deduplication of references and of authors, each switched on by its configurator flag.
159 if (configurator
.isDeduplicateReferences()){
160 getReferenceService().deduplicate(Reference
.class, null, null);
162 if (configurator
.isDeduplicateAuthors()){
163 getAgentService().deduplicate(TeamOrPersonBase
.class, null, null);
// Commits the transaction that was started before the first loop.
165 commitTransaction(state
.getTransactionStatus());
// Reached when recordList is null (the else line itself is not visible in this excerpt).
167 logger
.warn("No records found in " + source
);
173 * To define a worksheet name other then the one defined in the configurator
174 * override this method with a non <code>null</code> return value.
175 * If <code>null</code> is returned the first worksheet is taken.
177 * @return worksheet name. <code>null</null> if no worksheet is defined.
179 protected String
getWorksheetName(CONFIG config
) {
180 return config
.getWorksheetName();
184 protected boolean doCheck(STATE state
) {
185 boolean result
= true;
186 logger
.warn("No check implemented for Excel import");
196 protected abstract void analyzeRecord(Map
<String
,String
> record
, STATE state
);
198 protected abstract void firstPass(STATE state
);
199 protected abstract void secondPass(STATE state
);
/**
 * Accessor for the configurator of this import.
 * NOTE(review): the method body is not visible in this excerpt; presumably it returns
 * the <code>configurator</code> field that <code>doInvoke</code> assigns from the state - confirm.
 */
202 public ExcelImportConfiguratorBase
getConfigurator() {
207 protected int floatString2IntValue(String value
) {
210 Float fobj
= new Float(Float
.parseFloat(value
));
211 intValue
= fobj
.intValue();
212 if (logger
.isDebugEnabled()) { logger
.debug("Value formatted: " + intValue
); }
213 } catch (NumberFormatException ex
) {
214 logger
.error(value
+ " is not an integer");
219 protected String
floatString2IntStringValue(String value
) {
220 int i
= floatString2IntValue(value
);
221 return String
.valueOf(i
);
230 protected TimePeriod
getTimePeriod(String start
, String end
) {
231 String strPeriod
= CdmUtils
.concat(" - ", start
, end
);
232 TimePeriod result
= TimePeriodParser
.parseString(strPeriod
);
237 * Returns the value of the record map for the given key.
238 * The value is trimmed and empty values are set to <code>null</code>.
243 protected static String
getValue(Map
<String
, String
> record
, String originalKey
) {
244 String value
= record
.get(originalKey
);
245 if (! StringUtils
.isBlank(value
)) {
246 if (logger
.isDebugEnabled()) { logger
.debug(originalKey
+ ": " + value
); }
247 value
= CdmUtils
.removeDuplicateWhitespace(value
.trim()).toString();
254 protected String
getValue(STATE state
, String key
){
255 key
= state
.getConfig().replaceColumnLabel(key
);
256 return getValue(state
.getOriginalRecord(), key
);
260 * Returns the taxon for the given CDM uuid. If no taxon exists for the given id
261 * no record is returned. If a name cache, name title cache (full name) or
262 * taxon title cache column is given the name is checked against the given columns.
263 * If they don't match, this is logged as a warning in the import result.
264 * <BR>If clazz is given, only objects of the given class are loaded.
268 * @param colTaxonUuid taxon uuid column
269 * @param colNameCache name cache column (if exists)
270 * @param colNameTitleCache name title cache column (if exists)
271 * @param colTaxonTitleCache taxon title cache column (if exists)
272 * @param clazz the class the taxon must be an instance of, may be <code>null</code>
273 * @param line the row, for debug information
274 * @return the taxon to load
// Loads the taxon whose uuid is given in column colTaxonUuid of the current record.
// Problems (malformed uuid, taxon not found, no uuid value) are added to the import
// result of the state together with the given line information.
// NOTE(review): several original lines of this method (try opener, return statements,
// else branches, closing braces) are not visible in this excerpt - confirm the exact
// control flow against the complete file.
276 protected <T
extends TaxonBase
<?
>> T
getTaxonByCdmId(STATE state
, String colTaxonUuid
,
277 String colNameCache
, String colNameTitleCache
, String colTaxonTitleCache
,
278 Class
<T
> clazz
, String line
) {
// The uuid string is read directly from the record map; it is not trimmed or normalized here.
280 Map
<String
, String
> record
= getRecord(state
);
281 String strUuidTaxon
= record
.get(colTaxonUuid
);
282 if (strUuidTaxon
!= null){
// Any exception raised while parsing the uuid is reported as an error of the import result.
285 uuidTaxon
= UUID
.fromString(strUuidTaxon
);
286 } catch (Exception e
) {
287 state
.getResult().addError("Taxon uuid has incorrect format. Taxon could not be loaded. Data not imported.", null, line
);
290 TaxonBase
<?
> result
= getTaxonService().find(uuidTaxon
);
291 //TODO load only objects of correct class
// Detects a taxon that was found but is not an instance of the requested class.
// NOTE(review): the statement executed in this case is not visible - presumably the result is discarded; confirm.
292 if (result
!= null && clazz
!= null && !result
.isInstanceOf(clazz
)){
// NOTE(review): the condition guarding this error is not visible - presumably result == null; confirm.
298 state
.getResult().addError("Taxon for uuid "+strUuidTaxon
+" could not be found in database. "
299 + "Taxon could not be loaded. Data not imported.", null, line
);
// Compares the caches of the loaded taxon with the expected values of the record (warnings only).
301 verifyName(state
, colNameCache
, colNameTitleCache
, colTaxonTitleCache
, line
, record
, result
);
303 result
= CdmBase
.deproxy(result
, clazz
);
306 return CdmBase
.deproxy(result
, clazz
);
// Reached when the record holds no value for the uuid column (the else line itself is not visible in this excerpt).
308 String message
= "No taxon identifier column found";
309 state
.getResult().addWarning(message
, null, line
);
314 protected Map
<String
, String
> getRecord(STATE state
) {
315 Map
<String
, String
> record
= state
.getOriginalRecord();
321 * @see #getTaxonByCdmId(ExcelImportState, String, String, String, String, Class, String)
323 protected void verifyName(STATE state
, String colNameCache
, String colNameTitleCache
, String colTaxonTitleCache
,
324 String line
, Map
<String
, String
> record
, TaxonBase
<?
> result
) {
326 String strExpectedNameCache
= record
.get(colNameCache
);
327 String nameCache
= result
.getName() == null ?
null : result
.getName().getNameCache();
328 if (isNotBlank(strExpectedNameCache
) && (!strExpectedNameCache
.trim().equals(nameCache
))){
329 String message
= "Name cache (%s) does not match expected name (%s)";
330 message
= String
.format(message
, nameCache
==null?
"null":nameCache
, strExpectedNameCache
);
331 state
.getResult().addWarning(message
, null, line
);
334 String strExpectedNameTitleCache
= record
.get(colNameTitleCache
);
335 String nameTitleCache
= result
.getName() == null ?
null : result
.getName().getTitleCache();
336 if (isNotBlank(strExpectedNameTitleCache
) && (!strExpectedNameTitleCache
.trim().equals(nameTitleCache
))){
337 String message
= "Name title cache (%s) does not match expected name (%s)";
338 message
= String
.format(message
, nameTitleCache
==null?
"null":nameTitleCache
, strExpectedNameTitleCache
);
339 state
.getResult().addWarning(message
, null, line
);
342 String strExpectedTaxonTitleCache
= record
.get(colTaxonTitleCache
);
343 String taxonTitleCache
= result
.getTitleCache();
344 if (isNotBlank(strExpectedTaxonTitleCache
) && (!strExpectedTaxonTitleCache
.trim().equals(taxonTitleCache
))){
345 String message
= "Name cache (%s) does not match expected name (%s)";
346 message
= String
.format(message
, taxonTitleCache
==null?
"null":taxonTitleCache
, strExpectedTaxonTitleCache
);
347 state
.getResult().addWarning(message
, null, line
);
353 * Non transaction save method to retrieve the source reference
354 * if either existent or not in the database (uses check for uuid).
357 * @return the source reference
359 protected Reference
getSourceReference(STATE state
) {
361 Reference sourceRef
= state
.getSourceReference();
362 if (sourceRef
!= null){
365 UUID uuid
= state
.getConfig().getSourceRefUuid();
367 sourceRef
= state
.getConfig().getSourceReference();
368 if (sourceRef
!= null){
369 uuid
= sourceRef
.getUuid();
373 Reference existingRef
= getReferenceService().find(uuid
);
374 if (existingRef
!= null){
375 sourceRef
= existingRef
;
377 // else if (sourceRef != null){
378 // getReferenceService().save(sourceRef);
381 if (sourceRef
== null){
382 sourceRef
= ReferenceFactory
.newGeneric();
383 String title
= state
.getConfig().getSourceNameString();
384 sourceRef
.setTitle(title
);
385 state
.getConfig().setSourceReference(sourceRef
);
387 state
.setSourceReference(sourceRef
);