Project

General

Profile

Download (13.8 KB) Statistics
| Branch: | Tag: | Revision:
1
/**
2
 * Copyright (C) 2007 EDIT
3
 * European Distributed Institute of Taxonomy
4
 * http://www.e-taxonomy.eu
5
 *
6
 * The contents of this file are subject to the Mozilla Public License Version 1.1
7
 * See LICENSE.TXT at the top of this package for the full license terms.
8
 */
9

    
10
package eu.etaxonomy.cdm.io.excel.common;
11

    
12
import java.io.ByteArrayInputStream;
13
import java.io.FileNotFoundException;
14
import eu.etaxonomy.cdm.common.URI;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.UUID;
18

    
19
import org.apache.commons.lang.StringUtils;
20
import org.apache.logging.log4j.LogManager;import org.apache.logging.log4j.Logger;
21
import org.springframework.transaction.support.DefaultTransactionStatus;
22

    
23
import eu.etaxonomy.cdm.common.CdmUtils;
24
import eu.etaxonomy.cdm.common.ExcelUtils;
25
import eu.etaxonomy.cdm.io.common.CdmImportBase;
26
import eu.etaxonomy.cdm.io.distribution.excelupdate.ExcelDistributionUpdateConfigurator;
27
import eu.etaxonomy.cdm.io.excel.taxa.NormalExplicitImportConfigurator;
28
import eu.etaxonomy.cdm.io.excel.taxa.TaxonListImportConfigurator;
29
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
30
import eu.etaxonomy.cdm.model.common.CdmBase;
31
import eu.etaxonomy.cdm.model.common.TimePeriod;
32
import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
33
import eu.etaxonomy.cdm.model.reference.Reference;
34
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
35
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
36
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
37

    
38
/**
39
 * @author a.babadshanjan
40
 * @since 17.12.2008
41
 */
42
public abstract class ExcelImportBase<STATE extends ExcelImportState<CONFIG, ROW>, CONFIG extends ExcelImportConfiguratorBase, ROW extends ExcelRowBase>
43
        extends CdmImportBase<CONFIG, STATE> {
44

    
45
    private static final long serialVersionUID = 2759164811664484732L;
46
    private static final Logger logger = LogManager.getLogger(ExcelImportBase.class);
47

    
48
	protected static final String SCIENTIFIC_NAME_COLUMN = "ScientificName";
49

    
50
	private List<Map<String, String>> recordList = null;
51

    
52
	private ExcelImportConfiguratorBase configurator = null;
53

    
54

    
55
	/** Reads data from an Excel file and stores them into a CDM DB.
56
     *
57
     * @param config
58
     * @param stores (not used)
59
     */
60
	@Override
61
	protected void doInvoke(STATE state){
62

    
63
		logger.debug("Importing excel data");
64

    
65
		//cleanup state from prior session
66
		state.setSourceReference(null);
67

    
68
    	configurator = state.getConfig();
69

    
70
		NomenclaturalCode nc = getConfigurator().getNomenclaturalCode();
71
		if (nc == null && requiresNomenclaturalCode()) {
72
			logger.error("Nomenclatural code could not be determined. Skip invoke.");
73
			state.setUnsuccessfull();
74
			return;
75
		}
76
		URI source = null;
77

    
78
		byte[] data = null;
79
		// read and save all rows of the excel worksheet
80
		if ((state.getConfig() instanceof NormalExplicitImportConfigurator
81
		        || state.getConfig() instanceof ExcelDistributionUpdateConfigurator
82
		        || state.getConfig() instanceof TaxonListImportConfigurator) && (state.getConfig().getStream() != null)
83
		    ){
84
		    data =  state.getConfig().getStream();
85
		} else{
86
		    source = state.getConfig().getSource();
87
		}
88

    
89
		String sheetName = getWorksheetName(state.getConfig());
90

    
91
		if (data != null){
92
            try {
93
                ByteArrayInputStream stream = new ByteArrayInputStream(data);
94
                recordList = ExcelUtils.parseXLS(stream, sheetName);
95
            } catch (Exception e) {
96
                throw new RuntimeException(e);
97
            }
98
        }else{
99
    		try {
100
    			recordList = ExcelUtils.parseXLS(source, sheetName);
101
    		} catch (FileNotFoundException e) {
102
    			String message = "File not found: " + source;
103
    			warnProgress(state, message, e);
104
    			logger.error(message);
105
    			state.setUnsuccessfull();
106
    			return;
107
    		}
108
        }
109

    
110
    	handleRecordList(state, source);
111
    	logger.debug("End excel data import");
112
    	return;
113
	}
114

    
115
	protected boolean requiresNomenclaturalCode() {
116
		return true;
117
	}
118

    
119
	private void handleRecordList(STATE state, URI source) {
120
		Integer startingLine = 2;
121
		if (recordList != null) {
122
    		Map<String,String> record = null;
123

    
124
    		state.setTransactionStatus(startTransaction());
125

    
126
    		//first pass
127
    		state.setCurrentLine(startingLine);
128
    		for (int i = 0; i < recordList.size(); i++) {
129
    			record = recordList.get(i);
130
    			analyzeRecord(record, state);
131
    			state.setOriginalRecord(record);
132
    			try {
133
					firstPass(state);
134
					//for debugging only
135
//					if (i % 1000 == 0){
136
//					    try {
137
//                            System.out.println(i);
138
//					        getSession().flush();
139
//                        } catch (Exception e) {
140
//                            e.printStackTrace();
141
//                        }
142
//					}
143
					DefaultTransactionStatus defStatus = (DefaultTransactionStatus) state.getTransactionStatus();
144
			        if (defStatus.isRollbackOnly()){
145
			            logger.warn("Rollback only in line: " + i);
146
			        }
147
				} catch (Exception e) {
148
					e.printStackTrace();
149
				}finally{
150
					state.incCurrentLine();
151
				}
152
    		}
153
    		//second pass
154
    		state.setCurrentLine(startingLine);
155
    		for (int i = 0; i < recordList.size(); i++) {
156
    			record = recordList.get(i);
157
    			analyzeRecord(record, state);
158
    			state.setOriginalRecord(record);
159
                secondPass(state);
160
    			state.incCurrentLine();
161
    	   	}
162
    		if (configurator.isDeduplicateReferences()){
163
    		    getReferenceService().deduplicate(Reference.class, null, null);
164
    		}
165
    		if (configurator.isDeduplicateAuthors()){
166
                getAgentService().deduplicate(TeamOrPersonBase.class, null, null);
167
            }
168
    		commitTransaction(state.getTransactionStatus());
169
    	}else{
170
    		logger.warn("No records found in " + source);
171
    	}
172
		return;
173
	}
174

    
175
	/**
176
	 * To define a worksheet name other then the one defined in the configurator
177
	 * override this method with a non <code>null</code> return value.
178
	 * If <code>null</code> is returned the first worksheet is taken.
179

    
180
	 * @return worksheet name. <code>null</null> if no worksheet is defined.
181
	 */
182
	protected String getWorksheetName(CONFIG config) {
183
		return config.getWorksheetName();
184
	}
185

    
186
	@Override
187
	protected boolean doCheck(STATE state) {
188
		boolean result = true;
189
		logger.warn("No check implemented for Excel import");
190
		return result;
191
	}
192

    
193
	/**
194
	 *
195
	 *
196
	 * @param record
197
	 * @return
198
	 */
199
	protected abstract void analyzeRecord(Map<String,String> record, STATE state);
200

    
201
	protected abstract void firstPass(STATE state);
202
	protected abstract void secondPass(STATE state);
203

    
204

    
205
	public ExcelImportConfiguratorBase getConfigurator() {
206
		return configurator;
207
	}
208

    
209

    
210
	protected int floatString2IntValue(String value) {
211
		int intValue = 0;
212
		try {
213
			Float fobj = new Float(Float.parseFloat(value));
214
			intValue = fobj.intValue();
215
			if (logger.isDebugEnabled()) { logger.debug("Value formatted: " + intValue); }
216
		} catch (NumberFormatException ex) {
217
			logger.error(value + " is not an integer");
218
		}
219
		return intValue;
220
	}
221

    
222
	protected String floatString2IntStringValue(String value) {
223
		int i = floatString2IntValue(value);
224
		return String.valueOf(i);
225
	}
226

    
227

    
228
	/**
229
	 * @param start
230
	 * @param end
231
	 * @return
232
	 */
233
	protected TimePeriod getTimePeriod(String start, String end) {
234
		String strPeriod = CdmUtils.concat(" - ", start, end);
235
		TimePeriod result = TimePeriodParser.parseString(strPeriod);
236
		return result;
237
	}
238

    
239
    /**
240
     * Returns the value of the record map for the given key.
241
     * The value is trimmed and empty values are set to <code>null</code>.
242
     * @param record
243
     * @param originalKey
244
     * @return the value
245
     */
246
    protected static String getValue(Map<String, String> record, String originalKey) {
247
        String value = record.get(originalKey);
248
        if (! StringUtils.isBlank(value)) {
249
            if (logger.isDebugEnabled()) { logger.debug(originalKey + ": " + value); }
250
            value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
251
            return value;
252
        }else{
253
            return null;
254
        }
255
    }
256

    
257
    protected String getValue(STATE state, String key){
258
        key = state.getConfig().replaceColumnLabel(key);
259
        return getValue(state.getOriginalRecord(), key);
260
    }
261

    
262
    /**
263
     * Returns the taxon for the given CDM uuid. If no taxon exists for the given id
264
     * no record is returned. If a name cache, name title cache (full name) or
265
     * taxon title cache column is given the name is checked against the given columns.
266
     * If they don't manage it is logged as a warning in import result.
267
     * <BR>If clazz is given, only objects of the given class are loaded.
268
     *
269
     *
270
     * @param state
271
     * @param colTaxonUuid taxon uuid column
272
     * @param colNameCache name cache column (if exists)
273
     * @param colNameTitleCache name title cache column (if exists)
274
     * @param colTaxonTitleCache taxon title cache column (if exists)
275
     * @param clazz the clazz null
276
     * @param line the row, for debug information
277
     * @return the taxon to load
278
     */
279
    protected <T extends TaxonBase<?>> T getTaxonByCdmId(STATE state, String colTaxonUuid,
280
            String colNameCache, String colNameTitleCache, String colTaxonTitleCache,
281
            Class<T> clazz, String line) {
282

    
283
        Map<String, String> record = getRecord(state);
284
        String strUuidTaxon = record.get(colTaxonUuid);
285
        if (strUuidTaxon != null){
286
            UUID uuidTaxon;
287
            try {
288
                uuidTaxon = UUID.fromString(strUuidTaxon);
289
            } catch (Exception e) {
290
                state.getResult().addError("Taxon uuid has incorrect format. Taxon could not be loaded. Data not imported.", null, line);
291
                return null;
292
            }
293
            TaxonBase<?> result = getTaxonService().find(uuidTaxon);
294
            //TODO load only objects of correct class
295
            if (result != null && clazz != null && !result.isInstanceOf(clazz)){
296
                result = null;
297
            }
298

    
299

    
300
            if (result == null){
301
                state.getResult().addError("Taxon for uuid  "+strUuidTaxon+" could not be found in database. "
302
                        + "Taxon could not be loaded. Data not imported.", null, line);
303
            }else{
304
                verifyName(state, colNameCache, colNameTitleCache, colTaxonTitleCache, line, record, result);
305
            }
306
            result = CdmBase.deproxy(result, clazz);
307

    
308

    
309
            return CdmBase.deproxy(result, clazz);
310
        }else{
311
            String message = "No taxon identifier column found";
312
            state.getResult().addWarning(message, null, line);
313
            return null;
314
        }
315
    }
316

    
317
    protected Map<String, String> getRecord(STATE state) {
318
        Map<String, String> record = state.getOriginalRecord();
319
        return record;
320
    }
321

    
322

    
323
    /**
324
     * @see #getTaxonByCdmId(ExcelImportState, String, String, String, String, Class, String)
325
     */
326
    protected void verifyName(STATE state, String colNameCache, String colNameTitleCache, String colTaxonTitleCache,
327
            String line, Map<String, String> record, TaxonBase<?> result) {
328
        //nameCache
329
        String strExpectedNameCache = record.get(colNameCache);
330
        String nameCache = result.getName() == null ? null : result.getName().getNameCache();
331
        if (isNotBlank(strExpectedNameCache) && (!strExpectedNameCache.trim().equals(nameCache))){
332
            String message = "Name cache (%s) does not match expected name (%s)";
333
            message = String.format(message, nameCache==null? "null":nameCache, strExpectedNameCache);
334
            state.getResult().addWarning(message, null, line);
335
        }
336
        //name title
337
        String strExpectedNameTitleCache = record.get(colNameTitleCache);
338
        String nameTitleCache = result.getName() == null ? null : result.getName().getTitleCache();
339
        if (isNotBlank(strExpectedNameTitleCache) && (!strExpectedNameTitleCache.trim().equals(nameTitleCache))){
340
            String message = "Name title cache (%s) does not match expected name (%s)";
341
            message = String.format(message, nameTitleCache==null? "null":nameTitleCache, strExpectedNameTitleCache);
342
            state.getResult().addWarning(message, null, line);
343
        }
344
        //taxon title cache
345
        String strExpectedTaxonTitleCache = record.get(colTaxonTitleCache);
346
        String taxonTitleCache = result.getTitleCache();
347
        if (isNotBlank(strExpectedTaxonTitleCache) && (!strExpectedTaxonTitleCache.trim().equals(taxonTitleCache))){
348
            String message = "Name cache (%s) does not match expected name (%s)";
349
            message = String.format(message, taxonTitleCache==null? "null":taxonTitleCache, strExpectedTaxonTitleCache);
350
            state.getResult().addWarning(message, null, line);
351
        }
352
    }
353

    
354

    
355
    /**
356
     * Non transaction save method to retrieve the source reference
357
     * if either existent or not in the database (uses check for uuid).
358
     *
359
     * @param state
360
     * @return the source reference
361
     */
362
    protected Reference getSourceReference(STATE state) {
363

    
364
        Reference sourceRef = state.getSourceReference();
365
        if (sourceRef != null){
366
            return sourceRef;
367
        }
368
        UUID uuid = state.getConfig().getSourceRefUuid();
369
        if (uuid == null){
370
            sourceRef = state.getConfig().getSourceReference();
371
            if (sourceRef != null){
372
                uuid = sourceRef.getUuid();
373
            }
374
        }
375
        if (uuid != null){
376
            Reference existingRef = getReferenceService().find(uuid);
377
            if (existingRef != null){
378
                sourceRef = existingRef;
379
            }
380
//            else if (sourceRef != null){
381
//                getReferenceService().save(sourceRef);
382
//            }
383
        }
384
        if (sourceRef == null){
385
            sourceRef = ReferenceFactory.newGeneric();
386
            String title = state.getConfig().getSourceNameString();
387
            sourceRef.setTitle(title);
388
            state.getConfig().setSourceReference(sourceRef);
389
        }
390
        state.setSourceReference(sourceRef);
391

    
392
        return sourceRef;
393
    }
394
}
(1-1/5)