add generic config to ExcelImportState
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / distribution / DistributionImport.java
1 /**
2 * Copyright (C) 2008 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.excel.distribution;
10
11 import java.io.FileNotFoundException;
12 import java.net.URI;
13 import java.util.ArrayList;
14 import java.util.HashMap;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18
19 import org.apache.log4j.Logger;
20 import org.springframework.stereotype.Component;
21 import org.springframework.transaction.TransactionStatus;
22
23 import eu.etaxonomy.cdm.api.service.config.IIdentifiableEntityServiceConfigurator;
24 import eu.etaxonomy.cdm.api.service.config.impl.IdentifiableServiceConfiguratorImpl;
25 import eu.etaxonomy.cdm.common.CdmUtils;
26 import eu.etaxonomy.cdm.common.ExcelUtils;
27 import eu.etaxonomy.cdm.io.common.CdmIoBase;
28 import eu.etaxonomy.cdm.io.common.ICdmIO;
29 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
30 import eu.etaxonomy.cdm.io.common.MapWrapper;
31 import eu.etaxonomy.cdm.io.excel.common.ExcelImportConfiguratorBase;
32 import eu.etaxonomy.cdm.io.excel.common.ExcelImportState;
33 import eu.etaxonomy.cdm.model.common.CdmBase;
34 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
35 import eu.etaxonomy.cdm.model.description.Distribution;
36 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
37 import eu.etaxonomy.cdm.model.description.PresenceTerm;
38 import eu.etaxonomy.cdm.model.description.TaxonDescription;
39 import eu.etaxonomy.cdm.model.location.NamedArea;
40 import eu.etaxonomy.cdm.model.location.TdwgArea;
41 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
42 import eu.etaxonomy.cdm.model.taxon.Taxon;
43 import eu.etaxonomy.cdm.persistence.query.MatchMode;
44
45 /**
46 * @author a.babadshanjan
47 * @created 10.11.2008
48 * @version 1.0
49 */
50 @Component
51 public class DistributionImport extends CdmIoBase<ExcelImportState<ExcelImportConfiguratorBase>> implements ICdmIO<ExcelImportState<ExcelImportConfiguratorBase>> {
52
53 /* used */
54 private static final String EDIT_NAME_COLUMN = "EDIT";
55 private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";
56 private static final String STATUS_COLUMN = "Status";
57 /* not used */
58 // private static final String LITERATURE_NUMBER_COLUMN = "Lit.";
59 // private static final String LITERATURE_COLUMN = "Literature";
60 // private static final String VERNACULAR_NAME_COLUMN = "Vernacular";
61 // private static final String HABITAT_COLUMN = "Habitat";
62 // private static final String CONTROL_COLUMN = "Control";
63 // private static final String TRANSLATED_COLUMN = "Translated";
64 // private static final String ISO_DISTRIBUTION_COLUMN = "ISO";
65 // private static final String NOTES_COLUMN = "Notes";
66 // private static final String PAGE_NUMBER_COLUMN = "Page";
67 // private static final String INFO_COLUMN = "Info";
68
69 private static final Logger logger = Logger.getLogger(DistributionImport.class);
70
71 // Stores already processed descriptions
72 Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();
73
74 @Override
75 protected boolean doInvoke(ExcelImportState<ExcelImportConfiguratorBase> state) {
76
77 if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }
78
79 // read and save all rows of the excel worksheet
80 ArrayList<HashMap<String, String>> recordList;
81 URI source = state.getConfig().getSource();
82 try{
83 recordList = ExcelUtils.parseXLS(source);
84 } catch (FileNotFoundException e) {
85 String message = "File not found: " + source;
86 warnProgress(state, message, e);
87 logger.error(message);
88 return false;
89 }
90 if (recordList != null) {
91 HashMap<String,String> record = null;
92 TransactionStatus txStatus = startTransaction();
93
94 for (int i = 0; i < recordList.size(); i++) {
95 record = recordList.get(i);
96 analyzeRecord(record);
97 }
98 commitTransaction(txStatus);
99 }
100
101 try {
102 if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }
103
104 } catch (Exception e) {
105 logger.error("Error closing the application context");
106 e.printStackTrace();
107 }
108
109 return true;
110 }
111
112
113 /**
114 * Reads the data of one Excel sheet row
115 */
116 private void analyzeRecord(HashMap<String,String> record) {
117 /*
118 * Relevant columns:
119 * Name (EDIT)
120 * Distribution TDWG
121 * Status (only entries if not native)
122 * Literature number
123 * Literature
124 */
125
126 String editName = "";
127 ArrayList<String> distributionList = new ArrayList<String>();
128 String status = "";
129 String literatureNumber = "";
130 String literature = "";
131
132 Set<String> keys = record.keySet();
133
134 for (String key: keys) {
135
136 String value = (String) record.get(key);
137 if (!value.equals("")) {
138 if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }
139 }
140
141 if (key.contains(EDIT_NAME_COLUMN)) {
142 editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
143
144 } else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {
145 distributionList = CdmUtils.buildList(value);
146
147 } else if(key.contains(STATUS_COLUMN)) {
148 status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
149
150 // } else if(key.contains(LITERATURE_NUMBER_COLUMN)) {
151 // literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
152 //
153 // } else if(key.contains(LITERATURE_COLUMN)) {
154 // literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
155 //
156 } else {
157 //logger.warn("Column " + key + " ignored");
158 }
159 }
160
161 // Store the data of this record in the DB
162 if (!editName.equals("")) {
163 saveRecord(editName, distributionList, status, literatureNumber, literature);
164 }
165 }
166
167
168 /**
169 * Stores the data of one Excel sheet row in the database
170 */
171 private void saveRecord(String taxonName, ArrayList<String> distributionList,
172 String status, String literatureNumber, String literature) {
173
174 IdentifiableServiceConfiguratorImpl config =
175 IdentifiableServiceConfiguratorImpl.NewInstance();
176 config.setTitleSearchString(taxonName);
177 config.setMatchMode(MatchMode.BEGINNING);
178
179 try {
180 // get the matching names from the DB
181 //List<TaxonNameBase> taxonNameBases = getNameService().findByTitle(config);
182 List<TaxonNameBase<?,?>> taxonNameBases = getNameService().findNamesByTitle(taxonName);
183 if (taxonNameBases.isEmpty()) {
184 logger.error("Taxon name '" + taxonName + "' not found in DB");
185 } else {
186 if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }
187 }
188
189 // get the taxa for the matching names
190 for(TaxonNameBase<?,?> dbTaxonName: taxonNameBases) {
191
192 Set<Taxon> taxa = dbTaxonName.getTaxa();
193 if (taxa.isEmpty()) {
194 logger.warn("No taxon found for name '" + taxonName + "'");
195 } else if (taxa.size() > 1) {
196 logger.warn("More than one taxa found for name '" + taxonName + "'");
197 }
198
199 for(Taxon taxon: taxa) {
200
201 TaxonDescription myDescription = null;
202
203 // If we have created a description for this taxon earlier, take this one.
204 // Otherwise, create a new description.
205 // We don't update any existing descriptions in the database at this point.
206 if (myDescriptions.containsKey(taxon)) {
207 myDescription = myDescriptions.get(taxon);
208 } else {
209 myDescription = TaxonDescription.NewInstance(taxon);
210 taxon.addDescription(myDescription);
211 myDescriptions.put(taxon, myDescription);
212 }
213
214 // Status
215 PresenceAbsenceTermBase<?> presenceAbsenceStatus = PresenceTerm.NewInstance();
216 if (status.equals("")) {
217 presenceAbsenceStatus = PresenceTerm.NATIVE();
218 } else {
219 presenceAbsenceStatus = PresenceTerm.getPresenceTermByAbbreviation(status);
220 }
221 // TODO: Handle absence case.
222 // This case has not yet occurred in the excel input file, though.
223
224 /* Set to true if taxon needs to be saved if at least one new distribution exists */
225 boolean save = false;
226
227 // TDWG areas
228 for (String distribution: distributionList) {
229
230 /* Set to true if this distribution is a new one*/
231 boolean ignore = false;
232
233 if(!distribution.equals("")) {
234 NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(distribution);
235 TaxonDescription taxonDescription = myDescriptions.get(taxon);
236 if (namedArea != null) {
237 // Check against existing distributions and ignore the ones that occur multiple times
238 Set<DescriptionElementBase> myDescriptionElements = taxonDescription.getElements();
239 for(DescriptionElementBase descriptionElement : myDescriptionElements) {
240 if (descriptionElement instanceof Distribution) {
241 if (namedArea == ((Distribution)descriptionElement).getArea()) {
242 ignore = true;
243 if (logger.isDebugEnabled()) {
244 logger.debug("Distribution ignored: " + distribution);
245 }
246 break;
247 }
248 }
249 }
250 // Create new distribution if not yet exist
251 if (ignore == false) {
252 save = true;
253 Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);
254 myDescription.addElement(newDistribution);
255 if (logger.isDebugEnabled()) {
256 logger.debug("Distribution created: " + newDistribution.toString());
257 }
258 }
259 }
260 }
261 }
262 if (save == true) {
263 getTaxonService().save(taxon);
264 if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }
265 }
266 }
267 }
268 } catch (Exception e) {
269 logger.error("Error");
270 e.printStackTrace();
271 }
272 }
273
274
275 @Override
276 protected boolean doCheck(ExcelImportState state) {
277 boolean result = true;
278 logger.warn("No check implemented for distribution data import");
279 return result;
280 }
281
282
283 @Override
284 protected boolean isIgnore(ExcelImportState state) {
285 return false;
286 }
287
288 }