major refactoring in io-layer (config -> state)
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / distribution / DistributionImport.java
1 /**
2 * Copyright (C) 2008 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.excel.distribution;
10
11 import java.io.FileNotFoundException;
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17
18 import org.apache.log4j.Logger;
19 import org.springframework.stereotype.Component;
20 import org.springframework.transaction.TransactionStatus;
21
22 import eu.etaxonomy.cdm.api.service.config.IIdentifiableEntityServiceConfigurator;
23 import eu.etaxonomy.cdm.api.service.config.impl.IdentifiableServiceConfiguratorImpl;
24 import eu.etaxonomy.cdm.common.CdmUtils;
25 import eu.etaxonomy.cdm.common.ExcelUtils;
26 import eu.etaxonomy.cdm.io.common.CdmIoBase;
27 import eu.etaxonomy.cdm.io.common.ICdmIO;
28 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
29 import eu.etaxonomy.cdm.io.common.MapWrapper;
30 import eu.etaxonomy.cdm.io.excel.common.ExcelImportState;
31 import eu.etaxonomy.cdm.model.common.CdmBase;
32 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
33 import eu.etaxonomy.cdm.model.description.Distribution;
34 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
35 import eu.etaxonomy.cdm.model.description.PresenceTerm;
36 import eu.etaxonomy.cdm.model.description.TaxonDescription;
37 import eu.etaxonomy.cdm.model.location.NamedArea;
38 import eu.etaxonomy.cdm.model.location.TdwgArea;
39 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
40 import eu.etaxonomy.cdm.model.taxon.Taxon;
41 import eu.etaxonomy.cdm.persistence.query.MatchMode;
42
43 /**
44 * @author a.babadshanjan
45 * @created 10.11.2008
46 * @version 1.0
47 */
48 @Component
49 public class DistributionImport extends CdmIoBase<ExcelImportState> implements ICdmIO<ExcelImportState> {
50
51 /* used */
52 private static final String EDIT_NAME_COLUMN = "EDIT";
53 private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";
54 private static final String STATUS_COLUMN = "Status";
55 /* not used */
56 // private static final String LITERATURE_NUMBER_COLUMN = "Lit.";
57 // private static final String LITERATURE_COLUMN = "Literature";
58 // private static final String VERNACULAR_NAME_COLUMN = "Vernacular";
59 // private static final String HABITAT_COLUMN = "Habitat";
60 // private static final String CONTROL_COLUMN = "Control";
61 // private static final String TRANSLATED_COLUMN = "Translated";
62 // private static final String ISO_DISTRIBUTION_COLUMN = "ISO";
63 // private static final String NOTES_COLUMN = "Notes";
64 // private static final String PAGE_NUMBER_COLUMN = "Page";
65 // private static final String INFO_COLUMN = "Info";
66
67 private static final Logger logger = Logger.getLogger(DistributionImport.class);
68
69 // Stores already processed descriptions
70 Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();
71
72 @Override
73 protected boolean doInvoke(ExcelImportState state) {
74
75 if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }
76
77 // read and save all rows of the excel worksheet
78 ArrayList<HashMap<String, String>> recordList;
79 try{
80 recordList = ExcelUtils.parseXLS(state.getConfig().getSourceNameString());
81 } catch (FileNotFoundException e1) {
82 logger.error("File not found: " + (String)state.getConfig().getSource());
83 return false;
84 }
85 if (recordList != null) {
86 HashMap<String,String> record = null;
87 TransactionStatus txStatus = startTransaction();
88
89 for (int i = 0; i < recordList.size(); i++) {
90 record = recordList.get(i);
91 analyzeRecord(record);
92 }
93 commitTransaction(txStatus);
94 }
95
96 try {
97 if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }
98
99 } catch (Exception e) {
100 logger.error("Error closing the application context");
101 e.printStackTrace();
102 }
103
104 return true;
105 }
106
107
108 /**
109 * Reads the data of one Excel sheet row
110 */
111 private void analyzeRecord(HashMap<String,String> record) {
112 /*
113 * Relevant columns:
114 * Name (EDIT)
115 * Distribution TDWG
116 * Status (only entries if not native)
117 * Literature number
118 * Literature
119 */
120
121 String editName = "";
122 ArrayList<String> distributionList = new ArrayList<String>();
123 String status = "";
124 String literatureNumber = "";
125 String literature = "";
126
127 Set<String> keys = record.keySet();
128
129 for (String key: keys) {
130
131 String value = (String) record.get(key);
132 if (!value.equals("")) {
133 if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }
134 }
135
136 if (key.contains(EDIT_NAME_COLUMN)) {
137 editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
138
139 } else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {
140 distributionList = CdmUtils.buildList(value);
141
142 } else if(key.contains(STATUS_COLUMN)) {
143 status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
144
145 // } else if(key.contains(LITERATURE_NUMBER_COLUMN)) {
146 // literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
147 //
148 // } else if(key.contains(LITERATURE_COLUMN)) {
149 // literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
150 //
151 } else {
152 //logger.warn("Column " + key + " ignored");
153 }
154 }
155
156 // Store the data of this record in the DB
157 if (!editName.equals("")) {
158 saveRecord(editName, distributionList, status, literatureNumber, literature);
159 }
160 }
161
162
163 /**
164 * Stores the data of one Excel sheet row in the database
165 */
166 private void saveRecord(String taxonName, ArrayList<String> distributionList,
167 String status, String literatureNumber, String literature) {
168
169 IdentifiableServiceConfiguratorImpl config =
170 IdentifiableServiceConfiguratorImpl.NewInstance();
171 config.setTitleSearchString(taxonName);
172 config.setMatchMode(MatchMode.BEGINNING);
173
174 try {
175 // get the matching names from the DB
176 //List<TaxonNameBase> taxonNameBases = getNameService().findByTitle(config);
177 List<TaxonNameBase<?,?>> taxonNameBases = getNameService().findNamesByTitle(taxonName);
178 if (taxonNameBases.isEmpty()) {
179 logger.error("Taxon name '" + taxonName + "' not found in DB");
180 } else {
181 if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }
182 }
183
184 // get the taxa for the matching names
185 for(TaxonNameBase<?,?> dbTaxonName: taxonNameBases) {
186
187 Set<Taxon> taxa = dbTaxonName.getTaxa();
188 if (taxa.isEmpty()) {
189 logger.warn("No taxon found for name '" + taxonName + "'");
190 } else if (taxa.size() > 1) {
191 logger.warn("More than one taxa found for name '" + taxonName + "'");
192 }
193
194 for(Taxon taxon: taxa) {
195
196 TaxonDescription myDescription = null;
197
198 // If we have created a description for this taxon earlier, take this one.
199 // Otherwise, create a new description.
200 // We don't update any existing descriptions in the database at this point.
201 if (myDescriptions.containsKey(taxon)) {
202 myDescription = myDescriptions.get(taxon);
203 } else {
204 myDescription = TaxonDescription.NewInstance(taxon);
205 taxon.addDescription(myDescription);
206 myDescriptions.put(taxon, myDescription);
207 }
208
209 // Status
210 PresenceAbsenceTermBase<?> presenceAbsenceStatus = PresenceTerm.NewInstance();
211 if (status.equals("")) {
212 presenceAbsenceStatus = PresenceTerm.NATIVE();
213 } else {
214 presenceAbsenceStatus = PresenceTerm.getPresenceTermByAbbreviation(status);
215 }
216 // TODO: Handle absence case.
217 // This case has not yet occurred in the excel input file, though.
218
219 /* Set to true if taxon needs to be saved if at least one new distribution exists */
220 boolean save = false;
221
222 // TDWG areas
223 for (String distribution: distributionList) {
224
225 /* Set to true if this distribution is a new one*/
226 boolean ignore = false;
227
228 if(!distribution.equals("")) {
229 NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(distribution);
230 TaxonDescription taxonDescription = myDescriptions.get(taxon);
231 if (namedArea != null) {
232 // Check against existing distributions and ignore the ones that occur multiple times
233 Set<DescriptionElementBase> myDescriptionElements = taxonDescription.getElements();
234 for(DescriptionElementBase descriptionElement : myDescriptionElements) {
235 if (descriptionElement instanceof Distribution) {
236 if (namedArea == ((Distribution)descriptionElement).getArea()) {
237 ignore = true;
238 if (logger.isDebugEnabled()) {
239 logger.debug("Distribution ignored: " + distribution);
240 }
241 break;
242 }
243 }
244 }
245 // Create new distribution if not yet exist
246 if (ignore == false) {
247 save = true;
248 Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);
249 myDescription.addElement(newDistribution);
250 if (logger.isDebugEnabled()) {
251 logger.debug("Distribution created: " + newDistribution.toString());
252 }
253 }
254 }
255 }
256 }
257 if (save == true) {
258 getTaxonService().saveTaxon(taxon);
259 if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }
260 }
261 }
262 }
263 } catch (Exception e) {
264 logger.error("Error");
265 e.printStackTrace();
266 }
267 }
268
269
270 @Override
271 protected boolean doCheck(ExcelImportState state) {
272 boolean result = true;
273 logger.warn("No check implemented for distribution data import");
274 return result;
275 }
276
277
278 @Override
279 protected boolean isIgnore(ExcelImportState state) {
280 return false;
281 }
282
283 }