4e14808ae8b283b20036a2b55584dbf4b6d76dd0
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / distribution / DistributionImport.java
1 /**
2 * Copyright (C) 2008 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.excel.distribution;
10
11 import java.io.FileNotFoundException;
12 import java.net.URI;
13 import java.util.ArrayList;
14 import java.util.HashMap;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18
19 import org.apache.log4j.Logger;
20 import org.springframework.stereotype.Component;
21 import org.springframework.transaction.TransactionStatus;
22
23 import eu.etaxonomy.cdm.api.service.config.IIdentifiableEntityServiceConfigurator;
24 import eu.etaxonomy.cdm.api.service.config.impl.IdentifiableServiceConfiguratorImpl;
25 import eu.etaxonomy.cdm.common.CdmUtils;
26 import eu.etaxonomy.cdm.common.ExcelUtils;
27 import eu.etaxonomy.cdm.io.common.CdmIoBase;
28 import eu.etaxonomy.cdm.io.common.ICdmIO;
29 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
30 import eu.etaxonomy.cdm.io.common.MapWrapper;
31 import eu.etaxonomy.cdm.io.excel.common.ExcelImportState;
32 import eu.etaxonomy.cdm.model.common.CdmBase;
33 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
34 import eu.etaxonomy.cdm.model.description.Distribution;
35 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTermBase;
36 import eu.etaxonomy.cdm.model.description.PresenceTerm;
37 import eu.etaxonomy.cdm.model.description.TaxonDescription;
38 import eu.etaxonomy.cdm.model.location.NamedArea;
39 import eu.etaxonomy.cdm.model.location.TdwgArea;
40 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
41 import eu.etaxonomy.cdm.model.taxon.Taxon;
42 import eu.etaxonomy.cdm.persistence.query.MatchMode;
43
44 /**
45 * @author a.babadshanjan
46 * @created 10.11.2008
47 * @version 1.0
48 */
49 @Component
50 public class DistributionImport extends CdmIoBase<ExcelImportState> implements ICdmIO<ExcelImportState> {
51
52 /* used */
53 private static final String EDIT_NAME_COLUMN = "EDIT";
54 private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";
55 private static final String STATUS_COLUMN = "Status";
56 /* not used */
57 // private static final String LITERATURE_NUMBER_COLUMN = "Lit.";
58 // private static final String LITERATURE_COLUMN = "Literature";
59 // private static final String VERNACULAR_NAME_COLUMN = "Vernacular";
60 // private static final String HABITAT_COLUMN = "Habitat";
61 // private static final String CONTROL_COLUMN = "Control";
62 // private static final String TRANSLATED_COLUMN = "Translated";
63 // private static final String ISO_DISTRIBUTION_COLUMN = "ISO";
64 // private static final String NOTES_COLUMN = "Notes";
65 // private static final String PAGE_NUMBER_COLUMN = "Page";
66 // private static final String INFO_COLUMN = "Info";
67
68 private static final Logger logger = Logger.getLogger(DistributionImport.class);
69
70 // Stores already processed descriptions
71 Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();
72
73 @Override
74 protected boolean doInvoke(ExcelImportState state) {
75
76 if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }
77
78 // read and save all rows of the excel worksheet
79 ArrayList<HashMap<String, String>> recordList;
80 URI source = state.getConfig().getSource();
81 try{
82 recordList = ExcelUtils.parseXLS(source);
83 } catch (FileNotFoundException e) {
84 String message = "File not found: " + source;
85 warnProgress(state, message, e);
86 logger.error(message);
87 return false;
88 }
89 if (recordList != null) {
90 HashMap<String,String> record = null;
91 TransactionStatus txStatus = startTransaction();
92
93 for (int i = 0; i < recordList.size(); i++) {
94 record = recordList.get(i);
95 analyzeRecord(record);
96 }
97 commitTransaction(txStatus);
98 }
99
100 try {
101 if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }
102
103 } catch (Exception e) {
104 logger.error("Error closing the application context");
105 e.printStackTrace();
106 }
107
108 return true;
109 }
110
111
112 /**
113 * Reads the data of one Excel sheet row
114 */
115 private void analyzeRecord(HashMap<String,String> record) {
116 /*
117 * Relevant columns:
118 * Name (EDIT)
119 * Distribution TDWG
120 * Status (only entries if not native)
121 * Literature number
122 * Literature
123 */
124
125 String editName = "";
126 ArrayList<String> distributionList = new ArrayList<String>();
127 String status = "";
128 String literatureNumber = "";
129 String literature = "";
130
131 Set<String> keys = record.keySet();
132
133 for (String key: keys) {
134
135 String value = (String) record.get(key);
136 if (!value.equals("")) {
137 if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }
138 }
139
140 if (key.contains(EDIT_NAME_COLUMN)) {
141 editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
142
143 } else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {
144 distributionList = CdmUtils.buildList(value);
145
146 } else if(key.contains(STATUS_COLUMN)) {
147 status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
148
149 // } else if(key.contains(LITERATURE_NUMBER_COLUMN)) {
150 // literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
151 //
152 // } else if(key.contains(LITERATURE_COLUMN)) {
153 // literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
154 //
155 } else {
156 //logger.warn("Column " + key + " ignored");
157 }
158 }
159
160 // Store the data of this record in the DB
161 if (!editName.equals("")) {
162 saveRecord(editName, distributionList, status, literatureNumber, literature);
163 }
164 }
165
166
167 /**
168 * Stores the data of one Excel sheet row in the database
169 */
170 private void saveRecord(String taxonName, ArrayList<String> distributionList,
171 String status, String literatureNumber, String literature) {
172
173 IdentifiableServiceConfiguratorImpl config =
174 IdentifiableServiceConfiguratorImpl.NewInstance();
175 config.setTitleSearchString(taxonName);
176 config.setMatchMode(MatchMode.BEGINNING);
177
178 try {
179 // get the matching names from the DB
180 //List<TaxonNameBase> taxonNameBases = getNameService().findByTitle(config);
181 List<TaxonNameBase<?,?>> taxonNameBases = getNameService().findNamesByTitle(taxonName);
182 if (taxonNameBases.isEmpty()) {
183 logger.error("Taxon name '" + taxonName + "' not found in DB");
184 } else {
185 if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }
186 }
187
188 // get the taxa for the matching names
189 for(TaxonNameBase<?,?> dbTaxonName: taxonNameBases) {
190
191 Set<Taxon> taxa = dbTaxonName.getTaxa();
192 if (taxa.isEmpty()) {
193 logger.warn("No taxon found for name '" + taxonName + "'");
194 } else if (taxa.size() > 1) {
195 logger.warn("More than one taxa found for name '" + taxonName + "'");
196 }
197
198 for(Taxon taxon: taxa) {
199
200 TaxonDescription myDescription = null;
201
202 // If we have created a description for this taxon earlier, take this one.
203 // Otherwise, create a new description.
204 // We don't update any existing descriptions in the database at this point.
205 if (myDescriptions.containsKey(taxon)) {
206 myDescription = myDescriptions.get(taxon);
207 } else {
208 myDescription = TaxonDescription.NewInstance(taxon);
209 taxon.addDescription(myDescription);
210 myDescriptions.put(taxon, myDescription);
211 }
212
213 // Status
214 PresenceAbsenceTermBase<?> presenceAbsenceStatus = PresenceTerm.NewInstance();
215 if (status.equals("")) {
216 presenceAbsenceStatus = PresenceTerm.NATIVE();
217 } else {
218 presenceAbsenceStatus = PresenceTerm.getPresenceTermByAbbreviation(status);
219 }
220 // TODO: Handle absence case.
221 // This case has not yet occurred in the excel input file, though.
222
223 /* Set to true if taxon needs to be saved if at least one new distribution exists */
224 boolean save = false;
225
226 // TDWG areas
227 for (String distribution: distributionList) {
228
229 /* Set to true if this distribution is a new one*/
230 boolean ignore = false;
231
232 if(!distribution.equals("")) {
233 NamedArea namedArea = TdwgArea.getAreaByTdwgAbbreviation(distribution);
234 TaxonDescription taxonDescription = myDescriptions.get(taxon);
235 if (namedArea != null) {
236 // Check against existing distributions and ignore the ones that occur multiple times
237 Set<DescriptionElementBase> myDescriptionElements = taxonDescription.getElements();
238 for(DescriptionElementBase descriptionElement : myDescriptionElements) {
239 if (descriptionElement instanceof Distribution) {
240 if (namedArea == ((Distribution)descriptionElement).getArea()) {
241 ignore = true;
242 if (logger.isDebugEnabled()) {
243 logger.debug("Distribution ignored: " + distribution);
244 }
245 break;
246 }
247 }
248 }
249 // Create new distribution if not yet exist
250 if (ignore == false) {
251 save = true;
252 Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);
253 myDescription.addElement(newDistribution);
254 if (logger.isDebugEnabled()) {
255 logger.debug("Distribution created: " + newDistribution.toString());
256 }
257 }
258 }
259 }
260 }
261 if (save == true) {
262 getTaxonService().save(taxon);
263 if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }
264 }
265 }
266 }
267 } catch (Exception e) {
268 logger.error("Error");
269 e.printStackTrace();
270 }
271 }
272
273
274 @Override
275 protected boolean doCheck(ExcelImportState state) {
276 boolean result = true;
277 logger.warn("No check implemented for distribution data import");
278 return result;
279 }
280
281
282 @Override
283 protected boolean isIgnore(ExcelImportState state) {
284 return false;
285 }
286
287 }