Merge branch 'release/5.45.0'
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / distribution / DistributionImport.java
1 /**
2 * Copyright (C) 2008 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.excel.distribution;
10
11 import java.io.FileNotFoundException;
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Set;
17
18 import org.apache.logging.log4j.LogManager;
19 import org.apache.logging.log4j.Logger;
20 import org.springframework.stereotype.Component;
21 import org.springframework.transaction.TransactionStatus;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.common.ExcelUtils;
25 import eu.etaxonomy.cdm.common.URI;
26 import eu.etaxonomy.cdm.io.common.CdmImportBase;
27 import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
28 import eu.etaxonomy.cdm.io.excel.common.ExcelImportConfiguratorBase;
29 import eu.etaxonomy.cdm.io.excel.common.ExcelImportState;
30 import eu.etaxonomy.cdm.io.excel.common.ExcelRowBase;
31 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
32 import eu.etaxonomy.cdm.model.description.Distribution;
33 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
34 import eu.etaxonomy.cdm.model.description.TaxonDescription;
35 import eu.etaxonomy.cdm.model.location.NamedArea;
36 import eu.etaxonomy.cdm.model.name.TaxonName;
37 import eu.etaxonomy.cdm.model.taxon.Taxon;
38
39 /**
40 * @author a.babadshanjan
41 * @since 10.11.2008
42 */
43 @Component
44 public class DistributionImport
45 extends CdmImportBase<ExcelImportConfiguratorBase, ExcelImportState<ExcelImportConfiguratorBase, ExcelRowBase>> {
46
47 private static final long serialVersionUID = 7765309119416657235L;
48 private static final Logger logger = LogManager.getLogger();
49
50 /* used */
51 private static final String EDIT_NAME_COLUMN = "EDIT";
52 private static final String TDWG_DISTRIBUTION_COLUMN = "TDWG";
53 private static final String STATUS_COLUMN = "Status";
54 /* not used */
55 // private static final String LITERATURE_NUMBER_COLUMN = "Lit.";
56 // private static final String LITERATURE_COLUMN = "Literature";
57 // private static final String VERNACULAR_NAME_COLUMN = "Vernacular";
58 // private static final String HABITAT_COLUMN = "Habitat";
59 // private static final String CONTROL_COLUMN = "Control";
60 // private static final String TRANSLATED_COLUMN = "Translated";
61 // private static final String ISO_DISTRIBUTION_COLUMN = "ISO";
62 // private static final String NOTES_COLUMN = "Notes";
63 // private static final String PAGE_NUMBER_COLUMN = "Page";
64 // private static final String INFO_COLUMN = "Info";
65
66
67 // Stores already processed descriptions
68 Map<Taxon, TaxonDescription> myDescriptions = new HashMap<Taxon, TaxonDescription>();
69
70 @Override
71 protected void doInvoke(ExcelImportState<ExcelImportConfiguratorBase, ExcelRowBase> state) {
72
73 if (logger.isDebugEnabled()) { logger.debug("Importing distribution data"); }
74
75 // read and save all rows of the excel worksheet
76 List<Map<String, String>> recordList;
77 URI source = state.getConfig().getSource();
78 try{
79 recordList = ExcelUtils.parseXLS(source);
80 } catch (FileNotFoundException e) {
81 String message = "File not found: " + source;
82 warnProgress(state, message, e);
83 logger.error(message);
84 state.setUnsuccessfull();
85 return;
86 }
87 if (recordList != null) {
88 Map<String,String> record = null;
89 TransactionStatus txStatus = startTransaction();
90
91 for (int i = 0; i < recordList.size(); i++) {
92 record = recordList.get(i);
93 analyzeRecord(record);
94 }
95 commitTransaction(txStatus);
96 }
97
98 try {
99 if (logger.isDebugEnabled()) { logger.debug("End distribution data import"); }
100
101 } catch (Exception e) {
102 logger.error("Error closing the application context");
103 e.printStackTrace();
104 }
105
106 return;
107 }
108
109 /**
110 * Reads the data of one Excel sheet row
111 */
112 private void analyzeRecord(Map<String,String> record) {
113 /*
114 * Relevant columns:
115 * Name (EDIT)
116 * Distribution TDWG
117 * Status (only entries if not native)
118 * Literature number
119 * Literature
120 */
121
122 String editName = "";
123 List<String> distributionList = new ArrayList<String>();
124 String status = "";
125 String literatureNumber = "";
126 String literature = "";
127
128 Set<String> keys = record.keySet();
129
130 for (String key: keys) {
131
132 String value = record.get(key);
133 if (!value.equals("")) {
134 if (logger.isDebugEnabled()) { logger.debug(key + ": '" + value + "'"); }
135 }
136
137 if (key.contains(EDIT_NAME_COLUMN)) {
138 editName = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
139
140 } else if(key.contains(TDWG_DISTRIBUTION_COLUMN)) {
141 distributionList = CdmUtils.buildList(value);
142
143 } else if(key.contains(STATUS_COLUMN)) {
144 status = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
145
146 // } else if(key.contains(LITERATURE_NUMBER_COLUMN)) {
147 // literatureNumber = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
148 //
149 // } else if(key.contains(LITERATURE_COLUMN)) {
150 // literature = (String) CdmUtils.removeDuplicateWhitespace(value.trim());
151 //
152 } else {
153 //logger.warn("Column " + key + " ignored");
154 }
155 }
156
157 // Store the data of this record in the DB
158 if (!editName.equals("")) {
159 saveRecord(editName, distributionList, status, literatureNumber, literature);
160 }
161 }
162
163 /**
164 * Stores the data of one Excel sheet row in the database
165 */
166 private void saveRecord(String taxonName, List<String> distributionList,
167 String status, String literatureNumber, String literature) {
168
169 try {
170 // get the matching names from the DB
171 //List<TaxonName> taxonNames = getNameService().findByTitle(config);
172 List<TaxonName> taxonNames = getNameService().findByName(null, taxonName, null, null, null, null,null,null).getRecords();
173 if (taxonNames.isEmpty()) {
174 logger.error("Taxon name '" + taxonName + "' not found in DB");
175 } else {
176 if (logger.isDebugEnabled()) { logger.debug("Taxon found"); }
177 }
178
179 // get the taxa for the matching names
180 for(TaxonName dbTaxonName: taxonNames) {
181
182 Set<Taxon> taxa = dbTaxonName.getTaxa();
183 if (taxa.isEmpty()) {
184 logger.warn("No taxon found for name '" + taxonName + "'");
185 } else if (taxa.size() > 1) {
186 logger.warn("More than one taxa found for name '" + taxonName + "'");
187 }
188
189 for(Taxon taxon: taxa) {
190
191 TaxonDescription myDescription = null;
192
193 // If we have created a description for this taxon earlier, take this one.
194 // Otherwise, create a new description.
195 // We don't update any existing descriptions in the database at this point.
196 if (myDescriptions.containsKey(taxon)) {
197 myDescription = myDescriptions.get(taxon);
198 } else {
199 myDescription = TaxonDescription.NewInstance(taxon);
200 taxon.addDescription(myDescription);
201 myDescriptions.put(taxon, myDescription);
202 }
203
204 // Status
205 PresenceAbsenceTerm presenceAbsenceStatus = PresenceAbsenceTerm.NewInstance();
206 if (status.equals("")) {
207 presenceAbsenceStatus = PresenceAbsenceTerm.NATIVE();
208 } else {
209 presenceAbsenceStatus = PresenceAbsenceTerm.getPresenceAbsenceTermByAbbreviation(status);
210 }
211 // TODO: Handle absence case.
212 // This case has not yet occurred in the excel input file, though.
213
214 /* Set to true if taxon needs to be saved if at least one new distribution exists */
215 boolean save = false;
216
217 // TDWG areas
218 for (String distribution: distributionList) {
219
220 /* Set to true if this distribution is a new one*/
221 boolean ignore = false;
222
223 if(!distribution.equals("")) {
224 NamedArea namedArea = TdwgAreaProvider.getAreaByTdwgAbbreviation(distribution);
225 TaxonDescription taxonDescription = myDescriptions.get(taxon);
226 if (namedArea != null) {
227 // Check against existing distributions and ignore the ones that occur multiple times
228 Set<DescriptionElementBase> myDescriptionElements = taxonDescription.getElements();
229 for(DescriptionElementBase descriptionElement : myDescriptionElements) {
230 if (descriptionElement instanceof Distribution) {
231 if (namedArea == ((Distribution)descriptionElement).getArea()) {
232 ignore = true;
233 if (logger.isDebugEnabled()) {
234 logger.debug("Distribution ignored: " + distribution);
235 }
236 break;
237 }
238 }
239 }
240 // Create new distribution if not yet exist
241 if (ignore == false) {
242 save = true;
243 Distribution newDistribution = Distribution.NewInstance(namedArea, presenceAbsenceStatus);
244 myDescription.addElement(newDistribution);
245 if (logger.isDebugEnabled()) {
246 logger.debug("Distribution created: " + newDistribution.toString());
247 }
248 }
249 }
250 }
251 }
252 if (save == true) {
253 getTaxonService().save(taxon);
254 if (logger.isDebugEnabled()) { logger.debug("Taxon saved"); }
255 }
256 }
257 }
258 } catch (Exception e) {
259 logger.error("Error");
260 e.printStackTrace();
261 }
262 }
263
264 @Override
265 protected boolean doCheck(ExcelImportState<ExcelImportConfiguratorBase, ExcelRowBase> state) {
266 boolean result = true;
267 logger.warn("No check implemented for distribution data import");
268 return result;
269 }
270
271 @Override
272 protected boolean isIgnore(ExcelImportState<ExcelImportConfiguratorBase, ExcelRowBase> state) {
273 return false;
274 }
275 }