bugfixes for normal explicit
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / taxa / NormalExplicitImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.excel.taxa;
11
12 import java.net.MalformedURLException;
13 import java.util.Arrays;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Set;
17
18 import org.apache.commons.lang.StringUtils;
19 import org.apache.log4j.Logger;
20 import org.springframework.stereotype.Component;
21
22 import eu.etaxonomy.cdm.common.CdmUtils;
23 import eu.etaxonomy.cdm.model.common.CdmBase;
24 import eu.etaxonomy.cdm.model.common.Language;
25 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
26 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
27 import eu.etaxonomy.cdm.model.description.Distribution;
28 import eu.etaxonomy.cdm.model.description.Feature;
29 import eu.etaxonomy.cdm.model.description.PresenceTerm;
30 import eu.etaxonomy.cdm.model.description.TaxonDescription;
31 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
32 import eu.etaxonomy.cdm.model.description.TextData;
33 import eu.etaxonomy.cdm.model.location.NamedArea;
34 import eu.etaxonomy.cdm.model.location.TdwgArea;
35 import eu.etaxonomy.cdm.model.media.Media;
36 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37 import eu.etaxonomy.cdm.model.name.NonViralName;
38 import eu.etaxonomy.cdm.model.name.Rank;
39 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
40 import eu.etaxonomy.cdm.model.reference.ReferenceBase;
41 import eu.etaxonomy.cdm.model.taxon.Synonym;
42 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
43 import eu.etaxonomy.cdm.model.taxon.Taxon;
44 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
45 import eu.etaxonomy.cdm.model.taxon.TaxonomicTree;
46 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
47 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
48 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
49
50 /**
51 * @author a.babadshanjan
52 * @created 08.01.2009
53 * @version 1.0
54 */
55
56 @Component
57 public class NormalExplicitImport extends TaxonExcelImporterBase {
58 private static final Logger logger = Logger.getLogger(NormalExplicitImport.class);
59
60 public static Set<String> validMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "valid", "accepted", "a", "v", "t"}));
61 public static Set<String> synonymMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "invalid", "synonym", "s", "i"}));
62
63
64 @Override
65 protected boolean isIgnore(TaxonExcelImportState state) {
66 return false;
67 }
68
69
70 /* (non-Javadoc)
71 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
72 */
73 @Override
74 protected boolean doCheck(TaxonExcelImportState state) {
75 logger.warn("DoCheck not yet implemented for NormalExplicitImport");
76 return true;
77 }
78
79 @Override
80 protected boolean analyzeRecord(HashMap<String, String> record, TaxonExcelImportState state) {
81
82 boolean success = true;
83 Set<String> keys = record.keySet();
84
85 NormalExplicitRow normalExplicitRow = new NormalExplicitRow();
86 state.setTaxonLight(normalExplicitRow);
87
88 for (String originalKey: keys) {
89 Integer index = 0;
90 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
91 String[] split = indexedKey.split("_");
92 String key = split[0];
93 if (split.length > 1){
94 String indexString = split[1];
95 try {
96 index = Integer.valueOf(indexString);
97 } catch (NumberFormatException e) {
98 String message = "Index must be integer";
99 logger.error(message);
100 continue;
101 }
102 }
103
104 String value = (String) record.get(indexedKey);
105 if (! StringUtils.isBlank(value)) {
106 if (logger.isDebugEnabled()) { logger.debug(key + ": " + value); }
107 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
108 }else{
109 continue;
110 }
111
112
113 if (key.equalsIgnoreCase(ID_COLUMN)) {
114 int ivalue = floatString2IntValue(value);
115 normalExplicitRow.setId(ivalue);
116
117 } else if(key.equalsIgnoreCase(PARENT_ID_COLUMN)) {
118 int ivalue = floatString2IntValue(value);
119 normalExplicitRow.setParentId(ivalue);
120
121 } else if(key.equalsIgnoreCase(RANK_COLUMN)) {
122 normalExplicitRow.setRank(value);
123
124 } else if(key.equalsIgnoreCase(SCIENTIFIC_NAME_COLUMN)) {
125 normalExplicitRow.setScientificName(value);
126
127 } else if(key.equalsIgnoreCase(AUTHOR_COLUMN)) {
128 normalExplicitRow.setAuthor(value);
129
130 } else if(key.equalsIgnoreCase(NAME_STATUS_COLUMN)) {
131 normalExplicitRow.setNameStatus(value);
132
133 } else if(key.equalsIgnoreCase(VERNACULAR_NAME_COLUMN)) {
134 normalExplicitRow.setCommonName(value);
135
136 } else if(key.equalsIgnoreCase(LANGUAGE_COLUMN)) {
137 normalExplicitRow.setLanguage(value);
138
139 } else if(key.equalsIgnoreCase(TDWG_COLUMN)) {
140 value = value.replace(".0", "");
141 normalExplicitRow.putDistribution(index, value);
142
143 } else if(key.equalsIgnoreCase(PROTOLOGUE_COLUMN)) {
144 normalExplicitRow.putProtologue(index, value);
145
146 } else if(key.equalsIgnoreCase(IMAGE_COLUMN)) {
147 normalExplicitRow.putImage(index, value);
148
149 } else {
150 success = false;
151 logger.error("Unexpected column header " + key);
152 }
153 }
154 return success;
155 }
156
157
158 /**
159 * Stores taxa records in DB
160 */
161 @Override
162 protected boolean firstPass(TaxonExcelImportState state) {
163 boolean success = true;
164 Rank rank = null;
165 NormalExplicitRow taxonLight = state.getTaxonLight();
166
167 String rankStr = taxonLight.getRank();
168 String taxonNameStr = taxonLight.getScientificName();
169 String authorStr = taxonLight.getAuthor();
170 String nameStatus = taxonLight.getNameStatus();
171 Integer id = taxonLight.getId();
172
173 if (CdmUtils.isNotEmpty(taxonNameStr)) {
174
175 // Determine the rank
176 try {
177 rank = Rank.getRankByNameOrAbbreviation(rankStr);
178 } catch (UnknownCdmTypeException ex) {
179 try {
180 rank = Rank.getRankByEnglishName(rankStr, state.getConfig().getNomenclaturalCode(), false);
181 } catch (UnknownCdmTypeException e) {
182 success = false;
183 logger.error(rankStr + " is not a valid rank.");
184 }
185 }
186
187 // Create the taxon name object depending on the setting of the nomenclatural code
188 // in the configurator (botanical code, zoological code, etc.)
189 NomenclaturalCode nc = getConfigurator().getNomenclaturalCode();
190
191 TaxonBase taxonBase = null;
192
193 if (! synonymMarkers.contains(nameStatus) && state.getConfig().isDoMatchTaxa()){
194 String titleCache = CdmUtils.concat(" ", taxonNameStr, authorStr);
195 taxonBase = getTaxonService().findBestMatchingTaxon(titleCache);
196 if (taxonBase != null){
197 logger.warn("Matching taxon found for " + titleCache);
198 }
199 }
200 if (taxonBase == null){
201 taxonBase = createTaxon(state, rank, taxonNameStr, authorStr, nameStatus, nc);
202 }
203 if (taxonBase == null){
204 return false;
205 }
206
207
208 //protologue
209 for (String protologue : taxonLight.getProtologues()){
210 TextData textData = TextData.NewInstance(Feature.PROTOLOGUE());
211 this.getNameDescription(taxonBase.getName()).addElement(textData);
212 textData.addMedia(Media.NewInstance(protologue, null, null, null));
213 }
214
215 //media
216 for (String imageUrl : taxonLight.getImages()){
217 //TODO
218 Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
219 TaxonDescription td = taxon.getImageGallery(true);
220 DescriptionElementBase mediaHolder;
221 if (td.getElements().size() != 0){
222 mediaHolder = td.getElements().iterator().next();
223 }else{
224 mediaHolder = TextData.NewInstance(Feature.IMAGE());
225 td.addElement(mediaHolder);
226 }
227 try {
228 Media media = getImageMedia(imageUrl, true);
229 mediaHolder.addMedia(media);
230 } catch (MalformedURLException e) {
231 logger.warn("Can't add media: " + e.getMessage());
232 }
233 }
234
235 //tdwg label
236 for (String tdwg : taxonLight.getDistributions()){
237 //TODO
238 Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
239 TaxonDescription td = this.getTaxonDescription(taxon, false, true);
240 NamedArea area = TdwgArea.getAreaByTdwgAbbreviation(tdwg);
241 if (area == null){
242 area = TdwgArea.getAreaByTdwgLabel(tdwg);
243 }
244 if (area != null){
245 Distribution distribution = Distribution.NewInstance(area, PresenceTerm.PRESENT());
246 td.addElement(distribution);
247 }else{
248 String message = "TDWG area could not be recognized: " + tdwg;
249 logger.warn(message);
250 }
251
252 }
253
254
255 state.putTaxon(id, taxonBase);
256 getTaxonService().save(taxonBase);
257 }
258 return success;
259 }
260
261
262
263
264 /**
265 * Stores parent-child, synonym and common name relationships
266 */
267 @Override
268 protected boolean secondPass(TaxonExcelImportState state) {
269 boolean success = true;
270 try {
271 String taxonNameStr = state.getTaxonLight().getScientificName();
272 String nameStatus = state.getTaxonLight().getNameStatus();
273 String commonNameStr = state.getTaxonLight().getCommonName();
274 Integer parentId = state.getTaxonLight().getParentId();
275 Integer childId = state.getTaxonLight().getId();
276
277 Taxon parentTaxon = (Taxon)state.getTaxonBase(parentId);
278 if (CdmUtils.isNotEmpty(taxonNameStr)) {
279 nameStatus = CdmUtils.Nz(nameStatus).trim().toLowerCase();
280 if (validMarkers.contains(nameStatus)){
281 Taxon taxon = (Taxon)state.getTaxonBase(childId);
282 // Add the parent relationship
283 if (state.getTaxonLight().getParentId() != 0) {
284 if (parentTaxon != null) {
285 //Taxon taxon = (Taxon)state.getTaxonBase(childId);
286
287 ReferenceBase citation = state.getConfig().getSourceReference();
288 String microCitation = null;
289 Taxon childTaxon = taxon;
290 success &= makeParent(state, parentTaxon, childTaxon, citation, microCitation);
291 getTaxonService().saveOrUpdate(parentTaxon);
292 } else {
293 logger.warn("Taxonomic parent not found for " + taxonNameStr);
294 success = false;
295 }
296 }else{
297 //do nothing (parent == 0) no parent exists
298 }
299 }else if (synonymMarkers.contains(nameStatus)){
300 //add synonym relationship
301 try {
302 TaxonBase taxonBase = state.getTaxonBase(childId);
303 Synonym synonym = CdmBase.deproxy(taxonBase,Synonym.class);
304 parentTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
305 getTaxonService().saveOrUpdate(parentTaxon);
306 } catch (Exception e) {
307 logger.warn("Child id = " + childId);
308 e.printStackTrace();
309 }
310 }
311 }
312 if (CdmUtils.isNotEmpty(commonNameStr)){ // add common name to taxon
313 handleCommonName(state, taxonNameStr, commonNameStr, parentId);
314 }
315 } catch (Exception e) {
316 e.printStackTrace();
317 }
318 return success;
319 }
320
321
322 /**
323 * @param state
324 * @param taxonNameStr
325 * @param commonNameStr
326 * @param parentId
327 */
328 private void handleCommonName(TaxonExcelImportState state,
329 String taxonNameStr, String commonNameStr, Integer parentId) {
330 Language language = getTermService().getLanguageByIso(state.getTaxonLight().getLanguage());
331 if (language == null && CdmUtils.isNotEmpty(state.getTaxonLight().getLanguage()) ){
332 String error ="Language is null but shouldn't";
333 logger.error(error);
334 throw new IllegalArgumentException(error);
335 }
336 CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(commonNameStr, language);
337 try {
338 Taxon taxon = (Taxon)state.getTaxonBase(parentId);
339 TaxonDescription taxonDescription = getTaxonDescription(taxon, false, true);
340 taxonDescription.addElement(commonTaxonName);
341 logger.info("Common name " + commonNameStr + " added to " + taxon.getTitleCache());
342 } catch (ClassCastException ex) {
343 logger.error(taxonNameStr + " is not a taxon instance.");
344 }
345 }
346
347
348 /**
349 * @param state
350 * @param rank
351 * @param taxonNameStr
352 * @param authorStr
353 * @param nameStatus
354 * @param nc
355 * @return
356 */
357 private TaxonBase createTaxon(TaxonExcelImportState state, Rank rank, String taxonNameStr,
358 String authorStr, String nameStatus, NomenclaturalCode nc) {
359 TaxonBase taxonBase;
360 NonViralName taxonNameBase = null;
361 if (nc == NomenclaturalCode.ICVCN){
362 logger.warn("ICVCN not yet supported");
363
364 }else{
365 taxonNameBase =(NonViralName) nc.getNewTaxonNameInstance(rank);
366 //NonViralName nonViralName = (NonViralName)taxonNameBase;
367 NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
368 taxonNameBase = parser.parseFullName(taxonNameStr, nc, rank);
369
370 taxonNameBase.setNameCache(taxonNameStr);
371
372 // Create the author
373 if (CdmUtils.isNotEmpty(authorStr)) {
374 try {
375 parser.parseAuthors(taxonNameBase, authorStr);
376 } catch (StringNotParsableException e) {
377 taxonNameBase.setAuthorshipCache(authorStr);
378 }
379 }
380 }
381
382 //Create the taxon
383 ReferenceBase sec = state.getConfig().getSourceReference();
384 // Create the status
385 nameStatus = CdmUtils.Nz(nameStatus).trim().toLowerCase();
386 if (validMarkers.contains(nameStatus)){
387 taxonBase = Taxon.NewInstance(taxonNameBase, sec);
388 }else if (synonymMarkers.contains(nameStatus)){
389 taxonBase = Synonym.NewInstance(taxonNameBase, sec);
390 }else {
391 Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
392 taxon.setTaxonStatusUnknown(true);
393 taxonBase = taxon;
394 }
395 return taxonBase;
396 }
397
398 /**
399 * @param taxon
400 * @return
401 */
402 //TODO implementation must be improved when matching of taxon names with existing names is implemented
403 //=> the assumption that the only description is the description added by this import
404 //is wrong then
405 private TaxonNameDescription getNameDescription(TaxonNameBase name) {
406 Set<TaxonNameDescription> descriptions = name.getDescriptions();
407 if (descriptions.size()>1){
408 throw new IllegalStateException("Implementation does not yet support names with multiple descriptions");
409 }else if (descriptions.size()==1){
410 return descriptions.iterator().next();
411 }else{
412 return TaxonNameDescription.NewInstance(name);
413 }
414 }
415
416 private boolean makeParent(TaxonExcelImportState state, Taxon parentTaxon, Taxon childTaxon, ReferenceBase citation, String microCitation){
417 boolean success = true;
418 ReferenceBase sec = state.getConfig().getSourceReference();
419
420 // ReferenceBase sec = parentTaxon.getSec();
421 TaxonomicTree tree = state.getTree(sec);
422 if (tree == null){
423 tree = makeTree(state, sec);
424 }
425 if (sec.equals(childTaxon.getSec())){
426 success &= (null != tree.addParentChild(parentTaxon, childTaxon, citation, microCitation));
427 }else{
428 logger.warn("No relationship added for child " + childTaxon.getTitleCache());
429 }
430 return success;
431 }
432
433
434
435 }