move base column handling to base class (Excel import)
[cdmlib.git] / cdmlib-io / src / main / java / eu / etaxonomy / cdm / io / excel / taxa / NormalExplicitImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.excel.taxa;
11
12 import java.net.MalformedURLException;
13 import java.net.URI;
14 import java.net.URISyntaxException;
15 import java.util.Arrays;
16 import java.util.HashSet;
17 import java.util.Set;
18
19 import org.apache.log4j.Logger;
20 import org.springframework.stereotype.Component;
21
22 import eu.etaxonomy.cdm.common.CdmUtils;
23 import eu.etaxonomy.cdm.model.common.CdmBase;
24 import eu.etaxonomy.cdm.model.common.Language;
25 import eu.etaxonomy.cdm.model.description.CommonTaxonName;
26 import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
27 import eu.etaxonomy.cdm.model.description.Distribution;
28 import eu.etaxonomy.cdm.model.description.Feature;
29 import eu.etaxonomy.cdm.model.description.PresenceTerm;
30 import eu.etaxonomy.cdm.model.description.TaxonDescription;
31 import eu.etaxonomy.cdm.model.description.TaxonNameDescription;
32 import eu.etaxonomy.cdm.model.description.TextData;
33 import eu.etaxonomy.cdm.model.location.NamedArea;
34 import eu.etaxonomy.cdm.model.location.TdwgArea;
35 import eu.etaxonomy.cdm.model.media.Media;
36 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37 import eu.etaxonomy.cdm.model.name.NonViralName;
38 import eu.etaxonomy.cdm.model.name.Rank;
39 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
40 import eu.etaxonomy.cdm.model.reference.Reference;
41 import eu.etaxonomy.cdm.model.taxon.Classification;
42 import eu.etaxonomy.cdm.model.taxon.Synonym;
43 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
44 import eu.etaxonomy.cdm.model.taxon.Taxon;
45 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
46 import eu.etaxonomy.cdm.strategy.exceptions.StringNotParsableException;
47 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
48 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
49
50 /**
51 * @author a.babadshanjan
52 * @created 08.01.2009
53 * @version 1.0
54 */
55
56 @Component
57 public class NormalExplicitImport extends TaxonExcelImporterBase {
58 private static final Logger logger = Logger.getLogger(NormalExplicitImport.class);
59
60 public static Set<String> validMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "valid", "accepted", "a", "v", "t"}));
61 public static Set<String> synonymMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "invalid", "synonym", "s", "i"}));
62
63
64 /* (non-Javadoc)
65 * @see eu.etaxonomy.cdm.io.excel.common.ExcelTaxonOrSpecimenImportBase#analyzeSingleValue(eu.etaxonomy.cdm.io.excel.common.ExcelTaxonOrSpecimenImportBase.KeyValue, eu.etaxonomy.cdm.io.excel.common.ExcelImportState)
66 */
67 @Override
68 protected boolean analyzeSingleValue(KeyValue keyValue, TaxonExcelImportState state) {
69 boolean success = true;
70
71 NormalExplicitRow normalExplicitRow = state.getCurrentRow();
72 String key = keyValue.key;
73 String value = keyValue.value;
74 Integer index = keyValue.index;
75 if (key.equalsIgnoreCase(ID_COLUMN)) {
76 int ivalue = floatString2IntValue(value);
77 normalExplicitRow.setId(ivalue);
78
79 } else if(key.equalsIgnoreCase(PARENT_ID_COLUMN)) {
80 int ivalue = floatString2IntValue(value);
81 normalExplicitRow.setParentId(ivalue);
82
83 } else if(key.equalsIgnoreCase(RANK_COLUMN)) {
84 normalExplicitRow.setRank(value);
85
86 } else if(key.equalsIgnoreCase(SCIENTIFIC_NAME_COLUMN)) {
87 normalExplicitRow.setScientificName(value);
88
89 } else if(key.equalsIgnoreCase(AUTHOR_COLUMN)) {
90 normalExplicitRow.setAuthor(value);
91
92 } else if(key.equalsIgnoreCase(NAME_STATUS_COLUMN)) {
93 normalExplicitRow.setNameStatus(value);
94
95 } else if(key.equalsIgnoreCase(VERNACULAR_NAME_COLUMN)) {
96 normalExplicitRow.setCommonName(value);
97
98 } else if(key.equalsIgnoreCase(LANGUAGE_COLUMN)) {
99 normalExplicitRow.setLanguage(value);
100
101 } else if(key.equalsIgnoreCase(TDWG_COLUMN)) {
102 value = value.replace(".0", "");
103 normalExplicitRow.putDistribution(index, value);
104
105 } else if(key.equalsIgnoreCase(PROTOLOGUE_COLUMN)) {
106 normalExplicitRow.putProtologue(index, value);
107
108 } else if(key.equalsIgnoreCase(IMAGE_COLUMN)) {
109 normalExplicitRow.putImage(index, value);
110
111 } else {
112 success = false;
113 logger.error("Unexpected column header " + key);
114 }
115 return success;
116 }
117
118 /* (non-Javadoc)
119 * @see eu.etaxonomy.cdm.io.excel.common.ExcelTaxonOrSpecimenImportBase#createDataHolderRow()
120 */
121 @Override
122 protected NormalExplicitRow createDataHolderRow() {
123 return new NormalExplicitRow();
124 }
125
126
127 /**
128 * Stores taxa records in DB
129 */
130 @Override
131 protected boolean firstPass(TaxonExcelImportState state) {
132 boolean success = true;
133 Rank rank = null;
134 NormalExplicitRow taxonDataHolder = state.getCurrentRow();
135
136 String rankStr = taxonDataHolder.getRank();
137 String taxonNameStr = taxonDataHolder.getScientificName();
138 String authorStr = taxonDataHolder.getAuthor();
139 String nameStatus = taxonDataHolder.getNameStatus();
140 Integer id = taxonDataHolder.getId();
141
142 if (CdmUtils.isNotEmpty(taxonNameStr)) {
143
144 TaxonBase taxonBase = null;
145 if (taxonDataHolder.getCdmUuid() != null){
146 taxonBase = getTaxonService().find(taxonDataHolder.getCdmUuid());
147 }else{
148
149 // Rank
150 try {
151 rank = Rank.getRankByNameOrAbbreviation(rankStr);
152 } catch (UnknownCdmTypeException ex) {
153 try {
154 rank = Rank.getRankByEnglishName(rankStr, state.getConfig().getNomenclaturalCode(), false);
155 } catch (UnknownCdmTypeException e) {
156 success = false;
157 logger.error(rankStr + " is not a valid rank.");
158 }
159 }
160
161 //taxon
162 taxonBase = createTaxon(state, rank, taxonNameStr, authorStr, nameStatus);
163 }
164 if (taxonBase == null){
165 String message = "Taxon could not be created. Record will not be handled";
166 fireWarningEvent(message, "Record: " + state.getCurrentLine(), 6);
167 return false;
168 }
169
170 //protologue
171 for (String protologue : taxonDataHolder.getProtologues()){
172 TextData textData = TextData.NewInstance(Feature.PROTOLOGUE());
173 this.getNameDescription(taxonBase.getName()).addElement(textData);
174 URI uri;
175 try {
176 uri = new URI(protologue);
177 textData.addMedia(Media.NewInstance(uri, null, null, null));
178 } catch (URISyntaxException e) {
179 String warning = "URISyntaxException when trying to convert to URI: " + protologue;
180 logger.error(warning);
181 }
182 }
183
184 //media
185 for (String imageUrl : taxonDataHolder.getImages()){
186 //TODO
187 Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
188 TaxonDescription td = taxon.getImageGallery(true);
189 DescriptionElementBase mediaHolder;
190 if (td.getElements().size() != 0){
191 mediaHolder = td.getElements().iterator().next();
192 }else{
193 mediaHolder = TextData.NewInstance(Feature.IMAGE());
194 td.addElement(mediaHolder);
195 }
196 try {
197 Media media = getImageMedia(imageUrl, true);
198 mediaHolder.addMedia(media);
199 } catch (MalformedURLException e) {
200 logger.warn("Can't add media: " + e.getMessage());
201 }
202 }
203
204 //tdwg label
205 for (String tdwg : taxonDataHolder.getDistributions()){
206 //TODO
207 Taxon taxon = CdmBase.deproxy(taxonBase, Taxon.class);
208 TaxonDescription td = this.getTaxonDescription(taxon, false, true);
209 NamedArea area = TdwgArea.getAreaByTdwgAbbreviation(tdwg);
210 if (area == null){
211 area = TdwgArea.getAreaByTdwgLabel(tdwg);
212 }
213 if (area != null){
214 Distribution distribution = Distribution.NewInstance(area, PresenceTerm.PRESENT());
215 td.addElement(distribution);
216 }else{
217 String message = "TDWG area could not be recognized: " + tdwg;
218 logger.warn(message);
219 }
220
221 }
222
223
224 state.putTaxon(id, taxonBase);
225 getTaxonService().save(taxonBase);
226 }
227 return success;
228 }
229
230 /**
231 * @param state
232 * @param rank
233 * @param taxonNameStr
234 * @param authorStr
235 * @param nameStatus
236 * @return
237 */
238 private TaxonBase createTaxon(TaxonExcelImportState state, Rank rank,
239 String taxonNameStr, String authorStr, String nameStatus) {
240 // Create the taxon name object depending on the setting of the nomenclatural code
241 // in the configurator (botanical code, zoological code, etc.)
242 NomenclaturalCode nc = getConfigurator().getNomenclaturalCode();
243
244 TaxonBase taxonBase = null;
245
246 String titleCache = CdmUtils.concat(" ", taxonNameStr, authorStr);
247 if (! synonymMarkers.contains(nameStatus) && state.getConfig().isDoMatchTaxa()){
248 titleCache = CdmUtils.concat(" ", taxonNameStr, authorStr);
249 taxonBase = getTaxonService().findBestMatchingTaxon(titleCache);
250 }else{
251 taxonBase = getTaxonService().findBestMatchingSynonym(titleCache);
252 if (taxonBase != null){
253 logger.info("Matching taxon/synonym found for " + titleCache);
254 }
255 }
256 if (taxonBase != null){
257 logger.info("Matching taxon/synonym found for " + titleCache);
258 }else {
259 taxonBase = createTaxon(state, rank, taxonNameStr, authorStr, nameStatus, nc);
260 }
261 return taxonBase;
262 }
263
264
265
266
267 /**
268 * Stores parent-child, synonym and common name relationships
269 */
270 @Override
271 protected boolean secondPass(TaxonExcelImportState state) {
272 boolean success = true;
273 try {
274 String taxonNameStr = state.getCurrentRow().getScientificName();
275 String nameStatus = state.getCurrentRow().getNameStatus();
276 String commonNameStr = state.getCurrentRow().getCommonName();
277 Integer parentId = state.getCurrentRow().getParentId();
278 Integer childId = state.getCurrentRow().getId();
279
280 Taxon parentTaxon = (Taxon)state.getTaxonBase(parentId);
281 if (CdmUtils.isNotEmpty(taxonNameStr)) {
282 nameStatus = CdmUtils.Nz(nameStatus).trim().toLowerCase();
283 if (validMarkers.contains(nameStatus)){
284 Taxon taxon = (Taxon)state.getTaxonBase(childId);
285 // Add the parent relationship
286 if (state.getCurrentRow().getParentId() != 0) {
287 if (parentTaxon != null) {
288 //Taxon taxon = (Taxon)state.getTaxonBase(childId);
289
290 Reference citation = state.getConfig().getSourceReference();
291 String microCitation = null;
292 Taxon childTaxon = taxon;
293 success &= makeParent(state, parentTaxon, childTaxon, citation, microCitation);
294 getTaxonService().saveOrUpdate(parentTaxon);
295 } else {
296 logger.warn("Taxonomic parent not found for " + taxonNameStr);
297 success = false;
298 }
299 }else{
300 //do nothing (parent == 0) no parent exists
301 }
302 }else if (synonymMarkers.contains(nameStatus)){
303 //add synonym relationship
304 try {
305 TaxonBase taxonBase = state.getTaxonBase(childId);
306 Synonym synonym = CdmBase.deproxy(taxonBase,Synonym.class);
307 parentTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
308 getTaxonService().saveOrUpdate(parentTaxon);
309 } catch (Exception e) {
310 logger.warn("Child id = " + childId);
311 e.printStackTrace();
312 }
313 }
314 }
315 if (CdmUtils.isNotEmpty(commonNameStr)){ // add common name to taxon
316 handleCommonName(state, taxonNameStr, commonNameStr, parentId);
317 }
318 } catch (Exception e) {
319 e.printStackTrace();
320 }
321 return success;
322 }
323
324
325 /**
326 * @param state
327 * @param taxonNameStr
328 * @param commonNameStr
329 * @param parentId
330 */
331 private void handleCommonName(TaxonExcelImportState state,
332 String taxonNameStr, String commonNameStr, Integer parentId) {
333 Language language = getTermService().getLanguageByIso(state.getCurrentRow().getLanguage());
334 if (language == null && CdmUtils.isNotEmpty(state.getCurrentRow().getLanguage()) ){
335 String error ="Language is null but shouldn't";
336 logger.error(error);
337 throw new IllegalArgumentException(error);
338 }
339 CommonTaxonName commonTaxonName = CommonTaxonName.NewInstance(commonNameStr, language);
340 try {
341 Taxon taxon = (Taxon)state.getTaxonBase(parentId);
342 TaxonDescription taxonDescription = getTaxonDescription(taxon, false, true);
343 taxonDescription.addElement(commonTaxonName);
344 logger.info("Common name " + commonNameStr + " added to " + taxon.getTitleCache());
345 } catch (ClassCastException ex) {
346 logger.error(taxonNameStr + " is not a taxon instance.");
347 }
348 }
349
350
351 /**
352 * @param state
353 * @param rank
354 * @param taxonNameStr
355 * @param authorStr
356 * @param nameStatus
357 * @param nc
358 * @return
359 */
360 private TaxonBase createTaxon(TaxonExcelImportState state, Rank rank, String taxonNameStr,
361 String authorStr, String nameStatus, NomenclaturalCode nc) {
362 TaxonBase taxonBase;
363 NonViralName taxonNameBase = null;
364 if (nc == NomenclaturalCode.ICVCN){
365 logger.warn("ICVCN not yet supported");
366
367 }else{
368 taxonNameBase =(NonViralName) nc.getNewTaxonNameInstance(rank);
369 //NonViralName nonViralName = (NonViralName)taxonNameBase;
370 NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
371 taxonNameBase = parser.parseFullName(taxonNameStr, nc, rank);
372
373 taxonNameBase.setNameCache(taxonNameStr);
374
375 // Create the author
376 if (CdmUtils.isNotEmpty(authorStr)) {
377 try {
378 parser.parseAuthors(taxonNameBase, authorStr);
379 } catch (StringNotParsableException e) {
380 taxonNameBase.setAuthorshipCache(authorStr);
381 }
382 }
383 }
384
385 //Create the taxon
386 Reference sec = state.getConfig().getSourceReference();
387 // Create the status
388 nameStatus = CdmUtils.Nz(nameStatus).trim().toLowerCase();
389 if (validMarkers.contains(nameStatus)){
390 taxonBase = Taxon.NewInstance(taxonNameBase, sec);
391 }else if (synonymMarkers.contains(nameStatus)){
392 taxonBase = Synonym.NewInstance(taxonNameBase, sec);
393 }else {
394 Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
395 taxon.setTaxonStatusUnknown(true);
396 taxonBase = taxon;
397 }
398 return taxonBase;
399 }
400
401 /**
402 * @param taxon
403 * @return
404 */
405 //TODO implementation must be improved when matching of taxon names with existing names is implemented
406 //=> the assumption that the only description is the description added by this import
407 //is wrong then
408 private TaxonNameDescription getNameDescription(TaxonNameBase name) {
409 Set<TaxonNameDescription> descriptions = name.getDescriptions();
410 if (descriptions.size()>1){
411 throw new IllegalStateException("Implementation does not yet support names with multiple descriptions");
412 }else if (descriptions.size()==1){
413 return descriptions.iterator().next();
414 }else{
415 return TaxonNameDescription.NewInstance(name);
416 }
417 }
418
419 private boolean makeParent(TaxonExcelImportState state, Taxon parentTaxon, Taxon childTaxon, Reference citation, String microCitation){
420 boolean success = true;
421 Reference sec = state.getConfig().getSourceReference();
422
423 // Reference sec = parentTaxon.getSec();
424 Classification tree = state.getTree(sec);
425 if (tree == null){
426 tree = makeTree(state, sec);
427 }
428 if (sec.equals(childTaxon.getSec())){
429 success &= (null != tree.addParentChild(parentTaxon, childTaxon, citation, microCitation));
430 }else{
431 logger.warn("No relationship added for child " + childTaxon.getTitleCache());
432 }
433 return success;
434 }
435
436
437 /* (non-Javadoc)
438 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
439 */
440 @Override
441 protected boolean doCheck(TaxonExcelImportState state) {
442 logger.warn("DoCheck not yet implemented for NormalExplicitImport");
443 return true;
444 }
445
446 /* (non-Javadoc)
447 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IoStateBase)
448 */
449 @Override
450 protected boolean isIgnore(TaxonExcelImportState state) {
451 return false;
452 }
453
454
455
456 }