Minor cleanups in appimport
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / cyprus / CyprusExcelImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.cyprus;
11
12 import java.util.Arrays;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Set;
17 import java.util.UUID;
18
19 import org.apache.commons.lang.StringUtils;
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
25 import eu.etaxonomy.cdm.io.common.mapping.IInputTransformer;
26 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
27 import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
28 import eu.etaxonomy.cdm.model.common.Language;
29 import eu.etaxonomy.cdm.model.description.Distribution;
30 import eu.etaxonomy.cdm.model.description.Feature;
31 import eu.etaxonomy.cdm.model.description.PresenceAbsenceTerm;
32 import eu.etaxonomy.cdm.model.description.TaxonDescription;
33 import eu.etaxonomy.cdm.model.description.TextData;
34 import eu.etaxonomy.cdm.model.location.NamedArea;
35 import eu.etaxonomy.cdm.model.name.BotanicalName;
36 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37 import eu.etaxonomy.cdm.model.name.NonViralName;
38 import eu.etaxonomy.cdm.model.name.Rank;
39 import eu.etaxonomy.cdm.model.reference.Reference;
40 import eu.etaxonomy.cdm.model.taxon.Classification;
41 import eu.etaxonomy.cdm.model.taxon.Synonym;
42 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
43 import eu.etaxonomy.cdm.model.taxon.Taxon;
44 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
45 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
46 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
47
48 /**
49 * @author a.babadshanjan
50 * @created 08.01.2009
51 */
52
53 @Component
54 public class CyprusExcelImport extends ExcelImporterBase<CyprusImportState> {
55 private static final Logger logger = Logger.getLogger(CyprusExcelImport.class);
56
57 public static Set<String> validMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "valid", "accepted", "a", "v", "t"}));
58 public static Set<String> synonymMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "invalid", "synonym", "s", "i"}));
59
60
61 @Override
62 protected boolean isIgnore(CyprusImportState state) {
63 return ! state.getConfig().isDoTaxa();
64 }
65
66
67 /* (non-Javadoc)
68 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
69 */
70 @Override
71 protected boolean doCheck(CyprusImportState state) {
72 logger.warn("DoCheck not yet implemented for CyprusExcelImport");
73 return true;
74 }
75
76 // protected static final String ID_COLUMN = "Id";
77 protected static final String SPECIES_COLUMN = "species";
78 protected static final String SUBSPECIES_COLUMN = "subspecies";
79 protected static final String GENUS_COLUMN = "genus";
80 protected static final String FAMILY_COLUMN = "family";
81 protected static final String DIVISION_COLUMN = "division";
82 protected static final String HOMOTYPIC_SYNONYM_COLUMN = "homotypic synonyms";
83 protected static final String HETEROTYPIC_SYNONYMS_COLUMN = "heterotypic synonyms";
84 protected static final String ENDEMISM_COLUMN = "endemism";
85
86 protected static final String STATUS_COLUMN = "status";
87 protected static final String RED_DATA_BOOK_CATEGORY_COLUMN = "red data book category";
88 protected static final String SYSTEMATICS_COLUMN = "systematics";
89
90
91
92 // TODO: This enum is for future use (perhaps).
93 protected enum Columns {
94 // Id("Id"),
95 Species("species"),
96 Subspecies("subspecies"),
97 Genus("genus"),
98 Family("family"),
99 Division("division"),
100 HomotypicSynonyms("homotypic synonyms"),
101 HeterotypicSynonyms("heterotypic synonyms"),
102 Status("status"),
103 Endemism("endemism");
104
105 private final String head;
106 private String value;
107
108 private Columns(String head) {
109 this.head = head;
110 }
111
112 public String head() {
113 return this.head;
114 }
115
116 public String value() {
117 return this.value;
118 }
119 }
120
121
122 @Override
123 protected void analyzeRecord(HashMap<String, String> record, CyprusImportState state) {
124
125 Set<String> keys = record.keySet();
126
127 CyprusRow cyprusRow = new CyprusRow();
128 state.setCyprusRow(cyprusRow);
129
130 for (String originalKey: keys) {
131 Integer index = 0;
132 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
133 String[] split = indexedKey.split("_");
134 String key = split[0];
135 if (split.length > 1){
136 String indexString = split[1];
137 try {
138 index = Integer.valueOf(indexString);
139 } catch (NumberFormatException e) {
140 String message = "Index must be integer";
141 logger.error(message);
142 continue;
143 }
144 }
145
146 String value = record.get(indexedKey);
147 if (! StringUtils.isBlank(value)) {
148 if (logger.isDebugEnabled()) { logger.debug(key + ": " + value); }
149 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
150 }else{
151 continue;
152 }
153
154
155 if (key.equalsIgnoreCase(SPECIES_COLUMN)) {
156 // int ivalue = floatString2IntValue(value);
157 cyprusRow.setSpecies(value);
158
159 } else if(key.equalsIgnoreCase(SUBSPECIES_COLUMN)) {
160 cyprusRow.setSubspecies(value);
161
162 } else if(key.equalsIgnoreCase(HOMOTYPIC_SYNONYM_COLUMN)) {
163 cyprusRow.setHomotypicSynonyms(value);
164
165 } else if(key.equalsIgnoreCase(HETEROTYPIC_SYNONYMS_COLUMN)) {
166 cyprusRow.setHeterotypicSynonyms(value);
167
168 } else if(key.equalsIgnoreCase(ENDEMISM_COLUMN)) {
169 cyprusRow.setEndemism(value);
170
171 } else if(key.equalsIgnoreCase(STATUS_COLUMN)) {
172 cyprusRow.setStatus(value);
173
174 } else if(key.equalsIgnoreCase(RED_DATA_BOOK_CATEGORY_COLUMN)) {
175 cyprusRow.setRedDataBookCategory(value);
176
177 } else if(key.equalsIgnoreCase(SYSTEMATICS_COLUMN)) {
178 cyprusRow.setSystematics(value);
179
180 } else if(key.equalsIgnoreCase(GENUS_COLUMN)) {
181 cyprusRow.setGenus(value);
182
183 } else if(key.equalsIgnoreCase(FAMILY_COLUMN)) {
184 cyprusRow.setFamily(value);
185
186 } else if(key.equalsIgnoreCase(DIVISION_COLUMN)) {
187 cyprusRow.setDivision(value);
188
189 } else {
190 state.setUnsuccessfull();
191 logger.error("Unexpected column header " + key);
192 }
193 }
194 return;
195 }
196
197 private static INonViralNameParser nameParser = NonViralNameParserImpl.NewInstance();
198 private static NomenclaturalCode nc = NomenclaturalCode.ICNAFP;
199 private Feature redBookCategory;
200 private Feature endemism;
201 private PresenceAbsenceTerm indigenous;
202 private PresenceAbsenceTerm indigenousDoubtful;
203 private PresenceAbsenceTerm cultivatedDoubtful;
204
205 private PresenceAbsenceTerm casual;
206 private PresenceAbsenceTerm casualDoubtful;
207 private PresenceAbsenceTerm nonInvasive;
208 private PresenceAbsenceTerm nonInvasiveDoubtful;
209 private PresenceAbsenceTerm invasive;
210 private PresenceAbsenceTerm invasiveDoubtful;
211 private PresenceAbsenceTerm questionable;
212 private PresenceAbsenceTerm questionableDoubtful;
213
214 private boolean termsCreated = false;
215
216 private boolean makeTerms(CyprusImportState state) {
217 if (termsCreated == false){
218 IInputTransformer transformer = state.getTransformer();
219
220 try {
221 //feature
222 UUID redBookUuid = transformer.getFeatureUuid("Red book");
223 redBookCategory = this.getFeature(state, redBookUuid, "Red book category", "Red data book category", "Red book", null);
224 getTermService().save(redBookCategory);
225
226 UUID endemismUuid = transformer.getFeatureUuid("Endemism");
227 endemism = this.getFeature(state, endemismUuid, "Endemism", "Endemism", "Endemism", null);
228 getTermService().save(endemism);
229
230 //status
231
232 UUID indigenousUuid = transformer.getPresenceTermUuid("IN");
233 indigenous = this.getPresenceTerm(state, indigenousUuid, "indigenous", "Indigenous", "IN");
234 getTermService().save(indigenous);
235 UUID indigenousDoubtfulUuid = transformer.getPresenceTermUuid("IN?");
236 indigenousDoubtful = this.getPresenceTerm(state, indigenousDoubtfulUuid, "indigenous?", "Indigenous?", "IN?");
237 getTermService().save(indigenousDoubtful);
238
239 UUID cultivatedDoubtfulUuid = transformer.getPresenceTermUuid("CU?");
240 cultivatedDoubtful = this.getPresenceTerm(state, cultivatedDoubtfulUuid, "cultivated?", "Cultivated?", "CU?");
241 getTermService().save(cultivatedDoubtful);
242
243
244 UUID casualUuid = transformer.getPresenceTermUuid("CA");
245 casual = this.getPresenceTerm(state, casualUuid, "casual", "Casual", "CA");
246 getTermService().save(casual);
247 UUID casualDoubtfulUuid = transformer.getPresenceTermUuid("CA?");
248 casualDoubtful = this.getPresenceTerm(state, casualDoubtfulUuid, "casual?", "Casual?", "CA?");
249 getTermService().save(casualDoubtful);
250
251
252 UUID nonInvasiveUuid = transformer.getPresenceTermUuid("NN");
253 nonInvasive = this.getPresenceTerm(state, nonInvasiveUuid, "naturalized non-invasive", "Naturalized non-invasive", "NN");
254 getTermService().save(nonInvasive);
255 UUID nonInvasiveDoubtfulUuid = transformer.getPresenceTermUuid("NN?");
256 nonInvasiveDoubtful = this.getPresenceTerm(state, nonInvasiveDoubtfulUuid, "naturalized non-invasive?", "Naturalized non-invasive?", "NN?");
257 getTermService().save(nonInvasiveDoubtful);
258
259 UUID invasiveUuid = transformer.getPresenceTermUuid("NA");
260 invasive = this.getPresenceTerm(state, invasiveUuid, "naturalized invasive", "Naturalized invasive", "NA");
261 getTermService().save(invasive);
262 UUID invasiveDoubtfulUuid = transformer.getPresenceTermUuid("NA?");
263 invasiveDoubtful = this.getPresenceTerm(state, invasiveDoubtfulUuid, "naturalized invasive?", "Naturalized invasive?", "NA?");
264 getTermService().save(invasiveDoubtful);
265
266 UUID questionableUuid = transformer.getPresenceTermUuid("Q");
267 questionable = this.getPresenceTerm(state, questionableUuid, "questionable", "Questionable", "Q");
268 getTermService().save(questionable);
269 UUID questionableDoubtfulUuid = transformer.getPresenceTermUuid("Q?");
270 questionableDoubtful = this.getPresenceTerm(state, questionableDoubtfulUuid, "questionable?", "Questionable?", "Q?");
271 getTermService().save(questionableDoubtful);
272
273 termsCreated = true;
274
275 return true;
276 } catch (UndefinedTransformerMethodException e) {
277 e.printStackTrace();
278 return false;
279 }
280 }
281 return true;
282
283 }
284
285 /**
286 * Stores taxa records in DB
287 */
288 @Override
289 protected void firstPass(CyprusImportState state) {
290
291 makeTerms(state);
292 CyprusRow taxonLight = state.getCyprusRow();
293 Reference citation = null;
294 String microCitation = null;
295
296 //species name
297 String speciesStr = taxonLight.getSpecies();
298 String subSpeciesStr = taxonLight.getSubspecies();
299 String homotypicSynonymsString = taxonLight.getHomotypicSynonyms();
300 List<String> homotypicSynonymList = Arrays.asList(homotypicSynonymsString.split(";"));
301 String heterotypicSynonymsString = taxonLight.getHeterotypicSynonyms();
302 List<String> heterotypicSynonymList = Arrays.asList(heterotypicSynonymsString.split(";"));
303
304 String systematicsString = taxonLight.getSystematics();
305 String endemismString = taxonLight.getEndemism();
306 String statusString = taxonLight.getStatus();
307 String redBookCategory = taxonLight.getRedDataBookCategory();
308
309 if (StringUtils.isNotBlank(speciesStr)) {
310 boolean speciesIsExisting = false;
311 Taxon mainTaxon = null;
312 //species
313 Taxon speciesTaxon = (Taxon)createTaxon(state, Rank.SPECIES(), speciesStr, Taxon.class, nc);
314 mainTaxon = speciesTaxon;
315
316 //subspecies
317 if (StringUtils.isNotBlank(subSpeciesStr)){
318 Taxon existingSpecies = state.getHigherTaxon(speciesStr);
319 if (existingSpecies != null){
320 speciesIsExisting = true;
321 speciesTaxon = existingSpecies;
322 }
323
324 Taxon subSpeciesTaxon = (Taxon)createTaxon(state, Rank.SUBSPECIES(), subSpeciesStr, Taxon.class, nc);
325
326 if (subSpeciesTaxon != null){
327 makeParent(state, speciesTaxon, subSpeciesTaxon, citation, microCitation);
328 }
329 mainTaxon = subSpeciesTaxon;
330 state.putHigherTaxon(speciesStr, speciesTaxon);
331 }
332
333 if (! speciesIsExisting){
334 makeHigherTaxa(state, taxonLight, speciesTaxon, citation, microCitation);
335 }
336 makeHomotypicSynonyms(state, citation, microCitation, homotypicSynonymList, mainTaxon);
337 makeHeterotypicSynonyms(state, citation, microCitation, heterotypicSynonymList, mainTaxon);
338 makeSystematics(systematicsString, mainTaxon);
339 makeEndemism(endemismString, mainTaxon);
340 makeStatus(statusString, mainTaxon);
341 makeRedBookCategory(redBookCategory, mainTaxon);
342
343 // state.putHigherTaxon(higherName, uuid);//(speciesStr, mainTaxon);
344 getTaxonService().save(mainTaxon);
345 }
346 return;
347 }
348
349
350 private void makeHigherTaxa(CyprusImportState state, CyprusRow taxonLight, Taxon speciesTaxon, Reference citation, String microCitation) {
351 String divisionStr = taxonLight.getDivision();
352 String genusStr = taxonLight.getGenus();
353 String familyStr = taxonLight.getFamily();
354
355 Taxon division = getTaxon(state, divisionStr, Rank.DIVISION(), null, citation, microCitation);
356 Taxon family = getTaxon(state, familyStr, Rank.FAMILY(), division, citation, microCitation);
357 Taxon genus = getTaxon(state, genusStr, Rank.GENUS(), family, citation, microCitation);
358 makeParent(state, genus, speciesTaxon, citation, microCitation) ;
359 }
360
361
362 private Taxon getTaxon(CyprusImportState state, String taxonNameStr, Rank rank, Taxon parent, Reference citation, String microCitation) {
363 Taxon result;
364 if (state.containsHigherTaxon(taxonNameStr)){
365 result = state.getHigherTaxon(taxonNameStr);
366 }else{
367 result = (Taxon)createTaxon(state, rank, taxonNameStr, Taxon.class, nc);
368 state.putHigherTaxon(taxonNameStr, result);
369 if (parent == null){
370 makeParent(state, null,result, citation, microCitation);
371 }else{
372 makeParent(state, parent, result, citation, microCitation);
373 }
374
375 }
376 return result;
377 }
378
379
380 private void makeHomotypicSynonyms(CyprusImportState state,
381 Reference citation, String microCitation, List<String> homotypicSynonymList, Taxon mainTaxon) {
382 for (String homotypicSynonym: homotypicSynonymList){
383 if (StringUtils.isNotBlank(homotypicSynonym)){
384 Synonym synonym = (Synonym)createTaxon(state, null, homotypicSynonym, Synonym.class, nc);
385 mainTaxon.addHomotypicSynonym(synonym, citation, microCitation);
386 }
387 }
388 }
389
390
391 private void makeHeterotypicSynonyms(CyprusImportState state, Reference citation, String microCitation, List<String> heterotypicSynonymList, Taxon mainTaxon) {
392 for (String heterotypicSynonym: heterotypicSynonymList){
393 if (StringUtils.isNotBlank(heterotypicSynonym)){
394 Synonym synonym = (Synonym)createTaxon(state, null, heterotypicSynonym, Synonym.class, nc);
395 mainTaxon.addSynonym(synonym, SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF(), citation, microCitation);
396 }
397 }
398 }
399
400
401 private void makeSystematics(String systematicsString, Taxon mainTaxon) {
402 //Systematics
403 if (StringUtils.isNotBlank(systematicsString)){
404 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
405 TextData textData = TextData.NewInstance(Feature.SYSTEMATICS());
406 textData.putText(Language.UNDETERMINED(), systematicsString);
407 td.addElement(textData);
408 }
409 }
410
411
412 private void makeEndemism(String endemismString, Taxon mainTaxon) {
413 //endemism
414 if (StringUtils.isNotBlank(endemismString)){
415 //OLD - not wanted as marker
416 // boolean flag;
417 // if (endemismString.trim().equalsIgnoreCase("not endemic") || endemismString.trim().equalsIgnoreCase("ne?")){
418 // flag = false;
419 // }else if (endemismString.trim().equalsIgnoreCase("endemic")){
420 // flag = true;
421 // }else{
422 // throw new RuntimeException(endemismString + " is not a valid value for endemism");
423 // }
424 // Marker marker = Marker.NewInstance(MarkerType.ENDEMIC(), flag);
425 // mainTaxon.addMarker(marker);
426 //text data
427 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
428 TextData textData = TextData.NewInstance(endemism);
429 textData.putText(Language.ENGLISH(), endemismString);
430 td.addElement(textData);
431 }
432 }
433
434
435 private void makeStatus(String statusString, Taxon mainTaxon) {
436 //status
437 if (StringUtils.isNotBlank(statusString)){
438 PresenceAbsenceTerm status = null;
439 if (statusString.contains("Indigenous?")){
440 status = indigenousDoubtful;
441 }else if (statusString.contains("Indigenous")){
442 status = indigenous;
443 }else if (statusString.contains("Casual?") || statusString.contains("Causal?")){
444 status = casualDoubtful;
445 }else if (statusString.contains("Casual")){
446 status = casual;
447 }else if (statusString.contains("Cultivated?")){
448 status = cultivatedDoubtful;
449 }else if (statusString.contains("Cultivated")){
450 status = PresenceAbsenceTerm.CULTIVATED();
451 }else if (statusString.contains("non-invasive?")){
452 status = nonInvasiveDoubtful;
453 }else if (statusString.contains("non-invasive")){
454 status = nonInvasive;
455 }else if (statusString.contains("invasive?")){
456 status = invasiveDoubtful;
457 }else if (statusString.contains("invasive")){
458 status = invasive;
459 }else if (statusString.contains("Questionable?")){
460 status = questionableDoubtful;
461 }else if (statusString.contains("Questionable")){
462 status = questionable;
463 }else if (statusString.startsWith("F")){
464 status = null;
465 }else if (statusString.equals("##")){
466 status = null;
467 }else{
468 logger.warn("Unknown status: " + statusString);
469 status = null;
470 }
471 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
472 NamedArea area = TdwgAreaProvider.getAreaByTdwgAbbreviation("CYP");
473 Distribution distribution = Distribution.NewInstance(area, status);
474 td.addElement(distribution);
475
476 //text data
477 TextData textData = TextData.NewInstance(Feature.STATUS());
478 textData.putText(Language.ENGLISH(), statusString);
479 td.addElement(textData);
480 }
481 }
482
483
484 private void makeRedBookCategory(String redBookCategory, Taxon mainTaxon) {
485 //red data book category
486 if (StringUtils.isNotBlank(redBookCategory)){
487 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
488 TextData textData = TextData.NewInstance(this.redBookCategory);
489 textData.putText(Language.ENGLISH(), redBookCategory);
490 td.addElement(textData);
491 }
492 }
493
494
495
496
497 /**
498 * Stores parent-child, synonym and common name relationships
499 */
500 @Override
501 protected void secondPass(CyprusImportState state) {
502 // CyprusRow cyprusRow = state.getCyprusRow();
503 return;
504 }
505
506
507
508 /**
509 * @param state
510 * @param rank
511 * @param taxonNameStr
512 * @param authorStr
513 * @param nameStatus
514 * @param nc
515 * @return
516 */
517 private TaxonBase createTaxon(CyprusImportState state, Rank rank, String taxonNameStr,
518 Class statusClass, NomenclaturalCode nc) {
519 TaxonBase taxonBase;
520 NonViralName taxonNameBase = null;
521 if (nc == NomenclaturalCode.ICVCN){
522 logger.warn("ICVCN not yet supported");
523
524 }else{
525 taxonNameBase =(NonViralName) nc.getNewTaxonNameInstance(rank);
526 //NonViralName nonViralName = (NonViralName)taxonNameBase;
527 INonViralNameParser parser = nameParser;//NonViralNameParserImpl.NewInstance();
528 taxonNameBase = (NonViralName<BotanicalName>)parser.parseFullName(taxonNameStr, nc, rank);
529
530 //taxonNameBase.setNameCache(taxonNameStr);
531
532 }
533
534 //Create the taxon
535 Reference sec = state.getConfig().getSourceReference();
536 // Create the status
537 if (statusClass.equals(Taxon.class)){
538 taxonBase = Taxon.NewInstance(taxonNameBase, sec);
539 }else if (statusClass.equals(Synonym.class)){
540 taxonBase = Synonym.NewInstance(taxonNameBase, sec);
541 }else {
542 Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
543 taxon.setTaxonStatusUnknown(true);
544 taxonBase = taxon;
545 }
546 return taxonBase;
547 }
548
549 private boolean makeParent(CyprusImportState state, Taxon parentTaxon, Taxon childTaxon, Reference citation, String microCitation){
550 boolean success = true;
551 Reference sec = state.getConfig().getSourceReference();
552
553 // Reference sec = parentTaxon.getSec();
554 Classification tree = state.getTree(sec);
555 if (tree == null){
556 tree = makeTree(state, sec);
557 tree.setTitleCache(state.getConfig().getSourceReferenceTitle());
558 }
559 if (sec.equals(childTaxon.getSec())){
560 success &= (null != tree.addParentChild(parentTaxon, childTaxon, citation, microCitation));
561 }else{
562 logger.warn("No relationship added for child " + childTaxon.getTitleCache());
563 }
564 return success;
565 }
566
567
568
569 }