add voc to cyprus.getFeature
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / cyprus / CyprusExcelImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.cyprus;
11
12 import java.util.Arrays;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Set;
17 import java.util.UUID;
18
19 import org.apache.commons.lang.StringUtils;
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.io.common.mapping.IInputTransformer;
25 import eu.etaxonomy.cdm.io.common.mapping.UndefinedTransformerMethodException;
26 import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
27 import eu.etaxonomy.cdm.model.common.Language;
28 import eu.etaxonomy.cdm.model.description.Distribution;
29 import eu.etaxonomy.cdm.model.description.Feature;
30 import eu.etaxonomy.cdm.model.description.PresenceTerm;
31 import eu.etaxonomy.cdm.model.description.TaxonDescription;
32 import eu.etaxonomy.cdm.model.description.TextData;
33 import eu.etaxonomy.cdm.model.location.NamedArea;
34 import eu.etaxonomy.cdm.model.location.TdwgArea;
35 import eu.etaxonomy.cdm.model.name.BotanicalName;
36 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37 import eu.etaxonomy.cdm.model.name.NonViralName;
38 import eu.etaxonomy.cdm.model.name.Rank;
39 import eu.etaxonomy.cdm.model.reference.Reference;
40 import eu.etaxonomy.cdm.model.taxon.Classification;
41 import eu.etaxonomy.cdm.model.taxon.Synonym;
42 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
43 import eu.etaxonomy.cdm.model.taxon.Taxon;
44 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
45 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
46 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
47
48 /**
49 * @author a.babadshanjan
50 * @created 08.01.2009
51 * @version 1.0
52 */
53
54 @Component
55 public class CyprusExcelImport extends ExcelImporterBase<CyprusImportState> {
56 private static final Logger logger = Logger.getLogger(CyprusExcelImport.class);
57
58 public static Set<String> validMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "valid", "accepted", "a", "v", "t"}));
59 public static Set<String> synonymMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "invalid", "synonym", "s", "i"}));
60
61
62 @Override
63 protected boolean isIgnore(CyprusImportState state) {
64 return ! state.getConfig().isDoTaxa();
65 }
66
67
68 /* (non-Javadoc)
69 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
70 */
71 @Override
72 protected boolean doCheck(CyprusImportState state) {
73 logger.warn("DoCheck not yet implemented for CyprusExcelImport");
74 return true;
75 }
76
77 // protected static final String ID_COLUMN = "Id";
78 protected static final String SPECIES_COLUMN = "species";
79 protected static final String SUBSPECIES_COLUMN = "subspecies";
80 protected static final String GENUS_COLUMN = "genus";
81 protected static final String FAMILY_COLUMN = "family";
82 protected static final String DIVISION_COLUMN = "division";
83 protected static final String HOMOTYPIC_SYNONYM_COLUMN = "homotypic synonyms";
84 protected static final String HETEROTYPIC_SYNONYMS_COLUMN = "heterotypic synonyms";
85 protected static final String ENDEMISM_COLUMN = "endemism";
86
87 protected static final String STATUS_COLUMN = "status";
88 protected static final String RED_DATA_BOOK_CATEGORY_COLUMN = "red data book category";
89 protected static final String SYSTEMATICS_COLUMN = "systematics";
90
91
92
93 // TODO: This enum is for future use (perhaps).
94 protected enum Columns {
95 // Id("Id"),
96 Species("species"),
97 Subspecies("subspecies"),
98 Genus("genus"),
99 Family("family"),
100 Division("division"),
101 HomotypicSynonyms("homotypic synonyms"),
102 HeterotypicSynonyms("heterotypic synonyms"),
103 Status("status"),
104 Endemism("endemism");
105
106 private String head;
107 private String value;
108
109 private Columns(String head) {
110 this.head = head;
111 }
112
113 public String head() {
114 return this.head;
115 }
116
117 public String value() {
118 return this.value;
119 }
120 }
121
122
123 @Override
124 protected void analyzeRecord(HashMap<String, String> record, CyprusImportState state) {
125
126 Set<String> keys = record.keySet();
127
128 CyprusRow cyprusRow = new CyprusRow();
129 state.setCyprusRow(cyprusRow);
130
131 for (String originalKey: keys) {
132 Integer index = 0;
133 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
134 String[] split = indexedKey.split("_");
135 String key = split[0];
136 if (split.length > 1){
137 String indexString = split[1];
138 try {
139 index = Integer.valueOf(indexString);
140 } catch (NumberFormatException e) {
141 String message = "Index must be integer";
142 logger.error(message);
143 continue;
144 }
145 }
146
147 String value = (String) record.get(indexedKey);
148 if (! StringUtils.isBlank(value)) {
149 if (logger.isDebugEnabled()) { logger.debug(key + ": " + value); }
150 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
151 }else{
152 continue;
153 }
154
155
156 if (key.equalsIgnoreCase(SPECIES_COLUMN)) {
157 // int ivalue = floatString2IntValue(value);
158 cyprusRow.setSpecies(value);
159
160 } else if(key.equalsIgnoreCase(SUBSPECIES_COLUMN)) {
161 cyprusRow.setSubspecies(value);
162
163 } else if(key.equalsIgnoreCase(HOMOTYPIC_SYNONYM_COLUMN)) {
164 cyprusRow.setHomotypicSynonyms(value);
165
166 } else if(key.equalsIgnoreCase(HETEROTYPIC_SYNONYMS_COLUMN)) {
167 cyprusRow.setHeterotypicSynonyms(value);
168
169 } else if(key.equalsIgnoreCase(ENDEMISM_COLUMN)) {
170 cyprusRow.setEndemism(value);
171
172 } else if(key.equalsIgnoreCase(STATUS_COLUMN)) {
173 cyprusRow.setStatus(value);
174
175 } else if(key.equalsIgnoreCase(RED_DATA_BOOK_CATEGORY_COLUMN)) {
176 cyprusRow.setRedDataBookCategory(value);
177
178 } else if(key.equalsIgnoreCase(SYSTEMATICS_COLUMN)) {
179 cyprusRow.setSystematics(value);
180
181 } else if(key.equalsIgnoreCase(GENUS_COLUMN)) {
182 cyprusRow.setGenus(value);
183
184 } else if(key.equalsIgnoreCase(FAMILY_COLUMN)) {
185 cyprusRow.setFamily(value);
186
187 } else if(key.equalsIgnoreCase(DIVISION_COLUMN)) {
188 cyprusRow.setDivision(value);
189
190 } else {
191 state.setUnsuccessfull();
192 logger.error("Unexpected column header " + key);
193 }
194 }
195 return;
196 }
197
198 private static INonViralNameParser nameParser = NonViralNameParserImpl.NewInstance();
199 private static NomenclaturalCode nc = NomenclaturalCode.ICBN;
200 private Feature redBookCategory;
201 private Feature endemism;
202 private PresenceTerm indigenous;
203 private PresenceTerm indigenousDoubtful;
204 private PresenceTerm cultivatedDoubtful;
205
206 private PresenceTerm casual;
207 private PresenceTerm casualDoubtful;
208 private PresenceTerm nonInvasive;
209 private PresenceTerm nonInvasiveDoubtful;
210 private PresenceTerm invasive;
211 private PresenceTerm invasiveDoubtful;
212 private PresenceTerm questionable;
213 private PresenceTerm questionableDoubtful;
214
215 private boolean termsCreated = false;
216
217 private boolean makeTerms(CyprusImportState state) {
218 if (termsCreated == false){
219 IInputTransformer transformer = state.getTransformer();
220
221 try {
222 //feature
223 UUID redBookUuid = transformer.getFeatureUuid("Red book");
224 redBookCategory = this.getFeature(state, redBookUuid, "Red book category", "Red data book category", "Red book", null);
225 getTermService().save(redBookCategory);
226
227 UUID endemismUuid = transformer.getFeatureUuid("Endemism");
228 endemism = this.getFeature(state, endemismUuid, "Endemism", "Endemism", "Endemism", null);
229 getTermService().save(endemism);
230
231 //status
232
233 UUID indigenousUuid = transformer.getPresenceTermUuid("IN");
234 indigenous = this.getPresenceTerm(state, indigenousUuid, "indigenous", "Indigenous", "IN");
235 getTermService().save(indigenous);
236 UUID indigenousDoubtfulUuid = transformer.getPresenceTermUuid("IN?");
237 indigenousDoubtful = this.getPresenceTerm(state, indigenousDoubtfulUuid, "indigenous?", "Indigenous?", "IN?");
238 getTermService().save(indigenousDoubtful);
239
240 UUID cultivatedDoubtfulUuid = transformer.getPresenceTermUuid("CU?");
241 cultivatedDoubtful = this.getPresenceTerm(state, cultivatedDoubtfulUuid, "cultivated?", "Cultivated?", "CU?");
242 getTermService().save(cultivatedDoubtful);
243
244
245 UUID casualUuid = transformer.getPresenceTermUuid("CA");
246 casual = this.getPresenceTerm(state, casualUuid, "casual", "Casual", "CA");
247 getTermService().save(casual);
248 UUID casualDoubtfulUuid = transformer.getPresenceTermUuid("CA?");
249 casualDoubtful = this.getPresenceTerm(state, casualDoubtfulUuid, "casual?", "Casual?", "CA?");
250 getTermService().save(casualDoubtful);
251
252
253 UUID nonInvasiveUuid = transformer.getPresenceTermUuid("NN");
254 nonInvasive = this.getPresenceTerm(state, nonInvasiveUuid, "naturalized non-invasive", "Naturalized non-invasive", "NN");
255 getTermService().save(nonInvasive);
256 UUID nonInvasiveDoubtfulUuid = transformer.getPresenceTermUuid("NN?");
257 nonInvasiveDoubtful = this.getPresenceTerm(state, nonInvasiveDoubtfulUuid, "naturalized non-invasive?", "Naturalized non-invasive?", "NN?");
258 getTermService().save(nonInvasiveDoubtful);
259
260 UUID invasiveUuid = transformer.getPresenceTermUuid("NA");
261 invasive = this.getPresenceTerm(state, invasiveUuid, "naturalized invasive", "Naturalized invasive", "NA");
262 getTermService().save(invasive);
263 UUID invasiveDoubtfulUuid = transformer.getPresenceTermUuid("NA?");
264 invasiveDoubtful = this.getPresenceTerm(state, invasiveDoubtfulUuid, "naturalized invasive?", "Naturalized invasive?", "NA?");
265 getTermService().save(invasiveDoubtful);
266
267 UUID questionableUuid = transformer.getPresenceTermUuid("Q");
268 questionable = this.getPresenceTerm(state, questionableUuid, "questionable", "Questionable", "Q");
269 getTermService().save(questionable);
270 UUID questionableDoubtfulUuid = transformer.getPresenceTermUuid("Q?");
271 questionableDoubtful = this.getPresenceTerm(state, questionableDoubtfulUuid, "questionable?", "Questionable?", "Q?");
272 getTermService().save(questionableDoubtful);
273
274 termsCreated = true;
275
276 return true;
277 } catch (UndefinedTransformerMethodException e) {
278 e.printStackTrace();
279 return false;
280 }
281 }
282 return true;
283
284 }
285
286 /**
287 * Stores taxa records in DB
288 */
289 @Override
290 protected void firstPass(CyprusImportState state) {
291
292 makeTerms(state);
293 CyprusRow taxonLight = state.getCyprusRow();
294 Reference citation = null;
295 String microCitation = null;
296
297 //species name
298 String speciesStr = taxonLight.getSpecies();
299 String subSpeciesStr = taxonLight.getSubspecies();
300 String homotypicSynonymsString = taxonLight.getHomotypicSynonyms();
301 List<String> homotypicSynonymList = Arrays.asList(homotypicSynonymsString.split(";"));
302 String heterotypicSynonymsString = taxonLight.getHeterotypicSynonyms();
303 List<String> heterotypicSynonymList = Arrays.asList(heterotypicSynonymsString.split(";"));
304
305 String systematicsString = taxonLight.getSystematics();
306 String endemismString = taxonLight.getEndemism();
307 String statusString = taxonLight.getStatus();
308 String redBookCategory = taxonLight.getRedDataBookCategory();
309
310 if (StringUtils.isNotBlank(speciesStr)) {
311 boolean speciesIsExisting = false;
312 Taxon mainTaxon = null;
313 //species
314 Taxon speciesTaxon = (Taxon)createTaxon(state, Rank.SPECIES(), speciesStr, Taxon.class, nc);
315 mainTaxon = speciesTaxon;
316
317 //subspecies
318 if (StringUtils.isNotBlank(subSpeciesStr)){
319 Taxon existingSpecies = state.getHigherTaxon(speciesStr);
320 if (existingSpecies != null){
321 speciesIsExisting = true;
322 speciesTaxon = existingSpecies;
323 }
324
325 Taxon subSpeciesTaxon = (Taxon)createTaxon(state, Rank.SUBSPECIES(), subSpeciesStr, Taxon.class, nc);
326
327 if (subSpeciesTaxon != null){
328 makeParent(state, speciesTaxon, subSpeciesTaxon, citation, microCitation);
329 }
330 mainTaxon = subSpeciesTaxon;
331 state.putHigherTaxon(speciesStr, speciesTaxon);
332 }
333
334 if (! speciesIsExisting){
335 makeHigherTaxa(state, taxonLight, speciesTaxon, citation, microCitation);
336 }
337 makeHomotypicSynonyms(state, citation, microCitation, homotypicSynonymList, mainTaxon);
338 makeHeterotypicSynonyms(state, citation, microCitation, heterotypicSynonymList, mainTaxon);
339 makeSystematics(systematicsString, mainTaxon);
340 makeEndemism(endemismString, mainTaxon);
341 makeStatus(statusString, mainTaxon);
342 makeRedBookCategory(redBookCategory, mainTaxon);
343
344 // state.putHigherTaxon(higherName, uuid);//(speciesStr, mainTaxon);
345 getTaxonService().save(mainTaxon);
346 }
347 return;
348 }
349
350
351 private void makeHigherTaxa(CyprusImportState state, CyprusRow taxonLight, Taxon speciesTaxon, Reference citation, String microCitation) {
352 String divisionStr = taxonLight.getDivision();
353 String genusStr = taxonLight.getGenus();
354 String familyStr = taxonLight.getFamily();
355
356 Taxon division = getTaxon(state, divisionStr, Rank.DIVISION(), null, citation, microCitation);
357 Taxon family = getTaxon(state, familyStr, Rank.FAMILY(), division, citation, microCitation);
358 Taxon genus = getTaxon(state, genusStr, Rank.GENUS(), family, citation, microCitation);
359 makeParent(state, genus, speciesTaxon, citation, microCitation) ;
360 }
361
362
363 private Taxon getTaxon(CyprusImportState state, String taxonNameStr, Rank rank, Taxon parent, Reference citation, String microCitation) {
364 Taxon result;
365 if (state.containsHigherTaxon(taxonNameStr)){
366 result = state.getHigherTaxon(taxonNameStr);
367 }else{
368 result = (Taxon)createTaxon(state, rank, taxonNameStr, Taxon.class, nc);
369 state.putHigherTaxon(taxonNameStr, result);
370 if (parent == null){
371 makeParent(state, null,result, citation, microCitation);
372 }else{
373 makeParent(state, parent, result, citation, microCitation);
374 }
375
376 }
377 return result;
378 }
379
380
381 private void makeHomotypicSynonyms(CyprusImportState state,
382 Reference citation, String microCitation, List<String> homotypicSynonymList, Taxon mainTaxon) {
383 for (String homotypicSynonym: homotypicSynonymList){
384 if (StringUtils.isNotBlank(homotypicSynonym)){
385 Synonym synonym = (Synonym)createTaxon(state, null, homotypicSynonym, Synonym.class, nc);
386 mainTaxon.addHomotypicSynonym(synonym, citation, microCitation);
387 }
388 }
389 }
390
391
392 private void makeHeterotypicSynonyms(CyprusImportState state, Reference citation, String microCitation, List<String> heterotypicSynonymList, Taxon mainTaxon) {
393 for (String heterotypicSynonym: heterotypicSynonymList){
394 if (StringUtils.isNotBlank(heterotypicSynonym)){
395 Synonym synonym = (Synonym)createTaxon(state, null, heterotypicSynonym, Synonym.class, nc);
396 mainTaxon.addSynonym(synonym, SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF(), citation, microCitation);
397 }
398 }
399 }
400
401
402 private void makeSystematics(String systematicsString, Taxon mainTaxon) {
403 //Systematics
404 if (StringUtils.isNotBlank(systematicsString)){
405 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
406 TextData textData = TextData.NewInstance(Feature.SYSTEMATICS());
407 textData.putText(Language.UNDETERMINED(), systematicsString);
408 td.addElement(textData);
409 }
410 }
411
412
413 private void makeEndemism(String endemismString, Taxon mainTaxon) {
414 //endemism
415 if (StringUtils.isNotBlank(endemismString)){
416 //OLD - not wanted as marker
417 // boolean flag;
418 // if (endemismString.trim().equalsIgnoreCase("not endemic") || endemismString.trim().equalsIgnoreCase("ne?")){
419 // flag = false;
420 // }else if (endemismString.trim().equalsIgnoreCase("endemic")){
421 // flag = true;
422 // }else{
423 // throw new RuntimeException(endemismString + " is not a valid value for endemism");
424 // }
425 // Marker marker = Marker.NewInstance(MarkerType.ENDEMIC(), flag);
426 // mainTaxon.addMarker(marker);
427 //text data
428 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
429 TextData textData = TextData.NewInstance(endemism);
430 textData.putText(Language.ENGLISH(), endemismString);
431 td.addElement(textData);
432 }
433 }
434
435
436 private void makeStatus(String statusString, Taxon mainTaxon) {
437 //status
438 if (StringUtils.isNotBlank(statusString)){
439 PresenceTerm status = null;
440 if (statusString.contains("Indigenous?")){
441 status = indigenousDoubtful;
442 }else if (statusString.contains("Indigenous")){
443 status = indigenous;
444 }else if (statusString.contains("Casual?") || statusString.contains("Causal?")){
445 status = casualDoubtful;
446 }else if (statusString.contains("Casual")){
447 status = casual;
448 }else if (statusString.contains("Cultivated?")){
449 status = cultivatedDoubtful;
450 }else if (statusString.contains("Cultivated")){
451 status = PresenceTerm.CULTIVATED();
452 }else if (statusString.contains("non-invasive?")){
453 status = nonInvasiveDoubtful;
454 }else if (statusString.contains("non-invasive")){
455 status = nonInvasive;
456 }else if (statusString.contains("invasive?")){
457 status = invasiveDoubtful;
458 }else if (statusString.contains("invasive")){
459 status = invasive;
460 }else if (statusString.contains("Questionable?")){
461 status = questionableDoubtful;
462 }else if (statusString.contains("Questionable")){
463 status = questionable;
464 }else if (statusString.startsWith("F")){
465 status = null;
466 }else if (statusString.equals("##")){
467 status = null;
468 }else{
469 logger.warn("Unknown status: " + statusString);
470 status = null;
471 }
472 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
473 NamedArea area = TdwgArea.getAreaByTdwgAbbreviation("CYP");
474 Distribution distribution = Distribution.NewInstance(area, status);
475 td.addElement(distribution);
476
477 //text data
478 TextData textData = TextData.NewInstance(Feature.STATUS());
479 textData.putText(Language.ENGLISH(), statusString);
480 td.addElement(textData);
481 }
482 }
483
484
485 private void makeRedBookCategory(String redBookCategory, Taxon mainTaxon) {
486 //red data book category
487 if (StringUtils.isNotBlank(redBookCategory)){
488 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
489 TextData textData = TextData.NewInstance(this.redBookCategory);
490 textData.putText(Language.ENGLISH(), redBookCategory);
491 td.addElement(textData);
492 }
493 }
494
495
496
497
498 /**
499 * Stores parent-child, synonym and common name relationships
500 */
501 @Override
502 protected void secondPass(CyprusImportState state) {
503 // CyprusRow cyprusRow = state.getCyprusRow();
504 return;
505 }
506
507
508
509 /**
510 * @param state
511 * @param rank
512 * @param taxonNameStr
513 * @param authorStr
514 * @param nameStatus
515 * @param nc
516 * @return
517 */
518 private TaxonBase createTaxon(CyprusImportState state, Rank rank, String taxonNameStr,
519 Class statusClass, NomenclaturalCode nc) {
520 TaxonBase taxonBase;
521 NonViralName taxonNameBase = null;
522 if (nc == NomenclaturalCode.ICVCN){
523 logger.warn("ICVCN not yet supported");
524
525 }else{
526 taxonNameBase =(NonViralName) nc.getNewTaxonNameInstance(rank);
527 //NonViralName nonViralName = (NonViralName)taxonNameBase;
528 INonViralNameParser parser = nameParser;//NonViralNameParserImpl.NewInstance();
529 taxonNameBase = (NonViralName<BotanicalName>)parser.parseFullName(taxonNameStr, nc, rank);
530
531 //taxonNameBase.setNameCache(taxonNameStr);
532
533 }
534
535 //Create the taxon
536 Reference sec = state.getConfig().getSourceReference();
537 // Create the status
538 if (statusClass.equals(Taxon.class)){
539 taxonBase = Taxon.NewInstance(taxonNameBase, sec);
540 }else if (statusClass.equals(Synonym.class)){
541 taxonBase = Synonym.NewInstance(taxonNameBase, sec);
542 }else {
543 Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
544 taxon.setTaxonStatusUnknown(true);
545 taxonBase = taxon;
546 }
547 return taxonBase;
548 }
549
550 private boolean makeParent(CyprusImportState state, Taxon parentTaxon, Taxon childTaxon, Reference citation, String microCitation){
551 boolean success = true;
552 Reference sec = state.getConfig().getSourceReference();
553
554 // Reference sec = parentTaxon.getSec();
555 Classification tree = state.getTree(sec);
556 if (tree == null){
557 tree = makeTree(state, sec);
558 tree.setTitleCache(state.getConfig().getSourceReferenceTitle());
559 }
560 if (sec.equals(childTaxon.getSec())){
561 success &= (null != tree.addParentChild(parentTaxon, childTaxon, citation, microCitation));
562 }else{
563 logger.warn("No relationship added for child " + childTaxon.getTitleCache());
564 }
565 return success;
566 }
567
568
569
570 }