latest changes for cyprus import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / cyprus / CyprusExcelImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.cyprus;
11
12 import java.util.Arrays;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.List;
16 import java.util.Set;
17 import java.util.UUID;
18
19 import org.apache.commons.lang.StringUtils;
20 import org.apache.log4j.Logger;
21 import org.springframework.stereotype.Component;
22
23 import eu.etaxonomy.cdm.common.CdmUtils;
24 import eu.etaxonomy.cdm.io.excel.common.ExcelImporterBase;
25 import eu.etaxonomy.cdm.model.common.Language;
26 import eu.etaxonomy.cdm.model.common.Marker;
27 import eu.etaxonomy.cdm.model.common.MarkerType;
28 import eu.etaxonomy.cdm.model.description.Distribution;
29 import eu.etaxonomy.cdm.model.description.Feature;
30 import eu.etaxonomy.cdm.model.description.PresenceTerm;
31 import eu.etaxonomy.cdm.model.description.TaxonDescription;
32 import eu.etaxonomy.cdm.model.description.TextData;
33 import eu.etaxonomy.cdm.model.location.NamedArea;
34 import eu.etaxonomy.cdm.model.location.TdwgArea;
35 import eu.etaxonomy.cdm.model.name.BotanicalName;
36 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
37 import eu.etaxonomy.cdm.model.name.NonViralName;
38 import eu.etaxonomy.cdm.model.name.Rank;
39 import eu.etaxonomy.cdm.model.reference.Reference;
40 import eu.etaxonomy.cdm.model.taxon.Classification;
41 import eu.etaxonomy.cdm.model.taxon.Synonym;
42 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
43 import eu.etaxonomy.cdm.model.taxon.Taxon;
44 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
45 import eu.etaxonomy.cdm.strategy.parser.INonViralNameParser;
46 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
47
48 /**
49 * @author a.babadshanjan
50 * @created 08.01.2009
51 * @version 1.0
52 */
53
54 @Component
55 public class CyprusExcelImport extends ExcelImporterBase<CyprusImportState> {
56 private static final Logger logger = Logger.getLogger(CyprusExcelImport.class);
57
58 public static Set<String> validMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "valid", "accepted", "a", "v", "t"}));
59 public static Set<String> synonymMarkers = new HashSet<String>(Arrays.asList(new String[]{"", "invalid", "synonym", "s", "i"}));
60
61
62 @Override
63 protected boolean isIgnore(CyprusImportState state) {
64 return false;
65 }
66
67
68 /* (non-Javadoc)
69 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IoStateBase)
70 */
71 @Override
72 protected boolean doCheck(CyprusImportState state) {
73 logger.warn("DoCheck not yet implemented for CyprusExcelImport");
74 return true;
75 }
76
77 // protected static final String ID_COLUMN = "Id";
78 protected static final String SPECIES_COLUMN = "species";
79 protected static final String SUBSPECIES_COLUMN = "subspecies";
80 protected static final String GENUS_COLUMN = "genus";
81 protected static final String FAMILY_COLUMN = "family";
82 protected static final String DIVISION_COLUMN = "division";
83 protected static final String HOMOTYPIC_SYNONYM_COLUMN = "homotypic synonyms";
84 protected static final String HETEROTYPIC_SYNONYMS_COLUMN = "heterotypic synonyms";
85 protected static final String ENDEMISM_COLUMN = "endemism";
86
87 protected static final String STATUS_COLUMN = "status";
88 protected static final String RED_DATA_BOOK_CATEGORY_COLUMN = "red data book category";
89 protected static final String SYSTEMATICS_COLUMN = "systematics";
90
91
92
93 // TODO: This enum is for future use (perhaps).
94 protected enum Columns {
95 // Id("Id"),
96 Species("species"),
97 Subspecies("subspecies"),
98 Genus("genus"),
99 Family("family"),
100 Division("division"),
101 HomotypicSynonyms("homotypic synonyms"),
102 HeterotypicSynonyms("heterotypic synonyms"),
103 Status("status"),
104 Endemism("endemism");
105
106 private String head;
107 private String value;
108
109 private Columns(String head) {
110 this.head = head;
111 }
112
113 public String head() {
114 return this.head;
115 }
116
117 public String value() {
118 return this.value;
119 }
120 }
121
122
123 @Override
124 protected boolean analyzeRecord(HashMap<String, String> record, CyprusImportState state) {
125
126 boolean success = true;
127 Set<String> keys = record.keySet();
128
129 CyprusRow cyprusRow = new CyprusRow();
130 state.setCyprusRow(cyprusRow);
131
132 for (String originalKey: keys) {
133 Integer index = 0;
134 String indexedKey = CdmUtils.removeDuplicateWhitespace(originalKey.trim()).toString();
135 String[] split = indexedKey.split("_");
136 String key = split[0];
137 if (split.length > 1){
138 String indexString = split[1];
139 try {
140 index = Integer.valueOf(indexString);
141 } catch (NumberFormatException e) {
142 String message = "Index must be integer";
143 logger.error(message);
144 continue;
145 }
146 }
147
148 String value = (String) record.get(indexedKey);
149 if (! StringUtils.isBlank(value)) {
150 if (logger.isDebugEnabled()) { logger.debug(key + ": " + value); }
151 value = CdmUtils.removeDuplicateWhitespace(value.trim()).toString();
152 }else{
153 continue;
154 }
155
156
157 if (key.equalsIgnoreCase(SPECIES_COLUMN)) {
158 // int ivalue = floatString2IntValue(value);
159 cyprusRow.setSpecies(value);
160
161 } else if(key.equalsIgnoreCase(SUBSPECIES_COLUMN)) {
162 cyprusRow.setSubspecies(value);
163
164 } else if(key.equalsIgnoreCase(HOMOTYPIC_SYNONYM_COLUMN)) {
165 cyprusRow.setHomotypicSynonyms(value);
166
167 } else if(key.equalsIgnoreCase(HETEROTYPIC_SYNONYMS_COLUMN)) {
168 cyprusRow.setHeterotypicSynonyms(value);
169
170 } else if(key.equalsIgnoreCase(ENDEMISM_COLUMN)) {
171 cyprusRow.setEndemism(value);
172
173 } else if(key.equalsIgnoreCase(STATUS_COLUMN)) {
174 cyprusRow.setStatus(value);
175
176 } else if(key.equalsIgnoreCase(RED_DATA_BOOK_CATEGORY_COLUMN)) {
177 cyprusRow.setRedDataBookCategory(value);
178
179 } else if(key.equalsIgnoreCase(SYSTEMATICS_COLUMN)) {
180 cyprusRow.setSystematics(value);
181
182 } else if(key.equalsIgnoreCase(GENUS_COLUMN)) {
183 cyprusRow.setGenus(value);
184
185 } else if(key.equalsIgnoreCase(FAMILY_COLUMN)) {
186 cyprusRow.setFamily(value);
187
188 } else if(key.equalsIgnoreCase(DIVISION_COLUMN)) {
189 cyprusRow.setDivision(value);
190
191 } else {
192 success = false;
193 logger.error("Unexpected column header " + key);
194 }
195 }
196 return success;
197 }
198
199 private static INonViralNameParser nameParser = NonViralNameParserImpl.NewInstance();
200 private static NomenclaturalCode nc = NomenclaturalCode.ICBN;
201 private Feature redBookCategory;
202 private PresenceTerm indigenous;
203 private PresenceTerm casual;
204 private PresenceTerm nonInvasive;
205 private PresenceTerm invasive;
206 private PresenceTerm questionable;
207
208 private void makeTerms(CyprusImportState state) {
209
210 UUID redBookUuid = UUID.fromString("df59d44a-ee5a-4c01-8637-127cc804842d");
211 redBookCategory = this.getFeature(state, redBookUuid, "Red book category", "Red data book category", "Red book");
212 getTermService().save(redBookCategory);
213
214 UUID indigenousUuid = UUID.fromString("b325859b-504b-45e0-9ef0-d5c1602fcc0f");
215 indigenous = this.getPresenceTerm(state, indigenousUuid, "Indigenous", "Indigenous", "IN");
216 getTermService().save(indigenous);
217
218 UUID casualUuid = UUID.fromString("5e81353c-38a3-4ca6-b979-0d9abc93b877");
219 casual = this.getPresenceTerm(state, casualUuid, "Casual", "Casual", "CA");
220 getTermService().save(redBookCategory);
221
222 UUID nonInvasiveUuid = UUID.fromString("1b025e8b-901a-42e8-9739-119b410c6f03");
223 nonInvasive = this.getPresenceTerm(state, nonInvasiveUuid, "Naturalized non-invasive", "Naturalized non-invasive", "NN");
224 getTermService().save(nonInvasive);
225
226 UUID invasiveUuid = UUID.fromString("faf2d271-868a-4bf7-b0b8-a1c5ab309de2");
227 invasive = this.getPresenceTerm(state, invasiveUuid, "Naturalized invasive", "Naturalized invasive", "NA");
228 getTermService().save(invasive);
229
230
231 UUID questionableUuid = UUID.fromString("4b48f675-a6cf-49f3-a5ba-77e2c2979eb3");
232 questionable = this.getPresenceTerm(state, questionableUuid, "Questionable", "Questionable", "Q");
233 getTermService().save(questionable);
234
235
236 // UUID redBookUuid = UUID.fromString("d8416d46-b5b4-45d5-b26b-9bda4fa491c9");
237 // term = this.getPresenceTerm(state, redBookUuid, "Red book category", "Red data book category", "Red book");
238 // getTermService().save(term);
239 //
240 // UUID redBookUuid = UUID.fromString("813a58bd-f8ab-4a80-9029-87a112dbb59f");
241 // term = this.getPresenceTerm(state, redBookUuid, "Red book category", "Red data book category", "Red book");
242 // getTermService().save(term);
243
244
245
246 // PresenceTerm status = null;
247 // }else if (statusString.contains("Cultivated")){
248 // status = PresenceTerm.CULTIVATED();
249 // }else if (statusString.contains("non-invasive")){
250 // //FIXME
251 // status = PresenceTerm.NATURALISED();
252 // }else if (statusString.contains("invasive")){
253 // //FIXME
254 // status = PresenceTerm.NATURALISED();
255 }
256
257 /**
258 * Stores taxa records in DB
259 */
260 @Override
261 protected boolean firstPass(CyprusImportState state) {
262 boolean success = true;
263 makeTerms(state);
264 CyprusRow taxonLight = state.getCyprusRow();
265 Reference citation = null;
266 String microCitation = null;
267
268 //species name
269 String speciesStr = taxonLight.getSpecies();
270 String subSpeciesStr = taxonLight.getSubspecies();
271 String homotypicSynonymsString = taxonLight.getHomotypicSynonyms();
272 List<String> homotypicSynonymList = Arrays.asList(homotypicSynonymsString.split(";"));
273 String heterotypicSynonymsString = taxonLight.getHeterotypicSynonyms();
274 List<String> heterotypicSynonymList = Arrays.asList(heterotypicSynonymsString.split(";"));
275
276 String systematicsString = taxonLight.getSystematics();
277 String endemismString = taxonLight.getEndemism();
278 String statusString = taxonLight.getStatus();
279 String redBookCategory = taxonLight.getRedDataBookCategory();
280
281 // BotanicalName subSpeciesName = (BotanicalName)nameParser.parseSimpleName(subSpeciesStr, nc, Rank.SUBSPECIES());
282 // BotanicalName speciesName = (BotanicalName)nameParser.parseSimpleName(speciesStr, nc, Rank.SPECIES());
283
284 // Classification classification = null;
285
286 if (StringUtils.isNotBlank(speciesStr)) {
287 Taxon mainTaxon = null;
288 Taxon speciesTaxon = (Taxon)createTaxon(state, Rank.SPECIES(), speciesStr, Taxon.class, nc);
289 mainTaxon = speciesTaxon;
290 if (StringUtils.isNotBlank(subSpeciesStr)){
291 Taxon subSpeciesTaxon = (Taxon)createTaxon(state, Rank.SUBSPECIES(), subSpeciesStr, Taxon.class, nc);
292
293 if (subSpeciesTaxon != null){
294 makeParent(state, speciesTaxon, subSpeciesTaxon, citation, microCitation);
295 }
296 mainTaxon = subSpeciesTaxon;
297 }
298
299 makeHigherTaxa(state, taxonLight, speciesTaxon, citation, microCitation);
300 makeHomotypicSynonyms(state, citation, microCitation, homotypicSynonymList, mainTaxon);
301 makeHeterotypicSynonyms(state, citation, microCitation, heterotypicSynonymList, mainTaxon);
302 makeSystematics(systematicsString, mainTaxon);
303 makeEndemism(endemismString, mainTaxon);
304 makeStatus(statusString, mainTaxon);
305 makeRedBookCategory(redBookCategory, mainTaxon);
306
307 // state.putHigherTaxon(higherName, uuid);//(speciesStr, mainTaxon);
308 getTaxonService().save(mainTaxon);
309 }
310 return success;
311 }
312
313
314 private void makeHigherTaxa(CyprusImportState state, CyprusRow taxonLight, Taxon speciesTaxon, Reference citation, String microCitation) {
315 String divisionStr = taxonLight.getDivision();
316 String genusStr = taxonLight.getGenus();
317 String familyStr = taxonLight.getFamily();
318
319 Taxon division = getTaxon(state, divisionStr, Rank.DIVISION(), null, citation, microCitation);
320 Taxon family = getTaxon(state, familyStr, Rank.FAMILY(), division, citation, microCitation);
321 Taxon genus = getTaxon(state, genusStr, Rank.GENUS(), family, citation, microCitation);
322 makeParent(state, genus, speciesTaxon, citation, microCitation) ;
323 }
324
325
326 private Taxon getTaxon(CyprusImportState state, String taxonNameStr, Rank rank, Taxon parent, Reference citation, String microCitation) {
327 Taxon result;
328 if (state.containsHigherTaxon(taxonNameStr)){
329 result = state.getHigherTaxon(taxonNameStr);
330 }else{
331 result = (Taxon)createTaxon(state, rank, taxonNameStr, Taxon.class, nc);
332 state.putHigherTaxon(taxonNameStr, result);
333 if (parent == null){
334 makeParent(state, null,result, citation, microCitation);
335 }else{
336 makeParent(state, parent, result, citation, microCitation);
337 }
338
339 }
340 return result;
341 }
342
343
344 private void makeHomotypicSynonyms(CyprusImportState state,
345 Reference citation, String microCitation, List<String> homotypicSynonymList, Taxon mainTaxon) {
346 for (String homotypicSynonym: homotypicSynonymList){
347 if (StringUtils.isNotBlank(homotypicSynonym)){
348 Synonym synonym = (Synonym)createTaxon(state, null, homotypicSynonym, Synonym.class, nc);
349 mainTaxon.addHomotypicSynonym(synonym, citation, microCitation);
350 }
351 }
352 }
353
354
355 private void makeHeterotypicSynonyms(CyprusImportState state, Reference citation, String microCitation, List<String> heterotypicSynonymList, Taxon mainTaxon) {
356 for (String heterotypicSynonym: heterotypicSynonymList){
357 if (StringUtils.isNotBlank(heterotypicSynonym)){
358 Synonym synonym = (Synonym)createTaxon(state, null, heterotypicSynonym, Synonym.class, nc);
359 mainTaxon.addSynonym(synonym, SynonymRelationshipType.HETEROTYPIC_SYNONYM_OF(), citation, microCitation);
360 }
361 }
362 }
363
364
365 private void makeSystematics(String systematicsString, Taxon mainTaxon) {
366 //Systematics
367 if (StringUtils.isNotBlank(systematicsString)){
368 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
369 //FIXME feature type
370 TextData textData = TextData.NewInstance(Feature.SYSTEMATICS());
371 textData.putText(systematicsString, Language.UNDETERMINED());
372 td.addElement(textData);
373 }
374 }
375
376
377 private void makeEndemism(String endemismString, Taxon mainTaxon) {
378 //endemism
379 if (StringUtils.isNotBlank(endemismString)){
380 boolean flag;
381 if (endemismString.trim().equalsIgnoreCase("not endemic") || endemismString.trim().equalsIgnoreCase("ne?")){
382 flag = false;
383 }else if (endemismString.trim().equalsIgnoreCase("endemic")){
384 flag = true;
385 }else{
386 throw new RuntimeException(endemismString + " is not a valid value for endemism");
387 }
388 //FIXME marker type
389 Marker marker = Marker.NewInstance(MarkerType.ENDEMIC(), flag);
390 mainTaxon.addMarker(marker);
391 }
392 }
393
394
395 private void makeStatus(String statusString, Taxon mainTaxon) {
396 //status
397 //FIXME doubtful
398 if (StringUtils.isNotBlank(statusString)){
399 PresenceTerm status = null;
400 if (statusString.contains("Indigenous")){
401 status = indigenous;
402 }else if (statusString.contains("Casual") || statusString.contains("Causal")){
403 status = casual;
404 }else if (statusString.contains("Cultivated")){
405 status = PresenceTerm.CULTIVATED();
406 }else if (statusString.contains("non-invasive")){
407 status = nonInvasive;
408 }else if (statusString.contains("invasive")){
409 status = invasive;
410 }else if (statusString.contains("Questionable")){
411 // status = PresenceTerm.NATIVE_PRESENCE_QUESTIONABLE();
412 status = questionable;
413 }else if (statusString.startsWith("F")){
414 //FIXME
415 status = PresenceTerm.NATIVE_PRESENCE_QUESTIONABLE();
416 }else if (statusString.equals("##")){
417 //FIXME
418 status = PresenceTerm.NATIVE_PRESENCE_QUESTIONABLE();
419 }else{
420 logger.warn("Unknown status: " + statusString);
421 status = PresenceTerm.PRESENT();
422 }
423 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
424 NamedArea area = TdwgArea.getAreaByTdwgAbbreviation("CYP");
425 Distribution distribution = Distribution.NewInstance(area, status);
426 td.addElement(distribution);
427
428 //text data
429 //FIXME feature
430 TextData textData = TextData.NewInstance(Feature.STATUS());
431 textData.putText(statusString, Language.ENGLISH());
432 td.addElement(textData);
433 }
434 }
435
436
437 private void makeRedBookCategory(String redBookCategory, Taxon mainTaxon) {
438 //red data book category
439 if (StringUtils.isNotBlank(redBookCategory)){
440 TaxonDescription td = this.getTaxonDescription(mainTaxon, false, true);
441 //FIXME feature type
442 TextData textData = TextData.NewInstance(this.redBookCategory);
443 textData.putText(redBookCategory, Language.ENGLISH());
444 td.addElement(textData);
445 }
446 }
447
448
449
450
451 /**
452 * Stores parent-child, synonym and common name relationships
453 */
454 @Override
455 protected boolean secondPass(CyprusImportState state) {
456 boolean success = true;
457 // CyprusRow cyprusRow = state.getCyprusRow();
458
459 return success;
460 }
461
462
463
464 /**
465 * @param state
466 * @param rank
467 * @param taxonNameStr
468 * @param authorStr
469 * @param nameStatus
470 * @param nc
471 * @return
472 */
473 private TaxonBase createTaxon(CyprusImportState state, Rank rank, String taxonNameStr,
474 Class statusClass, NomenclaturalCode nc) {
475 TaxonBase taxonBase;
476 NonViralName taxonNameBase = null;
477 if (nc == NomenclaturalCode.ICVCN){
478 logger.warn("ICVCN not yet supported");
479
480 }else{
481 taxonNameBase =(NonViralName) nc.getNewTaxonNameInstance(rank);
482 //NonViralName nonViralName = (NonViralName)taxonNameBase;
483 INonViralNameParser parser = nameParser;//NonViralNameParserImpl.NewInstance();
484 taxonNameBase = (NonViralName<BotanicalName>)parser.parseFullName(taxonNameStr, nc, rank);
485
486 //taxonNameBase.setNameCache(taxonNameStr);
487
488 }
489
490 //Create the taxon
491 Reference sec = state.getConfig().getSourceReference();
492 // Create the status
493 if (statusClass.equals(Taxon.class)){
494 taxonBase = Taxon.NewInstance(taxonNameBase, sec);
495 }else if (statusClass.equals(Synonym.class)){
496 taxonBase = Synonym.NewInstance(taxonNameBase, sec);
497 }else {
498 Taxon taxon = Taxon.NewInstance(taxonNameBase, sec);
499 taxon.setTaxonStatusUnknown(true);
500 taxonBase = taxon;
501 }
502 return taxonBase;
503 }
504
505 private boolean makeParent(CyprusImportState state, Taxon parentTaxon, Taxon childTaxon, Reference citation, String microCitation){
506 boolean success = true;
507 Reference sec = state.getConfig().getSourceReference();
508
509 // Reference sec = parentTaxon.getSec();
510 Classification tree = state.getTree(sec);
511 if (tree == null){
512 tree = makeTree(state, sec);
513 tree.setTitleCache("Cyprus");
514 }
515 if (sec.equals(childTaxon.getSec())){
516 success &= (null != tree.addParentChild(parentTaxon, childTaxon, citation, microCitation));
517 }else{
518 logger.warn("No relationship added for child " + childTaxon.getTitleCache());
519 }
520 return success;
521 }
522
523
524
525 }