cleanup
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / bogota / BogotaChecklistTaxonImport.java
1 // $Id$
2 /**
3 * Copyright (C) 2017 EDIT
4 * European Distributed Institute of Taxonomy
5 * http://www.e-taxonomy.eu
6 *
7 * The contents of this file are subject to the Mozilla Public License Version 1.1
8 * See LICENSE.TXT at the top of this package for the full license terms.
9 */
10 package eu.etaxonomy.cdm.io.bogota;
11
12 import java.util.Map;
13 import java.util.UUID;
14
15 import org.apache.log4j.Logger;
16 import org.springframework.stereotype.Component;
17 import org.springframework.transaction.TransactionStatus;
18
19 import eu.etaxonomy.cdm.common.CdmUtils;
20 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
21 import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
22 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
23 import eu.etaxonomy.cdm.model.common.Language;
24 import eu.etaxonomy.cdm.model.name.IBotanicalName;
25 import eu.etaxonomy.cdm.model.name.Rank;
26 import eu.etaxonomy.cdm.model.name.TaxonName;
27 import eu.etaxonomy.cdm.model.name.TaxonNameFactory;
28 import eu.etaxonomy.cdm.model.reference.Reference;
29 import eu.etaxonomy.cdm.model.taxon.Classification;
30 import eu.etaxonomy.cdm.model.taxon.ITaxonTreeNode;
31 import eu.etaxonomy.cdm.model.taxon.Synonym;
32 import eu.etaxonomy.cdm.model.taxon.SynonymType;
33 import eu.etaxonomy.cdm.model.taxon.Taxon;
34 import eu.etaxonomy.cdm.model.taxon.TaxonNode;
35 import eu.etaxonomy.cdm.strategy.homotypicgroup.BasionymRelationCreator;
36 import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
37
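/**
 * Excel import for the Bogota checklist. For each worksheet row it creates the
 * species taxon below its genus and family nodes and attaches the synonyms,
 * misapplied names and infraspecific taxa listed in the same row.
 *
 * @author a.mueller
 * @since 21.04.2017
 */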
42 @Component
43 public class BogotaChecklistTaxonImport<CONFIG extends BogotaChecklistImportConfigurator>
44 extends SimpleExcelTaxonImport<CONFIG> {
45
46 private static final long serialVersionUID = -884838817884874228L;
47 private static final Logger logger = Logger.getLogger(BogotaChecklistTaxonImport.class);
48
49 private static final String ID_COL = "#";
50 private static final String AUTHOR = "Autor";
51 private static final String NAME = "Nombre";
52 private static final String GENUS = "GĂ©nero";
53 private static final String FAMILIA = "Familia";
54 private static final String INFRASPECIFIC = "Taxones infraespecĂ­ficos";
55 private static final String SINONIMOS = "Sinonimos";
56
57 private static UUID rootUuid = UUID.fromString("d66eda18-4c11-4472-bfe8-f6cd5ed95c9f");
58 private static UUID plantaeUuid = UUID.fromString("032fc183-eb4f-4f19-a290-28597a849096");
59
60 private String lastGenus;
61 private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
62 private BasionymRelationCreator basionymCreator = new BasionymRelationCreator();
63
64
65 @Override
66 protected String getWorksheetName(CONFIG config) {
67 return "Resultados Busqueda Avanzada";
68 }
69
// true until firstPass has run once; guards the one-time transaction start
private boolean isFirst = true;
// transaction started in firstPass and committed in secondPass; null if none is open
private TransactionStatus tx = null;
72
73 @Override
74 protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
75 if (isFirst){
76 tx = this.startTransaction();
77 isFirst = false;
78 }
79
80 String line = state.getCurrentLine() + ": ";
81 Map<String, String> record = state.getOriginalRecord();
82
83 String noStr = getValue(record, ID_COL);
84
85 //species
86 TaxonNode taxonNode = makeTaxon(state, line, record, noStr);
87
88 if (taxonNode != null){
89 //synonyms
90 makeSynonyms(state, record, line, taxonNode.getTaxon(), noStr);
91
92 //infraspecific
93 makeInfraSpecific(state, record, line, taxonNode, noStr);
94 }else{
95 logger.warn(line + "No taxon node given");
96 }
97 }
98
99
100 @Override
101 protected void secondPass(SimpleExcelTaxonImportState<CONFIG> state) {
102 if (tx != null){
103 this.commitTransaction(tx);
104 tx = null;
105 }
106 }
107
108 /**
109 * @param state
110 * @param record
111 * @param line
112 * @param taxon
113 */
114 private void makeSynonyms(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record, String line,
115 Taxon taxon, String noStr) {
116
117 String synonymsStr = getValue(record, SINONIMOS);
118 if (synonymsStr != null){
119 String[] splits = synonymsStr.split(",");
120 for(String split : splits){
121 split = split.trim();
122 boolean isMisapplied = split.contains("auct.") || split.contains(" sensu ");
123 if (split.endsWith(" None")){
124 split = split.replace(" None", "").trim();
125 }
126 if (isMisapplied){
127 handleSingleMisapplied(state, split, line, taxon, noStr);
128 }else{
129 handleSingleSynonym(state, split, line, taxon, noStr);
130 }
131 }
132 }
133 basionymCreator.invoke(taxon);
134 }
135
136 /**
137 * @param state
138 * @param trim
139 * @param line
140 * @param taxon
141 * @param noStr
142 */
143 private void handleSingleMisapplied(SimpleExcelTaxonImportState<CONFIG> state, String nameStr, String line,
144 Taxon taxon, String noStr) {
145 Rank rank = Rank.SPECIES();
146 String AUCT_NON = "auct. non ";
147 String auctStr = nameStr.contains(AUCT_NON)? AUCT_NON: nameStr.endsWith("auct.")? "auct.": null;
148 boolean auctRequired = false;
149 if (auctStr == null){
150 auctRequired = true;
151 if (nameStr.endsWith("auct.colomb.")){
152 nameStr = nameStr.replace(" auct.colomb.", "");
153 auctStr = "auct.colomb.";
154 }else if (nameStr.endsWith(" [auct.mult.non Sw.]")){
155 nameStr = nameStr.replace(" [auct.mult.non Sw.]", "");
156 auctStr = "[auct.mult.non Sw.]";
157 }else if (nameStr.endsWith(" auct.pr.p.")){
158 nameStr = nameStr.replace(" auct.pr.p.", "");
159 auctStr = "auct.pr.p.";
160 }else if (nameStr.contains(" sensu ")){
161 logger.warn(line + "sensu not yet handled correctly:" + nameStr);
162 auctRequired = false;
163 }else{
164 auctRequired = false;
165 logger.warn(line + "auct. not recognized: " + nameStr);
166 }
167
168 }else{
169 nameStr = nameStr.replace(auctStr, "").trim();
170 }
171 TaxonName name = (TaxonName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
172 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
173 name = state.getDeduplicationHelper().getExistingName(name);
174 if (name.isProtectedTitleCache()){
175 logger.warn(line + "Misapplied name could not be parsed: " + nameStr);
176 }
177 state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
178
179 Taxon misApp = Taxon.NewInstance(name, null);
180 if (auctRequired){
181 misApp.setAppendedPhrase(auctStr);
182 }
183 misApp.addImportSource(noStr, getNamespace(state.getConfig()),
184 getSourceCitation(state), null);
185 taxon.addMisappliedName(misApp, state.getConfig().getSecReference(), null);
186 }
187
188 private String getNamespace(CONFIG config) {
189 return getWorksheetName(config)+"."+ ID_COL;
190 }
191
192 private void handleSingleSynonym(SimpleExcelTaxonImportState<CONFIG> state, String nameStr,
193 String line, Taxon taxon, String noStr) {
194 Rank rank = Rank.SPECIES();
195 TaxonName name = (TaxonName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
196 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
197 name = state.getDeduplicationHelper().getExistingName(name);
198 if (name.isProtectedTitleCache()){
199 logger.warn(line + "Synonym could not be parsed: " + nameStr);
200 }
201 state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
202
203 Synonym synonym = Synonym.NewInstance(name, getSecReference(state));
204 synonym.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
205 taxon.addSynonym(synonym, SynonymType.SYNONYM_OF());
206 }
207
208 private void makeInfraSpecific(SimpleExcelTaxonImportState<CONFIG> state, Map<String, String> record, String line,
209 TaxonNode speciesNode, String noStr) {
210 String subSpeciesStr = getValue(record, INFRASPECIFIC);
211 if (subSpeciesStr != null){
212 String[] splits = subSpeciesStr.split(",");
213 for(String split : splits){
214 if (split.endsWith(" None")){
215 split = split.replace(" None", "").trim();
216 }
217 Rank rank = Rank.SUBSPECIES();
218 TaxonName name = (TaxonName)parser.parseFullName(split.trim(), state.getConfig().getNomenclaturalCode(), rank);
219 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
220 name = state.getDeduplicationHelper().getExistingName(name);
221 if (name.isProtectedTitleCache()){
222 logger.warn(line + "Infraspecific taxon could not be parsed: " + split.trim());
223 }
224 state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
225
226 Taxon subSpecies = Taxon.NewInstance(name, getSecReference(state));
227 subSpecies.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
228 TaxonNode subSpeciesNode = speciesNode.addChildTaxon(subSpecies, getSecReference(state), null);
229 getTaxonNodeService().save(subSpeciesNode);
230 }
231 }
232 }
233
234 private TaxonNode makeTaxon(SimpleExcelTaxonImportState<CONFIG> state, String line, Map<String, String> record,
235 String noStr) {
236
237 TaxonNode familyTaxon = getFamilyTaxon(record, state);
238 if (familyTaxon == null){
239 logger.warn(line + "Family not created: " + record.get(FAMILIA));
240 }
241
242 String genusStr = getValue(record, GENUS);
243 String nameStr = getValue(record, NAME);
244 String speciesAuthorStr = getValue(record, AUTHOR);
245
246 nameStr = CdmUtils.concat(" ", nameStr, speciesAuthorStr);
247 Rank rank = Rank.SPECIES();
248 TaxonName name = (TaxonName)parser.parseFullName(nameStr, state.getConfig().getNomenclaturalCode(), rank);
249 name.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
250 name = state.getDeduplicationHelper().getExistingName(name);
251 if (name.isProtectedTitleCache()){
252 logger.warn(line + "Name could not be parsed: " + nameStr);
253 }
254 state.getDeduplicationHelper().replaceAuthorNamesAndNomRef(name);
255
256 Taxon taxon = Taxon.NewInstance(name, getSecReference(state));
257
258 taxon.addImportSource(noStr, getNamespace(state.getConfig()), getSourceCitation(state), null);
259
260 String parentStr = genusStr;
261 boolean genusAsBefore = genusStr.equals(lastGenus);
262 TaxonNode parent = getParent(state, parentStr);
263 TaxonNode newNode;
264 if (parent != null){
265 if (genusAsBefore ){
266 //everything as expected
267 newNode = parent.addChildTaxon(taxon, getSecReference(state), null);
268 getTaxonNodeService().save(newNode);
269 }else{
270 logger.warn(line + "Unexpected non-missing parent");
271 newNode = null;
272 }
273 }else{
274 if (genusAsBefore){
275 logger.warn(line + "Unexpected missing genus parent");
276 newNode = null;
277 }else{
278 parent = makeGenusNode(state, record, genusStr);
279 newNode = parent.addChildTaxon(taxon, getSecReference(state), null);
280 getTaxonNodeService().save(newNode);
281 }
282 }
283
284 this.lastGenus = genusStr;
285 return newNode;
286 }
287
288 /**
289 * @param record
290 * @param state
291 * @return
292 */
293 private TaxonNode getFamilyTaxon(Map<String, String> record, SimpleExcelTaxonImportState<CONFIG> state) {
294 String familyStr = getValue(record, FAMILIA);
295 if (familyStr == null){
296 return null;
297 }
298 familyStr = familyStr.trim();
299
300 Taxon family = state.getHigherTaxon(familyStr);
301 TaxonNode familyNode;
302 if (family != null){
303 familyNode = family.getTaxonNodes().iterator().next();
304 }else{
305 IBotanicalName name = makeFamilyName(state, familyStr);
306 Reference sec = getSecReference(state);
307 family = Taxon.NewInstance(name, sec);
308
309 ITaxonTreeNode classificationNode = getClassification(state);
310 familyNode = classificationNode.addChildTaxon(family, sec, null);
311 state.putHigherTaxon(familyStr, family);
312 getTaxonNodeService().save(familyNode);
313 }
314
315 return familyNode;
316 }
317
318
319 private TaxonNode rootNode;
320 private TaxonNode getClassification(SimpleExcelTaxonImportState<CONFIG> state) {
321 if (rootNode == null){
322 Reference sec = getSecReference(state);
323 String classificationName = state.getConfig().getClassificationName();
324 Language language = Language.DEFAULT();
325 Classification classification = Classification.NewInstance(classificationName, sec, language);
326 classification.setUuid(state.getConfig().getClassificationUuid());
327 classification.getRootNode().setUuid(rootUuid);
328
329 IBotanicalName plantaeName = TaxonNameFactory.NewBotanicalInstance(Rank.KINGDOM());
330 plantaeName.setGenusOrUninomial("Plantae");
331 Taxon plantae = Taxon.NewInstance(plantaeName, sec);
332 TaxonNode plantaeNode = classification.addChildTaxon(plantae, null, null);
333 plantaeNode.setUuid(plantaeUuid);
334 getClassificationService().save(classification);
335
336 rootNode = plantaeNode;
337 }
338 return rootNode;
339 }
340
341
342 protected IBotanicalName makeFamilyName(SimpleExcelTaxonImportState<CONFIG> state, String famStr) {
343 IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.FAMILY());
344 famStr = decapitalize(famStr);
345 name.setGenusOrUninomial(famStr);
346 name.addSource(makeOriginalSource(state));
347 return name;
348 }
349
350 /**
351 * @param state
352 * @return
353 */
354 @Override
355 protected IdentifiableSource makeOriginalSource(SimpleExcelTaxonImportState<CONFIG> state) {
356 return IdentifiableSource.NewDataImportInstance(getValue(state.getOriginalRecord(),ID_COL),
357 getNamespace(state.getConfig()), state.getConfig().getSourceReference());
358 }
359
360 /**
361 * @param famStr
362 * @return
363 */
364 private String decapitalize(String famStr) {
365 String result = famStr.substring(0,1) + famStr.substring(1).toLowerCase();
366 return result;
367 }
368
369
370 protected Reference getSecReference(SimpleExcelTaxonImportState<CONFIG> state) {
371 return state.getConfig().getSecReference();
372 }
373
374 /**
375 * @param state
376 * @return
377 */
378 protected Reference getSourceCitation(SimpleExcelTaxonImportState<CONFIG> state) {
379 return state.getConfig().getSourceReference();
380 }
381
382 /**
383 * @param state
384 * @param parentStr
385 * @return
386 */
387 private TaxonNode getParent(SimpleExcelTaxonImportState<CONFIG> state, String parentStr) {
388 Taxon taxon = state.getHigherTaxon(parentStr);
389
390 return taxon == null ? null : taxon.getTaxonNodes().iterator().next();
391 }
392
393 /**
394 * @param state
395 * @param record
396 * @param genusStr
397 * @return
398 */
399 private TaxonNode makeGenusNode(SimpleExcelTaxonImportState<CONFIG> state,
400 Map<String, String> record, String genusStr) {
401 IBotanicalName name = TaxonNameFactory.NewBotanicalInstance(Rank.GENUS());
402 name.setGenusOrUninomial(genusStr);
403 Taxon genus = Taxon.NewInstance(name, getSecReference(state));
404 TaxonNode family = getFamilyTaxon(record, state);
405 TaxonNode genusNode = family.addChildTaxon(genus, getSecReference(state), null);
406 state.putHigherTaxon(genusStr, genus);
407 genus.addSource(makeOriginalSource(state));
408 getTaxonNodeService().save(genusNode);
409 return genusNode;
410 }
411
412 }